]> git.proxmox.com Git - mirror_ovs.git/blame - tests/ovsdb-cluster.at
cirrus: Use FreeBSD 12.2.
[mirror_ovs.git] / tests / ovsdb-cluster.at
CommitLineData
1b1d2e6d
BP
1OVS_START_SHELL_HELPERS
2# ovsdb_check_cluster N_SERVERS SCHEMA_FUNC OUTPUT TRANSACTION...
3ovsdb_check_cluster () {
4 local n=$1 schema_func=$2 output=$3
5 shift; shift; shift
6
7 $schema_func > schema
8 schema=`ovsdb-tool schema-name schema`
9 AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr])
10 AT_CHECK([grep -v 'from ephemeral to persistent' stderr], [1])
11 cid=`ovsdb-tool db-cid s1.db`
12 for i in `seq 2 $n`; do
13 AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft unix:s1.raft])
14 done
15
16 on_exit 'kill `cat *.pid`'
17 for i in `seq $n`; do
5a0e4aec 18 AT_CHECK([ovsdb-server -vraft -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
1b1d2e6d
BP
19 done
20 for i in `seq $n`; do
d97af428 21 AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema connected])
1b1d2e6d
BP
22 done
23
24 for txn
25 do
15394e0f 26 AT_CHECK([ovsdb-client -vjsonrpc -vconsole:off -vsyslog:off -vvlog:off --log-file transact unix:s1.ovsdb,unix:s2.ovsdb,unix:s3.ovsdb "$txn"], [0], [stdout])
1b1d2e6d
BP
27 cat stdout >> output
28 done
29 AT_CHECK_UNQUOTED([uuidfilt output], [0], [$output])
30 for i in `seq $n`; do
5a0e4aec 31 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
1b1d2e6d
BP
32 done
33
34 AT_CHECK([ovsdb-tool check-cluster s*.db])
35}
36OVS_END_SHELL_HELPERS
37
# Test a 1-server cluster.
#
# Each section below redefines the per-example check macro and then expands
# the shared examples list, which is presumably defined elsewhere in the
# testsuite.  As used here, argument 1 is the test title, argument 2 the
# schema function, argument 3 the list of transactions, argument 4 the
# expected output and argument 5 extra keywords.  Every transaction becomes
# one single-quoted shell word passed to ovsdb_check_cluster.
AT_BANNER([OVSDB - clustered transactions (1 server)])
m4_define([OVSDB_CHECK_EXECUTION],
  [AT_SETUP([$1 - cluster of 1])
   AT_KEYWORDS([ovsdb server positive unix cluster cluster1 $5])
   ovsdb_check_cluster 1 "$2" '$4' m4_foreach([txn], [$3], ['txn' ])
   AT_CLEANUP])
EXECUTION_EXAMPLES

# Test a 3-server cluster.
AT_BANNER([OVSDB - clustered transactions (3 servers)])
m4_define([OVSDB_CHECK_EXECUTION],
  [AT_SETUP([$1 - cluster of 3])
   AT_KEYWORDS([ovsdb server positive unix cluster cluster3 $5])
   ovsdb_check_cluster 3 "$2" '$4' m4_foreach([txn], [$3], ['txn' ])
   AT_CLEANUP])
EXECUTION_EXAMPLES

# Test a 5-server cluster.
AT_BANNER([OVSDB - clustered transactions (5 servers)])
m4_define([OVSDB_CHECK_EXECUTION],
  [AT_SETUP([$1 - cluster of 5])
   AT_KEYWORDS([ovsdb server positive unix cluster cluster5 $5])
   ovsdb_check_cluster 5 "$2" '$4' m4_foreach([txn], [$3], ['txn' ])
   AT_CLEANUP])
EXECUTION_EXAMPLES
eb692258
HZ
64\f
65
ca367fa5
HZ
66AT_BANNER([OVSDB - disconnect from cluster])
67
89771c1e 68OVS_START_SHELL_HELPERS
923f01ca
HZ
69# ovsdb_test_cluster_disconnect N_SERVERS LEADER_OR_FOLLOWER [CHECK_FLAPPING]
70# Test server disconnected from the cluster.
71# N_SERVERS: Number of servers in the cluster.
72# LEADER_OR_FOLLOWER: The role of the server that is disconnected from the
73# cluster: "leader" or "follower".
74# CHECK_FLAPPING: Whether to check if is_disconnected flapped. "yes", "no".
89771c1e 75ovsdb_test_cluster_disconnect () {
923f01ca
HZ
76 n=$1
77 leader_or_follower=$2
78 check_flapping=$3
89771c1e
HZ
79 schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
80 ordinal_schema > schema
81 AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
82 cid=`ovsdb-tool db-cid s1.db`
83 schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
923f01ca 84 for i in `seq 2 $n`; do
89771c1e
HZ
85 AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
86 done
87
88 on_exit 'kill `cat *.pid`'
923f01ca 89 for i in `seq $n`; do
89771c1e
HZ
90 AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
91 done
923f01ca 92 for i in `seq $n`; do
89771c1e
HZ
93 AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
94 done
95
96 AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
97 {"op": "insert",
98 "table": "simple",
99 "row": {"i": 1}}]]'], [0], [ignore], [ignore])
100
101 # When a node is disconnected from the cluster, the IDL should disconnect
102 # and retry even if it uses a single remote, because the remote IP can be
103 # a VIP on a load-balance. So we use single remote to test here.
104 if test $leader_or_follower == "leader"; then
105 target=1
923f01ca
HZ
106 shutdown=`seq $(($n/2 + 1)) $n`
107 cleanup=`seq $(($n/2))`
89771c1e 108 else
923f01ca 109 target=$n
89771c1e 110
923f01ca
HZ
111 # shutdown followers before the leader (s1) so that there is no chance for
112 # s$n to become leader during the process.
113 shutdown="`seq 2 $(($n/2 + 1))` 1"
114 cleanup=`seq $(($n/2 + 2)) $n`
89771c1e 115 fi
923f01ca
HZ
116 echo shutdown=$shutdown
117 echo cleanup=$cleanup
89771c1e
HZ
118
119 # Connect to $target. Use "wait" to trigger a non-op transaction so
120 # that test-ovsdb will not quit.
121
122 test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -v -t10 idl unix:s$target.ovsdb '[["idltest",
123 {"op": "wait",
124 "table": "simple",
125 "where": [["i", "==", 1]],
126 "columns": ["i"],
127 "until": "==",
128 "rows": [{"i": 1}]}]]' > test-ovsdb.log 2>&1 &
129 echo $! > test-ovsdb.pid
ca367fa5 130
89771c1e 131 OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log])
ca367fa5 132
923f01ca
HZ
133 # Start collecting raft_is_connected logs for $target before shutting down
134 # any servers.
135 tail -f s$target.log > raft_is_connected.log &
136 echo $! > tail.pid
137
89771c1e
HZ
138 # Shutdown the other servers so that $target is disconnected from the cluster.
139 for i in $shutdown; do
140 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
141 done
142
143 # The test-ovsdb should detect the disconnect and retry.
144 OVS_WAIT_UNTIL([grep disconnect test-ovsdb.log])
145
923f01ca
HZ
146 # The $target debug log should show raft_is_connected: false.
147 OVS_WAIT_UNTIL([grep "raft_is_connected: false" raft_is_connected.log])
148
149 # Save the current count of "raft_is_connected: true"
150 count_old=`grep "raft_is_connected: true" raft_is_connected.log | wc -l`
151 echo count_old $count_old
152
153 if test X$check_flapping == X"yes"; then
154 sleep 10
155 fi
156 # Make sure raft_is_connected didn't flap from false to true.
157 count_new=`grep "raft_is_connected: true" raft_is_connected.log | wc -l`
158 echo count_new $count_new
159 AT_CHECK([test $count_new == $count_old])
160
161 for i in $cleanup; do
162 OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
163 done
89771c1e
HZ
164}
165OVS_END_SHELL_HELPERS
166
# Arguments to ovsdb_test_cluster_disconnect are: number of servers, role of
# the server that gets cut off, and optionally "yes" to also check that the
# connection status does not flap.
AT_SETUP([OVSDB cluster - follower disconnect from cluster, single remote])
AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
ovsdb_test_cluster_disconnect 3 follower
AT_CLEANUP

AT_SETUP([OVSDB cluster - leader disconnect from cluster, single remote])
AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
ovsdb_test_cluster_disconnect 3 leader
AT_CLEANUP

AT_SETUP([OVSDB cluster - leader disconnect from cluster, check flapping])
AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
ovsdb_test_cluster_disconnect 5 leader yes
AT_CLEANUP
89771c1e 181
2833885f
HZ
AT_SETUP([OVSDB cluster - initial status should be disconnected])
AT_KEYWORDS([ovsdb server negative unix cluster disconnect])

# Build and start a 3-server cluster, then wait for every member to connect.
n=3
schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
ordinal_schema > schema
AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
for i in `seq 2 $n`; do
    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
done

on_exit 'kill `cat *.pid`'
for i in `seq $n`; do
    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
done
for i in `seq $n`; do
    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
done

# Stop all servers, and start the s1 only, to test initial connection status
# when there is no leader yet.
for i in `seq 1 $n`; do
    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
done
i=1
AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])

# The initial status should be disconnected.  So wait should fail.
# NOTE(review): 142 is presumably 128 + SIGALRM from the 1-second timeout;
# confirm against the implementation of ovsdb_client_wait.
AT_CHECK([ovsdb_client_wait --timeout=1 unix:s$i.ovsdb $schema_name connected], [142], [ignore], [ignore])
OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])

AT_CLEANUP
216
ca367fa5
HZ
217\f
218
8e354614
HZ
AT_BANNER([OVSDB cluster election timer change])

AT_SETUP([OVSDB cluster - election timer change])
AT_KEYWORDS([ovsdb server positive unix cluster timer])

# Build and start a 3-server cluster, then wait for every member to connect.
n=3
schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
ordinal_schema > schema
AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
for i in `seq 2 $n`; do
    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
done

on_exit 'kill `cat *.pid`'
for i in `seq $n`; do
    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
done
for i in `seq $n`; do
    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
done

# Change not allowed through follower.
AT_CHECK([ovs-appctl -t "`pwd`"/s2 cluster/change-election-timer $schema_name 2000], [2], [], [ignore])

# Timer cannot be changed to bigger than 2x the original value.
AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/change-election-timer $schema_name 4000], [2], [], [ignore])

# Raise the timer in two allowed steps and check that the leader s1 and the
# follower s2 both report each new value.
AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/change-election-timer $schema_name 2000], [0], [dnl
change of election timer initiated.
], [])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Election timer: 2000"])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s2 cluster/status $schema_name | grep "Election timer: 2000"])

AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/change-election-timer $schema_name 4000], [0], [dnl
change of election timer initiated.
], [])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Election timer: 4000"])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s2 cluster/status $schema_name | grep "Election timer: 4000"])

# Latest timer should be used after restart
for i in `seq $n`; do
    printf "\ns$i: stopping\n"
    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
done
for i in `seq $n`; do
    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
done
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Election timer: 4000"])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s2 cluster/status $schema_name | grep "Election timer: 4000"])

# Wait until cluster is ready
for i in `seq $n`; do
    OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "Leader: unknown"])
done

# Latest timer should be restored after DB compact and restart.
# This is to test the install_snapshot RPC.

# Compact online
for i in `seq $n`; do
    AT_CHECK([ovs-appctl -t "`pwd`"/s$i ovsdb-server/compact])
done

for i in `seq $n`; do
    printf "\ns$i: stopping\n"
    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
done
for i in `seq $n`; do
    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
done
for i in `seq $n`; do
    OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "Election timer: 4000"])
done

# Wait until cluster is ready
for i in `seq $n`; do
    OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "Leader: unknown"])
done

# Newly joined member should use latest timer value
AT_CHECK([ovsdb-tool join-cluster s4.db $schema_name unix:s4.raft unix:s1.raft])
AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s4.log --pidfile=s4.pid --unixctl=s4 --remote=punix:s4.ovsdb s4.db])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name | grep "Election timer: 4000"])

AT_CLEANUP
310
bda1f6b6
HZ
311\f
AT_BANNER([OVSDB cluster install snapshot RPC])

AT_SETUP([OVSDB cluster - install snapshot RPC])
AT_KEYWORDS([ovsdb server positive unix cluster snapshot])

# Build and start a 3-server cluster, then wait for every member to connect.
n=3
schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
ordinal_schema > schema
AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
for i in `seq 2 $n`; do
    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
done

on_exit 'kill `cat *.pid`'
for i in `seq $n`; do
    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
done
for i in `seq $n`; do
    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
done

AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
      {"op": "insert",
       "table": "indexed",
       "row": {"i": 0}}]]'], [0], [ignore], [ignore])

# Kill one follower (s2) and write some data to the cluster, so that the
# follower falls behind.
printf "\ns2: stopping\n"
OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s2], [s2.pid])

# Delete "i":0 and re-add it to get a different UUID for it.
AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
      {"op": "delete",
       "table": "indexed",
       "where": [["i", "==", 0]]}]]'], [0], [ignore], [ignore])

AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
      {"op": "insert",
       "table": "indexed",
       "row": {"i": 0}}]]'], [0], [ignore], [ignore])

AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
      {"op": "insert",
       "table": "indexed",
       "row": {"i": 1}}]]'], [0], [ignore], [ignore])

# Compact leader online to generate snapshot
AT_CHECK([ovs-appctl -t "`pwd`"/s1 ovsdb-server/compact])

# Start the follower s2 again.
AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s2.log --pidfile=s2.pid --unixctl=s2 --remote=punix:s2.ovsdb s2.db])
AT_CHECK([ovsdb_client_wait unix:s2.ovsdb $schema_name connected])

# A client transaction through s2.  During this transaction, there will be an
# install_snapshot RPC because s2 detects it is behind and s1 doesn't have the
# pre_log_index requested by s2 because it is already compacted.
# After the install_snapshot RPC process, the transaction through s2 should
# succeed.
AT_CHECK([ovsdb-client transact unix:s2.ovsdb '[["idltest",
      {"op": "insert",
       "table": "indexed",
       "row": {"i": 2}}]]'], [0], [ignore], [ignore])

# The snapshot should overwrite the in-memory contents of the DB on S2
# without generating any constraint violations.  All three records (0, 1, 2)
# should be in the DB at this point.
AT_CHECK([ovsdb-client --no-headings dump unix:s2.ovsdb idltest indexed | uuidfilt | sort -k 2], [0], [dnl
<0> 0
<1> 1
<2> 2
indexed table
])

for i in `seq $n`; do
    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
done

AT_CLEANUP
392
8e354614
HZ
393\f
394
eb692258
HZ
395OVS_START_SHELL_HELPERS
396# ovsdb_cluster_failure_test SCHEMA_FUNC OUTPUT TRANSACTION...
397ovsdb_cluster_failure_test () {
398 # Initial state: s1 is leader, s2 and s3 are followers
399 remote_1=$1
400 remote_2=$2
401 crash_node=$3
402 crash_command=$4
403 if test "$crash_node" == "1"; then
404 new_leader=$5
405 fi
406
817db730 407 cp $top_srcdir/vswitchd/vswitch.ovsschema schema
eb692258
HZ
408 schema=`ovsdb-tool schema-name schema`
409 AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl
817db730 410ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns
eb692258
HZ
411])
412
413 n=3
414 join_cluster() {
415 local i=$1
416 others=
417 for j in `seq 1 $n`; do
418 if test $i != $j; then
419 others="$others unix:s$j.raft"
420 fi
421 done
422 AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft $others])
423 }
424 start_server() {
425 local i=$1
426 printf "\ns$i: starting\n"
427 AT_CHECK([ovsdb-server -vjsonrpc -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
428 }
429 connect_server() {
430 local i=$1
431 printf "\ns$i: waiting to connect to storage\n"
432 AT_CHECK([ovsdb_client_wait --log-file=connect$i.log unix:s$i.ovsdb $schema connected])
433 }
434 cid=`ovsdb-tool db-cid s1.db`
435 for i in `seq 2 $n`; do join_cluster $i; done
436
437 on_exit 'kill `cat *.pid`'
438 for i in `seq $n`; do start_server $i; done
439 for i in `seq $n`; do connect_server $i; done
440
817db730 441 db=unix:s$remote_1.ovsdb,unix:s$remote_2.ovsdb
eb692258
HZ
442
443 # To ensure $new_leader node the new leader, we delay election timer for
444 # the other follower.
445 if test -n "$new_leader"; then
446 if test "$new_leader" == "2"; then
447 delay_election_node=3
448 else
449 delay_election_node=2
450 fi
451 AT_CHECK([ovs-appctl -t "`pwd`"/s$delay_election_node cluster/failure-test delay-election], [0], [ignore])
452 fi
453 AT_CHECK([ovs-appctl -t "`pwd`"/s$crash_node cluster/failure-test $crash_command], [0], [ignore])
15394e0f 454 AT_CHECK([ovs-vsctl -v --db="$db" --no-leader-only --no-shuffle-remotes --no-wait create QoS type=x], [0], [ignore], [ignore])
eb692258
HZ
455
456 # Make sure that the node really crashed.
457 AT_CHECK([ls s$crash_node.ovsdb], [2], [ignore], [ignore])
458 # XXX: Client will fail if remotes contains unix socket that doesn't exist (killed).
817db730
BP
459 if test "$remote_1" = "$crash_node"; then
460 db=unix:s$remote_2.ovsdb
eb692258 461 fi
817db730 462 AT_CHECK([ovs-vsctl --db="$db" --no-leader-only --no-wait --columns=type --bare list QoS], [0], [x
eb692258
HZ
463])
464}
465OVS_END_SHELL_HELPERS
# Each test group below calls ovsdb_cluster_failure_test, which reads its
# positional arguments as: first remote, second remote, node to crash, crash
# command, and the desired new leader, which is only read when node 1
# crashes.  Groups that start with a check running "exit 77" end there,
# because Autotest treats exit status 77 as a request to skip the group.
AT_BANNER([OVSDB - cluster failure with pending transaction])

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending appendReq, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-before-sending-append-request 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending appendReq, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-before-sending-append-request 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending execRep, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-before-sending-execute-command-reply 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending execRep, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-before-sending-execute-command-reply 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash after sending execRep, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-after-sending-execute-command-reply 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash after sending execRep, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-after-sending-execute-command-reply 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, leader crash before sending appendReq, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 1 2 1 crash-before-sending-append-request 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, leader crash before sending appendReq, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 1 2 1 crash-before-sending-append-request 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, leader crash after sending appendReq, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
# XXX: Detect and skip repeated transaction before enabling this test
AT_CHECK([exit 77])
ovsdb_cluster_failure_test 1 2 1 crash-after-sending-append-request 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, leader crash after sending appendReq, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
# XXX: Detect and skip repeated transaction before enabling this test
AT_CHECK([exit 77])
ovsdb_cluster_failure_test 1 2 1 crash-after-sending-append-request 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash before sending execReq, reconnect to follower-3])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 2 crash-before-sending-execute-command-request
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash before sending execReq, reconnect to leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 1 2 crash-before-sending-execute-command-request
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash after sending execReq, reconnect to follower-3])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
# XXX: Detect and skip repeated transaction before enabling this test
AT_CHECK([exit 77])
ovsdb_cluster_failure_test 2 3 2 crash-after-sending-execute-command-request
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash after sending execReq, reconnect to leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
# XXX: Detect and skip repeated transaction before enabling this test
AT_CHECK([exit 77])
ovsdb_cluster_failure_test 2 1 2 crash-after-sending-execute-command-request
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, follower-2 crash after receiving appendReq for the update])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 1 1 2 crash-after-receiving-append-request-update
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-3 crash after receiving appendReq for the update])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update
AT_CLEANUP
555
93ee4209
HZ
556\f
AT_SETUP([OVSDB cluster - competing candidates])
AT_KEYWORDS([ovsdb server negative unix cluster competing-candidates])

# Build and start a 3-server cluster, then wait for every member to connect.
n=3
schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
ordinal_schema > schema
AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
for i in `seq 2 $n`; do
    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
done

on_exit 'kill `cat *.pid`'
for i in `seq $n`; do
    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
done
for i in `seq $n`; do
    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
done

# We need to simulate the situation where 2 candidates start an election with
# the same term.
#
# Before triggering leader election, tell follower s2 not to send vote
# requests (simulating a vote-request that is lost or not handled in time),
# and tell follower s3 to delay its election timer to make sure s3 doesn't
# send a vote-request before s2 enters term 2.
AT_CHECK([ovs-appctl -t "`pwd`"/s2 cluster/failure-test dont-send-vote-request], [0], [ignore])
AT_CHECK([ovs-appctl -t "`pwd`"/s3 cluster/failure-test delay-election], [0], [ignore])

# Restart the leader, which will become a follower, and both old followers
# will start an election as candidates.  The new follower (old leader) will
# vote for one of them, and the other candidate should step back to follower
# again.  SIGKILL is used on purpose so that s1 gets no chance to shut down
# cleanly.
kill -9 `cat s1.pid`
AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s1.log --pidfile=s1.pid --unixctl=s1 --remote=punix:s1.ovsdb s1.db])

# Tell s1 to delay election timer so that it won't start election before s3
# becomes candidate.
AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/failure-test delay-election], [0], [ignore])

OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Term: 2"])

# Every server must leave the candidate role and reconnect.
for i in `seq $n`; do
    OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "candidate"])
    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
done

for i in `seq $n`; do
    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
done

AT_CLEANUP
610
1b1d2e6d
BP
611\f
612AT_BANNER([OVSDB - cluster tests])
613
614# Torture test.
615OVS_START_SHELL_HELPERS
# ovsdb_torture_test N_SERVERS VICTIM VARIANT
#
# Starts an N_SERVERS cluster for the Open_vSwitch schema and launches
# n1 background workers, each running n2 ovs-vsctl transactions that add
# 1 + n3 external_ids keys.  While the workers run, server VICTIM is either
# stopped and restarted when VARIANT is kill, or removed from the cluster and
# re-added otherwise.  Afterwards the function checks that every key is
# present and that "ovsdb-tool check-cluster" accepts the database files.
ovsdb_torture_test () {
    local n=$1                  # Number of cluster members
    local victim=$2             # Cluster member to kill or remove
    local variant=$3            # 'kill' and restart or 'remove' and add
    cp $top_srcdir/vswitchd/vswitch.ovsschema schema
    schema=`ovsdb-tool schema-name schema`
    AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl
ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns
])

    # Join server $1 to the cluster, naming every other server as a remote.
    join_cluster() {
        local i=$1
        others=
        for j in `seq 1 $n`; do
            if test $i != $j; then
                others="$others unix:s$j.raft"
            fi
        done
        AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft $others])
    }

    # Start ovsdb-server number $1 as a detached daemon.
    start_server() {
        local i=$1
        printf "\ns$i: starting\n"
        AT_CHECK([ovsdb-server -vjsonrpc -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
    }
    # Ask server number $1 to exit and wait for it to do so.
    stop_server() {
        local i=$1
        printf "\ns$i: stopping\n"
        OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
    }
    # Wait until server number $1 reports the database as connected.
    connect_server() {
        local i=$1
        printf "\ns$i: waiting to connect to storage\n"
        AT_CHECK([ovsdb_client_wait --log-file=connect$i.log unix:s$i.ovsdb $schema connected])
    }
    # Make server number $1 leave the cluster, wait for the removal to
    # complete, then stop the process.
    remove_server() {
        local i=$1
        printf "\ns$i: removing from cluster\n"
        AT_CHECK([ovs-appctl -t "`pwd`"/s$i cluster/leave Open_vSwitch])
        printf "\ns$i: waiting for removal to complete\n"
        AT_CHECK([ovsdb_client_wait --log-file=remove$i.log unix:s$i.ovsdb $schema removed])
        stop_server $i
    }
    # Re-add server number $1 from scratch: delete its old database file,
    # join the cluster again, start it and wait for it to connect.
    add_server() {
        local i=$1
        rm s$i.db
        join_cluster $i
        start_server $i
        connect_server $i
    }

    cid=`ovsdb-tool db-cid s1.db`
    for i in `seq 2 $n`; do join_cluster $i; done

    on_exit 'kill `cat *.pid`'
    for i in `seq $n`; do start_server $i; done
    for i in `seq $n`; do connect_server $i; done

    # Comma-separated list of every server, used as the client remote list.
    db=unix:s1.ovsdb
    for i in `seq 2 $n`; do
        db=$db,unix:s$i.ovsdb
    done

    # n1 background workers, n2 transactions per worker, n3 extra keys per
    # transaction.
    n1=10 n2=5 n3=50
    echo "starting $n1*$n2 ovs-vsctl processes..."
    for i in $(seq 0 $(expr $n1 - 1) ); do
        (for j in $(seq $n2); do
             # Marker file that exists only while transaction i-j is running.
             : > $i-$j.running
             txn="add Open_vSwitch . external_ids $i-$j=$i-$j"
             for k in $(seq $n3); do
                 txn="$txn -- add Open_vSwitch . external_ids $i-$j-$k=$i-$j-$k"
             done
             run_as "ovs-vsctl($i-$j)" ovs-vsctl "-vPATTERN:console:ovs-vsctl($i-$j)|%D{%H:%M:%S}|%05N|%c|%p|%m" --log-file=$i-$j.log -vfile -vsyslog:off -vtimeval:off --timeout=120 --db="$db" --no-leader-only --no-wait $txn
             status=$?
             # A failure is recorded in a file named after the exit status.
             if test $status != 0; then
                 echo "$i-$j exited with status $status" > $i-$j:$status
             fi
             rm $i-$j.running
         done
         # Tells the polling loop below that worker i has finished.
         : > $i.done)&
    done
    echo "...done"

    echo "waiting for ovs-vsctl processes to exit..."
    # Use file instead of var because code inside "while" runs in a subshell.
    # Phase 0: victim not yet disturbed.  Phase 1: victim stopped or removed.
    # Phase 2: victim restarted or re-added.
    echo 0 > phase
    i=0
    # The left side of the pipe emits one line about every 0.1 seconds and
    # exits once its output can no longer be written; each line drives one
    # polling iteration on the right side.
    (while :; do echo || exit 0; sleep 0.1; done) | while read REPLY; do
        printf "t=%2d s:" $i
        done=0
        for j in $(seq 0 $(expr $n1 - 1)); do
            if test -f $j.done; then
                printf " $j"
                done=$(expr $done + 1)
            fi
        done
        printf '\n'
        if test $done = $n1; then
            break
        fi

        case $(cat phase) in # (
        0)
            # Disturb the victim once at least a tenth of the workers are
            # done; "next" schedules the recovery two iterations later.
            if test $done -ge $(expr $n1 / 10); then
                if test $variant = kill; then
                    stop_server $victim
                else
                    remove_server $victim
                fi
                echo 1 > phase
                next=$(expr $i + 2)
            fi
            ;; # (
        1)
            if test $i -ge $next; then
                if test $variant = kill; then
                    start_server $victim
                    connect_server $victim
                else
                    add_server $victim
                fi
                echo 2 > phase
            fi
            ;;
        esac

        i=$(expr $i + 1)
    done
    echo "...done"
    # Exit status 77 makes Autotest skip the test group: if all workers
    # finished before the victim was brought back, nothing was really tested.
    AT_CHECK([if test $(cat phase) != 2; then exit 77; fi])

    # Build the sorted list of every key the workers should have added.
    for i in $(seq 0 $(expr $n1 - 1) ); do
        for j in `seq $n2`; do
            echo "$i-$j=$i-$j"
            for k in `seq $n3`; do
                echo "$i-$j-$k=$i-$j-$k"
            done
        done
    done | sort > expout
    AT_CHECK([ovs-vsctl --db="$db" --no-wait --log-file=finalize.log -vtimeval:off -vfile -vsyslog:off --bare get Open_vSwitch . external-ids | tr ',' '\n' | sed 's/[[{}"" ]]//g' | sort], [0], [expout])

    # Stop every server, except a victim that is still down in phase 1.
    for i in `seq $n`; do
        if test $i != $victim || test $(cat phase) != 1; then
            stop_server $i
        fi
    done

    # We ignore stdout because non-fatal warnings get printed there.
    AT_CHECK([ovsdb-tool check-cluster s*.db], [0], [ignore])
}
767OVS_END_SHELL_HELPERS
768
# Arguments to ovsdb_torture_test are: number of cluster members, the member
# to disturb, and the variant, either kill or remove.  Per the test titles,
# member 1 is the leader and members 2 and up are followers.
AT_SETUP([OVSDB 3-server torture test - kill/restart leader])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 1 kill
AT_CLEANUP
AT_SETUP([OVSDB 3-server torture test - kill/restart follower 1])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 2 kill
AT_CLEANUP
AT_SETUP([OVSDB 3-server torture test - kill/restart follower 2])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 3 kill
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - kill/restart leader])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 1 kill
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - kill/restart follower 1])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 2 kill
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - kill/restart follower 2])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 3 kill
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - kill/restart follower 3])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 4 kill
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - kill/restart follower 4])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 5 kill
AT_CLEANUP

AT_SETUP([OVSDB 3-server torture test - remove/re-add leader])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 1 remove
AT_CLEANUP
AT_SETUP([OVSDB 3-server torture test - remove/re-add follower 1])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 2 remove
AT_CLEANUP
AT_SETUP([OVSDB 3-server torture test - remove/re-add follower 2])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 3 remove
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - remove/re-add leader])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 1 remove
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 1])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 2 remove
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 2])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 3 remove
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 3])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 4 remove
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 4])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 5 remove
AT_CLEANUP