# Source: git.proxmox.com Git - mirror_ovs.git - tests/ovsdb-cluster.at
# Revision: "ovsdb raft: Avoid unnecessary reconnecting during leader election."
OVS_START_SHELL_HELPERS
# ovsdb_check_cluster N_SERVERS SCHEMA_FUNC OUTPUT TRANSACTION...
#
# Creates a clustered database from the schema printed by SCHEMA_FUNC,
# starts N_SERVERS ovsdb-server processes named s1..sN, runs each
# TRANSACTION through ovsdb-client, and compares the concatenated,
# uuid-filtered replies against OUTPUT.  Finally every server is stopped
# and the on-disk logs are verified with "ovsdb-tool check-cluster".
ovsdb_check_cluster () {
    local n=$1 schema_func=$2 output=$3
    shift; shift; shift

    # s1 creates the cluster.  The only stderr output tolerated is the
    # ephemeral-to-persistent warning: grep -v must select no lines.
    $schema_func > schema
    schema=`ovsdb-tool schema-name schema`
    AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr])
    AT_CHECK([grep -v 'from ephemeral to persistent' stderr], [1])
    # NOTE(review): cid is not referenced again in this helper.
    cid=`ovsdb-tool db-cid s1.db`
    for i in `seq 2 $n`; do
        AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft unix:s1.raft])
    done

    on_exit 'kill `cat *.pid`'
    for i in `seq $n`; do
        AT_CHECK([ovsdb-server -vraft -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
    done
    for i in `seq $n`; do
        AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema connected])
    done

    # Build the client remote list from the actual cluster size, so that
    # it never names sockets that do not exist for small clusters and
    # never leaves out members of larger ones.
    remotes=unix:s1.ovsdb
    for i in `seq 2 $n`; do
        remotes=$remotes,unix:s$i.ovsdb
    done

    # The remaining positional parameters are the transactions.
    for txn
    do
      AT_CHECK([ovsdb-client --timeout=30 -vjsonrpc -vconsole:off -vsyslog:off -vvlog:off --log-file transact $remotes "$txn"], [0], [stdout])
      cat stdout >> output
    done
    AT_CHECK_UNQUOTED([uuidfilt output], [0], [$output])
    for i in `seq $n`; do
        OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
    done

    AT_CHECK([ovsdb-tool check-cluster s*.db])
}
OVS_END_SHELL_HELPERS

# Test a 1-server cluster.
#
# OVSDB_CHECK_EXECUTION is redefined here so that each use of it inside
# EXECUTION_EXAMPLES becomes one test group run against a 1-server cluster.
# Macro arguments as used below: 1 = test title, 2 = schema function,
# 3 = list of transactions, 4 = expected output, 5 = extra keywords.
AT_BANNER([OVSDB - clustered transactions (1 server)])
m4_define([OVSDB_CHECK_EXECUTION],
  [AT_SETUP([$1 - cluster of 1])
   AT_KEYWORDS([ovsdb server positive unix cluster cluster1 $5])
   ovsdb_check_cluster 1 "$2" '$4' m4_foreach([txn], [$3], ['txn' ])
   AT_CLEANUP])
EXECUTION_EXAMPLES

# Test a 3-server cluster.
#
# OVSDB_CHECK_EXECUTION is redefined here so that each use of it inside
# EXECUTION_EXAMPLES becomes one test group run against a 3-server cluster.
# Macro arguments as used below: 1 = test title, 2 = schema function,
# 3 = list of transactions, 4 = expected output, 5 = extra keywords.
AT_BANNER([OVSDB - clustered transactions (3 servers)])
m4_define([OVSDB_CHECK_EXECUTION],
  [AT_SETUP([$1 - cluster of 3])
   AT_KEYWORDS([ovsdb server positive unix cluster cluster3 $5])
   ovsdb_check_cluster 3 "$2" '$4' m4_foreach([txn], [$3], ['txn' ])
   AT_CLEANUP])
EXECUTION_EXAMPLES

# Test a 5-server cluster.
#
# OVSDB_CHECK_EXECUTION is redefined here so that each use of it inside
# EXECUTION_EXAMPLES becomes one test group run against a 5-server cluster.
# Macro arguments as used below: 1 = test title, 2 = schema function,
# 3 = list of transactions, 4 = expected output, 5 = extra keywords.
AT_BANNER([OVSDB - clustered transactions (5 servers)])
m4_define([OVSDB_CHECK_EXECUTION],
  [AT_SETUP([$1 - cluster of 5])
   AT_KEYWORDS([ovsdb server positive unix cluster cluster5 $5])
   ovsdb_check_cluster 5 "$2" '$4' m4_foreach([txn], [$3], ['txn' ])
   AT_CLEANUP])
EXECUTION_EXAMPLES
eb692258
HZ
64\f
65
OVS_START_SHELL_HELPERS
# ovsdb_cluster_failure_test REMOTE_1 REMOTE_2 CRASH_NODE CRASH_COMMAND [NEW_LEADER]
#
# Starts a 3-server OVN_Northbound cluster (s1, s2, s3), points ovn-nbctl
# at servers REMOTE_1 and REMOTE_2 in that order, arms the failure-test
# hook CRASH_COMMAND on server CRASH_NODE, and then creates a logical
# switch.  The test passes if CRASH_NODE has gone away and the logical
# switch is nevertheless present afterwards.
#
# NEW_LEADER is only honored when CRASH_NODE is 1: it names the follower,
# 2 or 3, that should win the next election, which is arranged by delaying
# the election timer of the other follower.
ovsdb_cluster_failure_test () {
    # Initial state: s1 is leader, s2 and s3 are followers
    remote_1=$1
    remote_2=$2
    crash_node=$3
    crash_command=$4
    # Start from a known-empty value so that a stray new_leader inherited
    # from the environment cannot trigger the election delay below.
    new_leader=
    if test "$crash_node" = "1"; then
        new_leader=$5
    fi

    cp $top_srcdir/ovn/ovn-nb.ovsschema schema
    schema=`ovsdb-tool schema-name schema`
    AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl
ovsdb|WARN|schema: changed 2 columns in 'OVN_Northbound' database from ephemeral to persistent, including 'status' column in 'Connection' table, because clusters do not support ephemeral columns
])

    n=3
    # Join server $1 to the cluster, listing every other member as a remote.
    join_cluster() {
        local i=$1
        others=
        for j in `seq 1 $n`; do
            if test $i != $j; then
                others="$others unix:s$j.raft"
            fi
        done
        AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft $others])
    }
    # Start ovsdb-server for member $1 in the background.
    start_server() {
        local i=$1
        printf "\ns$i: starting\n"
        AT_CHECK([ovsdb-server -vjsonrpc -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
    }
    # Wait until member $1 reports that it is connected to the storage.
    connect_server() {
        local i=$1
        printf "\ns$i: waiting to connect to storage\n"
        AT_CHECK([ovsdb_client_wait --log-file=connect$i.log unix:s$i.ovsdb $schema connected])
    }
    # NOTE(review): cid is not referenced again in this helper.
    cid=`ovsdb-tool db-cid s1.db`
    for i in `seq 2 $n`; do join_cluster $i; done

    on_exit 'kill `cat *.pid`'
    for i in `seq $n`; do start_server $i; done
    for i in `seq $n`; do connect_server $i; done

    export OVN_NB_DB=unix:s$remote_1.ovsdb,unix:s$remote_2.ovsdb

    # To ensure that the $new_leader node becomes the new leader, we delay
    # the election timer of the other follower.
    if test -n "$new_leader"; then
        if test "$new_leader" = "2"; then
            delay_election_node=3
        else
            delay_election_node=2
        fi
        AT_CHECK([ovs-appctl -t "`pwd`"/s$delay_election_node cluster/failure-test delay-election], [0], [ignore])
    fi
    AT_CHECK([ovs-appctl -t "`pwd`"/s$crash_node cluster/failure-test $crash_command], [0], [ignore])
    AT_CHECK([ovn-nbctl -v --timeout=10 --no-leader-only --no-shuffle-remotes create logical_switch name=ls1], [0], [ignore], [ignore])

    # Make sure that the node really crashed: its listening socket must be
    # gone.  "test -e" is used rather than "ls" because the exit status of
    # ls for a missing file differs between implementations.
    AT_CHECK([test -e s$crash_node.ovsdb], [1])
    # XXX: Client will fail if remotes contains unix socket that doesn't exist (killed).
    if test "$remote_1" = "$crash_node"; then
        export OVN_NB_DB=unix:s$remote_2.ovsdb
    fi
    AT_CHECK([ovn-nbctl --no-leader-only ls-list | awk '{ print $2 }'], [0], [(ls1)
])
}
OVS_END_SHELL_HELPERS
AT_BANNER([OVSDB - cluster failure with pending transaction])

# Each test below calls
#   ovsdb_cluster_failure_test remote_1 remote_2 crash_node crash_command [new_leader]
# The tests preceded by an "exit 77" check are skipped for now.

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending appendReq, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-before-sending-append-request 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending appendReq, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-before-sending-append-request 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending execRep, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-before-sending-execute-command-reply 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending execRep, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-before-sending-execute-command-reply 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash after sending execRep, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-after-sending-execute-command-reply 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash after sending execRep, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-after-sending-execute-command-reply 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, leader crash before sending appendReq, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 1 2 1 crash-before-sending-append-request 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, leader crash before sending appendReq, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 1 2 1 crash-before-sending-append-request 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, leader crash after sending appendReq, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
# XXX: Detect and skip repeated transaction before enabling this test
AT_CHECK([exit 77])
ovsdb_cluster_failure_test 1 2 1 crash-after-sending-append-request 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, leader crash after sending appendReq, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
# XXX: Detect and skip repeated transaction before enabling this test
AT_CHECK([exit 77])
ovsdb_cluster_failure_test 1 2 1 crash-after-sending-append-request 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash before sending execReq, reconnect to follower-3])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 2 crash-before-sending-execute-command-request
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash before sending execReq, reconnect to leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 1 2 crash-before-sending-execute-command-request
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash after sending execReq, reconnect to follower-3])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
# XXX: Detect and skip repeated transaction before enabling this test
AT_CHECK([exit 77])
ovsdb_cluster_failure_test 2 3 2 crash-after-sending-execute-command-request
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash after sending execReq, reconnect to leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
# XXX: Detect and skip repeated transaction before enabling this test
AT_CHECK([exit 77])
ovsdb_cluster_failure_test 2 1 2 crash-after-sending-execute-command-request
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, follower-2 crash after receiving appendReq for the update])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 1 1 2 crash-after-receiving-append-request-update
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-3 crash after receiving appendReq for the update])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update
AT_CLEANUP

1b1d2e6d
BP
227\f
AT_BANNER([OVSDB - cluster tests])

# Torture test.
OVS_START_SHELL_HELPERS
# ovsdb_torture_test N_SERVERS VICTIM VARIANT
#
# Starts an N_SERVERS-member OVN_Southbound cluster and launches n1
# background workers, each of which runs n2 ovn-sbctl transactions that
# add n3 + 1 keys to the external_ids column of SB_Global.  While the
# workers run, member VICTIM is either stopped and restarted, when VARIANT
# is kill, or removed from the cluster and re-added, for any other value.
# At the end, every key written by every worker must be present.
ovsdb_torture_test () {
    local n=$1                  # Number of cluster members
    local victim=$2             # Cluster member to kill or remove
    local variant=$3            # 'kill' and restart or 'remove' and add
    cp $top_srcdir/ovn/ovn-sb.ovsschema schema
    schema=`ovsdb-tool schema-name schema`
    AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl
ovsdb|WARN|schema: changed 2 columns in 'OVN_Southbound' database from ephemeral to persistent, including 'status' column in 'Connection' table, because clusters do not support ephemeral columns
])

    # Join server $1 to the cluster, listing every other member as a remote.
    join_cluster() {
        local i=$1
        others=
        for j in `seq 1 $n`; do
            if test $i != $j; then
                others="$others unix:s$j.raft"
            fi
        done
        AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft $others])
    }

    # Start ovsdb-server for member $1 in the background.
    start_server() {
        local i=$1
        printf "\ns$i: starting\n"
        AT_CHECK([ovsdb-server -vjsonrpc -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
    }
    # Ask member $1 to exit and wait for it to do so.
    stop_server() {
        local i=$1
        printf "\ns$i: stopping\n"
        OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
    }
    # Wait until member $1 reports that it is connected to the storage.
    connect_server() {
        local i=$1
        printf "\ns$i: waiting to connect to storage\n"
        AT_CHECK([ovsdb_client_wait --log-file=connect$i.log unix:s$i.ovsdb $schema connected])
    }
    # Make member $1 leave the cluster, wait for the removal, then stop it.
    remove_server() {
        local i=$1
        printf "\ns$i: removing from cluster\n"
        AT_CHECK([ovs-appctl --timeout=30 -t "`pwd`"/s$i cluster/leave OVN_Southbound])
        printf "\ns$i: waiting for removal to complete\n"
        AT_CHECK([ovsdb_client_wait --log-file=remove$i.log unix:s$i.ovsdb $schema removed])
        stop_server $i
    }
    # Re-add member $1 from scratch: discard its database, rejoin, restart.
    add_server() {
        local i=$1
        rm s$i.db
        join_cluster $i
        start_server $i
        connect_server $i
    }

    # NOTE(review): cid is not referenced again in this helper.
    cid=`ovsdb-tool db-cid s1.db`
    for i in `seq 2 $n`; do join_cluster $i; done

    on_exit 'kill `cat *.pid`'
    for i in `seq $n`; do start_server $i; done
    for i in `seq $n`; do connect_server $i; done

    # Clients may use any member of the cluster.
    OVN_SB_DB=unix:s1.ovsdb
    for i in `seq 2 $n`; do
        OVN_SB_DB=$OVN_SB_DB,unix:s$i.ovsdb
    done
    export OVN_SB_DB

    # n1 workers, n2 transactions per worker, n3 extra keys per transaction.
    n1=10 n2=5 n3=50
    echo "starting $n1*$n2 ovn-sbctl processes..."
    for i in $(seq 0 $(expr $n1 - 1) ); do
        (for j in $(seq $n2); do
             : > $i-$j.running
             txn="add SB_Global . external_ids $i-$j=$i-$j"
             for k in $(seq $n3); do
                 txn="$txn -- add SB_Global . external_ids $i-$j-$k=$i-$j-$k"
             done
             run_as "ovn-sbctl($i-$j)" ovn-sbctl "-vPATTERN:console:ovn-sbctl($i-$j)|%D{%H:%M:%S}|%05N|%c|%p|%m" --log-file=$i-$j.log -vfile -vsyslog:off -vtimeval:off --timeout=120 --no-leader-only $txn
             status=$?
             if test $status != 0; then
                 echo "$i-$j exited with status $status" > $i-$j:$status
             fi
             rm $i-$j.running
         done
         # Marker file polled by the wait loop below.
         : > $i.done)&
    done
    echo "...done"

    echo "waiting for ovn-sbctl processes to exit..."
    # Use file instead of var because code inside "while" runs in a subshell.
    #
    # Phases: 0 = victim not yet disturbed, 1 = victim stopped or removed,
    # 2 = victim restarted or re-added.
    echo 0 > phase
    i=0
    # The left side of the pipe emits one line every 0.1 s, so the loop
    # body runs once per tick and i counts ticks.
    (while :; do echo; sleep 0.1; done) | while read REPLY; do
        printf "t=%2d s:" $i
        done=0
        for j in $(seq 0 $(expr $n1 - 1)); do
            if test -f $j.done; then
                printf " $j"
                done=$(expr $done + 1)
            fi
        done
        printf '\n'
        if test $done = $n1; then
            break
        fi

        case $(cat phase) in # (
        0)
            # Disturb the victim once a tenth of the workers have finished.
            if test $done -ge $(expr $n1 / 10); then
                if test $variant = kill; then
                    stop_server $victim
                else
                    remove_server $victim
                fi
                echo 1 > phase
                next=$(expr $i + 2)
            fi
            ;; # (
        1)
            # Bring the victim back once tick number next has been reached.
            if test $i -ge $next; then
                if test $variant = kill; then
                    start_server $victim
                    connect_server $victim
                else
                    add_server $victim
                fi
                echo 2 > phase
            fi
            ;;
        esac

        i=$(expr $i + 1)
    done
    echo "...done"
    # Skip the test, exit status 77, unless the victim was both taken down
    # and brought back while the workers were still running.
    AT_CHECK([if test $(cat phase) != 2; then exit 77; fi])

    # Every key that the workers tried to add must be in the database.
    for i in $(seq 0 $(expr $n1 - 1) ); do
        for j in `seq $n2`; do
            echo "$i-$j=$i-$j"
            for k in `seq $n3`; do
                echo "$i-$j-$k=$i-$j-$k"
            done
        done
    done | sort > expout
    AT_CHECK([ovn-sbctl --timeout=30 --log-file=finalize.log -vtimeval:off -vfile -vsyslog:off --bare get SB_Global . external-ids | tr ',' '\n' | sed 's/[[{}"" ]]//g' | sort], [0], [expout])

    for i in `seq $n`; do
        if test $i != $victim || test $(cat phase) != 1; then
            stop_server $i
        fi
    done

    # We ignore stdout because non-fatal warnings get printed there.
    AT_CHECK([ovsdb-tool check-cluster s*.db], [0], [ignore])
}
OVS_END_SHELL_HELPERS

# Each test below calls "ovsdb_torture_test N_SERVERS VICTIM VARIANT".
AT_SETUP([OVSDB 3-server torture test - kill/restart leader])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 1 kill
AT_CLEANUP
AT_SETUP([OVSDB 3-server torture test - kill/restart follower 1])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 2 kill
AT_CLEANUP
AT_SETUP([OVSDB 3-server torture test - kill/restart follower 2])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 3 kill
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - kill/restart leader])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 1 kill
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - kill/restart follower 1])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 2 kill
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - kill/restart follower 2])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 3 kill
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - kill/restart follower 3])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 4 kill
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - kill/restart follower 4])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 5 kill
AT_CLEANUP

AT_SETUP([OVSDB 3-server torture test - remove/re-add leader])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 1 remove
AT_CLEANUP
AT_SETUP([OVSDB 3-server torture test - remove/re-add follower 1])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 2 remove
AT_CLEANUP
AT_SETUP([OVSDB 3-server torture test - remove/re-add follower 2])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 3 remove
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - remove/re-add leader])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 1 remove
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 1])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 2 remove
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 2])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 3 remove
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 3])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 4 remove
AT_CLEANUP
AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 4])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 5 remove
AT_CLEANUP