]> git.proxmox.com Git - mirror_ovs.git/blob - tests/ovsdb-cluster.at
tests: Log commands being executed for async message control test.
[mirror_ovs.git] / tests / ovsdb-cluster.at
OVS_START_SHELL_HELPERS
# ovsdb_check_cluster N_SERVERS SCHEMA_FUNC OUTPUT TRANSACTION...
#
# Creates a cluster of N_SERVERS ovsdb-server processes (s1..sN) for the
# schema printed by SCHEMA_FUNC, executes every TRANSACTION through
# ovsdb-client, and checks that the concatenated, uuidfilt-ed replies are
# equal to OUTPUT.  Afterwards all servers are stopped and the database
# files are verified with "ovsdb-tool check-cluster".
ovsdb_check_cluster () {
    local n=$1 schema_func=$2 output=$3
    shift; shift; shift

    $schema_func > schema
    schema=`ovsdb-tool schema-name schema`
    AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr])
    # "grep -v" must select nothing (exit status 1): the only message
    # tolerated on stderr is the ephemeral-to-persistent conversion warning.
    AT_CHECK([grep -v 'from ephemeral to persistent' stderr], [1])
    cid=`ovsdb-tool db-cid s1.db`
    for i in `seq 2 $n`; do
        AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft unix:s1.raft])
    done

    on_exit 'kill `cat *.pid`'
    for i in `seq $n`; do
        AT_CHECK([ovsdb-server -vraft -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
    done
    for i in `seq $n`; do
        AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema connected])
    done

    # Build the client remote list from the actual cluster size instead of
    # always naming s1..s3: with 1 server s2/s3 do not exist, and with 5
    # servers s4/s5 would never be tried.
    local remotes=unix:s1.ovsdb
    for i in `seq 2 $n`; do
        remotes=$remotes,unix:s$i.ovsdb
    done

    # "for txn" without a list iterates over the remaining positional
    # parameters, i.e. the TRANSACTION arguments.
    for txn
    do
        AT_CHECK([ovsdb-client -vjsonrpc -vconsole:off -vsyslog:off -vvlog:off --log-file transact $remotes "$txn"], [0], [stdout])
        cat stdout >> output
    done
    AT_CHECK_UNQUOTED([uuidfilt output], [0], [$output])
    for i in `seq $n`; do
        OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
    done

    AT_CHECK([ovsdb-tool check-cluster s*.db])
}
OVS_END_SHELL_HELPERS
37
# The three sections below redefine OVSDB_CHECK_EXECUTION and then expand
# EXECUTION_EXAMPLES (defined outside this file section; presumably it
# invokes OVSDB_CHECK_EXECUTION once per example -- confirm there).
#
# As used here the macro arguments are:
#   $1  test title            $2  schema function
#   $3  list of transactions  $4  expected output
#   $5  extra keywords

# Cluster with a single server.
AT_BANNER([OVSDB - clustered transactions (1 server)])
m4_define([OVSDB_CHECK_EXECUTION],
  [AT_SETUP([$1 - cluster of 1])
   AT_KEYWORDS([ovsdb server positive unix cluster cluster1 $5])
   ovsdb_check_cluster 1 "$2" '$4' m4_foreach([txn], [$3], ['txn' ])
   AT_CLEANUP])
EXECUTION_EXAMPLES

# Cluster with three servers.
AT_BANNER([OVSDB - clustered transactions (3 servers)])
m4_define([OVSDB_CHECK_EXECUTION],
  [AT_SETUP([$1 - cluster of 3])
   AT_KEYWORDS([ovsdb server positive unix cluster cluster3 $5])
   ovsdb_check_cluster 3 "$2" '$4' m4_foreach([txn], [$3], ['txn' ])
   AT_CLEANUP])
EXECUTION_EXAMPLES

# Cluster with five servers.
AT_BANNER([OVSDB - clustered transactions (5 servers)])
m4_define([OVSDB_CHECK_EXECUTION],
  [AT_SETUP([$1 - cluster of 5])
   AT_KEYWORDS([ovsdb server positive unix cluster cluster5 $5])
   ovsdb_check_cluster 5 "$2" '$4' m4_foreach([txn], [$3], ['txn' ])
   AT_CLEANUP])
EXECUTION_EXAMPLES
64 \f
65
AT_BANNER([OVSDB - disconnect from cluster])

OVS_START_SHELL_HELPERS
# ovsdb_test_cluster_disconnect N_SERVERS LEADER_OR_FOLLOWER [CHECK_FLAPPING]
#
# Test server disconnected from the cluster.
#
# N_SERVERS: Number of servers in the cluster.
# LEADER_OR_FOLLOWER: The role of the server that is disconnected from the
#                     cluster: "leader" or "follower".
# CHECK_FLAPPING: Whether to check if is_disconnected flapped. "yes", "no".
#                 When "yes" the test sleeps 10 seconds before comparing
#                 the "raft_is_connected: true" counts.
#
# An IDL client (test-ovsdb) is attached to the server under test, enough
# other servers are shut down to cut it off from the cluster, and the test
# then expects the client to log a disconnect and the server log to report
# "raft_is_connected: false" without flapping back to true.
ovsdb_test_cluster_disconnect () {
    n=$1
    leader_or_follower=$2
    check_flapping=$3
    schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
    ordinal_schema > schema
    AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
    cid=`ovsdb-tool db-cid s1.db`
    for i in `seq 2 $n`; do
        AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
    done

    # The glob is expanded when the hook runs, so it also covers the
    # test-ovsdb.pid and tail.pid files written further down.
    on_exit 'kill `cat *.pid`'
    for i in `seq $n`; do
        AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
    done
    for i in `seq $n`; do
        AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
    done

    AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
{"op": "insert",
"table": "simple",
"row": {"i": 1}}]]'], [0], [ignore], [ignore])

    # When a node is disconnected from the cluster, the IDL should disconnect
    # and retry even if it uses a single remote, because the remote IP can be
    # a VIP on a load balancer.  So we use a single remote to test here.
    #
    # POSIX "test" only defines "=" for string comparison ("==" is a
    # bashism), and the operand is quoted so an empty value cannot break
    # the expression.
    if test "$leader_or_follower" = "leader"; then
        target=1
        shutdown=`seq $(($n/2 + 1)) $n`
        cleanup=`seq $(($n/2))`
    else
        target=$n

        # shutdown followers before the leader (s1) so that there is no chance
        # for s$n to become leader during the process.
        shutdown="`seq 2 $(($n/2 + 1))` 1"
        cleanup=`seq $(($n/2 + 2)) $n`
    fi
    echo shutdown=$shutdown
    echo cleanup=$cleanup

    # Connect to $target.  Use "wait" to trigger a non-op transaction so
    # that test-ovsdb will not quit.

    test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -v -t10 idl unix:s$target.ovsdb '[["idltest",
{"op": "wait",
"table": "simple",
"where": [["i", "==", 1]],
"columns": ["i"],
"until": "==",
"rows": [{"i": 1}]}]]' > test-ovsdb.log 2>&1 &
    echo $! > test-ovsdb.pid

    OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log])

    # Start collecting raft_is_connected logs for $target before shutting down
    # any servers.
    tail -f s$target.log > raft_is_connected.log &
    echo $! > tail.pid

    # Shutdown the other servers so that $target is disconnected from the
    # cluster.
    for i in $shutdown; do
        OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
    done

    # The test-ovsdb should detect the disconnect and retry.
    OVS_WAIT_UNTIL([grep disconnect test-ovsdb.log])

    # The $target debug log should show raft_is_connected: false.
    OVS_WAIT_UNTIL([grep "raft_is_connected: false" raft_is_connected.log])

    # Save the current count of "raft_is_connected: true".  "grep -c" prints
    # a bare number, unlike "wc -l" which pads its output on some systems.
    count_old=`grep -c "raft_is_connected: true" raft_is_connected.log`
    echo count_old $count_old

    if test "X$check_flapping" = "Xyes"; then
        sleep 10
    fi
    # Make sure raft_is_connected didn't flap from false to true.
    count_new=`grep -c "raft_is_connected: true" raft_is_connected.log`
    echo count_new $count_new
    AT_CHECK([test "$count_new" = "$count_old"])

    for i in $cleanup; do
        OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
    done
}
OVS_END_SHELL_HELPERS
166
# Each group below calls ovsdb_test_cluster_disconnect with
# N_SERVERS LEADER_OR_FOLLOWER [CHECK_FLAPPING].

# 3 servers, the follower is cut off.
AT_SETUP([OVSDB cluster - follower disconnect from cluster, single remote])
AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
ovsdb_test_cluster_disconnect 3 follower
AT_CLEANUP

# 3 servers, the leader is cut off.
AT_SETUP([OVSDB cluster - leader disconnect from cluster, single remote])
AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
ovsdb_test_cluster_disconnect 3 leader
AT_CLEANUP

# 5 servers, the leader is cut off, with the flapping check enabled.
AT_SETUP([OVSDB cluster - leader disconnect from cluster, check flapping])
AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
ovsdb_test_cluster_disconnect 5 leader yes
AT_CLEANUP
181
182 \f
183
AT_BANNER([OVSDB cluster election timer change])

# Verifies that the election timer of a 3-server cluster can be changed
# through the leader only, by at most a factor of two per step, and that
# the new value survives a restart, a compaction followed by a restart,
# and is picked up by a server that joins later.
AT_SETUP([OVSDB cluster - election timer change])
AT_KEYWORDS([ovsdb server positive unix cluster timer])

n=3
schema_name=$(ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema)
ordinal_schema > schema
AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
cid=$(ovsdb-tool db-cid s1.db)
# NOTE(review): schema_name was already computed above; this repeat looks
# redundant but is kept so that the executed commands stay the same.
schema_name=$(ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema)
for i in $(seq 2 $n); do
    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
done

on_exit 'kill `cat *.pid`'
for i in $(seq $n); do
    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
done
for i in $(seq $n); do
    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
done

# A follower (s2) must refuse the change.
AT_CHECK([ovs-appctl -t "`pwd`"/s2 cluster/change-election-timer $schema_name 2000], [2], [], [ignore])

# A single step may not go beyond twice the current value.
AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/change-election-timer $schema_name 4000], [2], [], [ignore])

# First step: change to 2000 and wait for s1 and s2 to report it.
AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/change-election-timer $schema_name 2000], [0], [dnl
change of election timer initiated.
], [])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Election timer: 2000"])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s2 cluster/status $schema_name | grep "Election timer: 2000"])

# Second step: 2000 -> 4000 is now within the allowed factor.
AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/change-election-timer $schema_name 4000], [0], [dnl
change of election timer initiated.
], [])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Election timer: 4000"])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s2 cluster/status $schema_name | grep "Election timer: 4000"])

# The latest timer value must still be in effect after a full restart.
for i in $(seq $n); do
    printf "\ns$i: stopping\n"
    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
done
for i in $(seq $n); do
    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
done
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Election timer: 4000"])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s2 cluster/status $schema_name | grep "Election timer: 4000"])

# Latest timer should be restored after DB compact and restart.
# This is to test the install_snapshot RPC.

# XXX: Insert data before compact, because otherwise transaction will trigger
# busy loop after compact.
# poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 (89% CPU usage)
AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
{"op": "insert",
"table": "simple",
"row": {"i": 1}}]]'], [0], [ignore], [ignore])

# Compact every server while it is running.
for i in $(seq $n); do
    AT_CHECK([ovs-appctl -t "`pwd`"/s$i ovsdb-server/compact])
done

# XXX: Insert data after compact, because otherwise vote will fail after
# cluster restart after compact. There will be error logs like:
# raft|ERR|internal error: deferred vote_request message completed but not ready to send because message index 9 is past last synced index 0: s2 vote_request: term=6 last_log_index=9 last_log_term=4
AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
{"op": "insert",
"table": "simple",
"row": {"i": 1}}]]'], [0], [ignore], [ignore])

for i in $(seq $n); do
    printf "\ns$i: stopping\n"
    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
done
for i in $(seq $n); do
    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
done
for i in $(seq $n); do
    OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "Election timer: 4000"])
done

# Wait until every server reports a known leader.
for i in $(seq $n); do
    OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "Leader: unknown"])
done

# A server that joins now (s4) should use the latest timer value.
AT_CHECK([ovsdb-tool join-cluster s4.db $schema_name unix:s4.raft unix:s1.raft])
AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s4.log --pidfile=s4.pid --unixctl=s4 --remote=punix:s4.ovsdb s4.db])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name | grep "Election timer: 4000"])
# Disabled debugging aid: dump the s4 cluster status once per second.
# for i in `seq 10`; do
#     ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name
#     sleep 1
# done

AT_CLEANUP
286
287 \f
288
OVS_START_SHELL_HELPERS
# ovsdb_cluster_failure_test REMOTE_1 REMOTE_2 CRASH_NODE CRASH_COMMAND [NEW_LEADER]
#
# Starts a 3-server cluster, arms a failure injection on one server, runs a
# transaction and checks that it is visible afterwards.
#
# REMOTE_1, REMOTE_2: Server numbers whose sockets the ovs-vsctl client is
#                     given, in this order.
# CRASH_NODE: Number of the server that receives the failure-test command.
# CRASH_COMMAND: Argument passed to "cluster/failure-test" on CRASH_NODE.
# NEW_LEADER: Only read when CRASH_NODE is 1.  Expected to be 2 or 3; the
#             other of the two gets "delay-election" so that NEW_LEADER
#             wins the next election.
ovsdb_cluster_failure_test () {
    # Initial state: s1 is leader, s2 and s3 are followers
    remote_1=$1
    remote_2=$2
    crash_node=$3
    crash_command=$4
    # Reset explicitly so a value inherited from the environment cannot
    # enable the delay-election step below by accident.
    new_leader=
    # POSIX "test" only defines "=" for string comparison.
    if test "$crash_node" = "1"; then
        new_leader=$5
    fi

    cp $top_srcdir/vswitchd/vswitch.ovsschema schema
    schema=`ovsdb-tool schema-name schema`
    AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl
ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns
])

    n=3
    # Join server $1 to the cluster, naming every other server as a remote.
    join_cluster() {
        local i=$1
        others=
        for j in `seq 1 $n`; do
            if test $i != $j; then
                others="$others unix:s$j.raft"
            fi
        done
        AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft $others])
    }
    # Start ovsdb-server number $1 in the background.
    start_server() {
        local i=$1
        printf "\ns$i: starting\n"
        AT_CHECK([ovsdb-server -vjsonrpc -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
    }
    # Wait until server number $1 reports the database as connected.
    connect_server() {
        local i=$1
        printf "\ns$i: waiting to connect to storage\n"
        AT_CHECK([ovsdb_client_wait --log-file=connect$i.log unix:s$i.ovsdb $schema connected])
    }
    cid=`ovsdb-tool db-cid s1.db`
    for i in `seq 2 $n`; do join_cluster $i; done

    on_exit 'kill `cat *.pid`'
    for i in `seq $n`; do start_server $i; done
    for i in `seq $n`; do connect_server $i; done

    db=unix:s$remote_1.ovsdb,unix:s$remote_2.ovsdb

    # To ensure $new_leader node the new leader, we delay election timer for
    # the other follower.
    if test -n "$new_leader"; then
        if test "$new_leader" = "2"; then
            delay_election_node=3
        else
            delay_election_node=2
        fi
        AT_CHECK([ovs-appctl -t "`pwd`"/s$delay_election_node cluster/failure-test delay-election], [0], [ignore])
    fi
    AT_CHECK([ovs-appctl -t "`pwd`"/s$crash_node cluster/failure-test $crash_command], [0], [ignore])
    AT_CHECK([ovs-vsctl -v --db="$db" --no-leader-only --no-shuffle-remotes --no-wait create QoS type=x], [0], [ignore], [ignore])

    # Make sure that the node really crashed: its socket must be gone.
    # "test -e" is used instead of "ls" because the exit status of ls for a
    # missing file differs between implementations.
    AT_CHECK([test -e s$crash_node.ovsdb], [1])
    # XXX: Client will fail if remotes contains unix socket that doesn't exist (killed).
    if test "$remote_1" = "$crash_node"; then
        db=unix:s$remote_2.ovsdb
    fi
    AT_CHECK([ovs-vsctl --db="$db" --no-leader-only --no-wait --columns=type --bare list QoS], [0], [x
])
}
OVS_END_SHELL_HELPERS
AT_BANNER([OVSDB - cluster failure with pending transaction])

# Every group below calls ovsdb_cluster_failure_test with the positional
# arguments: first client remote, second client remote, server to crash,
# failure-test command and, when server 1 is crashed, the server that
# should become the new leader.
#
# Groups that begin with AT_CHECK([exit 77]) are skipped by Autotest.

# --- Leader (s1) crashes, transaction sent through follower s2 ---

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending appendReq, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-before-sending-append-request 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending appendReq, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-before-sending-append-request 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending execRep, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-before-sending-execute-command-reply 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending execRep, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-before-sending-execute-command-reply 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash after sending execRep, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-after-sending-execute-command-reply 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, leader crash after sending execRep, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 1 crash-after-sending-execute-command-reply 3
AT_CLEANUP

# --- Leader (s1) crashes, transaction sent through the leader itself ---

AT_SETUP([OVSDB cluster - txn on leader, leader crash before sending appendReq, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 1 2 1 crash-before-sending-append-request 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, leader crash before sending appendReq, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 1 2 1 crash-before-sending-append-request 3
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, leader crash after sending appendReq, follower-2 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
# XXX: Detect and skip repeated transaction before enabling this test
AT_CHECK([exit 77])
ovsdb_cluster_failure_test 1 2 1 crash-after-sending-append-request 2
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on leader, leader crash after sending appendReq, follower-3 becomes leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
# XXX: Detect and skip repeated transaction before enabling this test
AT_CHECK([exit 77])
ovsdb_cluster_failure_test 1 2 1 crash-after-sending-append-request 3
AT_CLEANUP

# --- Follower s2 crashes while it carries the transaction ---

AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash before sending execReq, reconnect to follower-3])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 3 2 crash-before-sending-execute-command-request
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash before sending execReq, reconnect to leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 1 2 crash-before-sending-execute-command-request
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash after sending execReq, reconnect to follower-3])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
# XXX: Detect and skip repeated transaction before enabling this test
AT_CHECK([exit 77])
ovsdb_cluster_failure_test 2 3 2 crash-after-sending-execute-command-request
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash after sending execReq, reconnect to leader])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
# XXX: Detect and skip repeated transaction before enabling this test
AT_CHECK([exit 77])
ovsdb_cluster_failure_test 2 1 2 crash-after-sending-execute-command-request
AT_CLEANUP

# --- A follower crashes on receiving the append request for the update ---

AT_SETUP([OVSDB cluster - txn on leader, follower-2 crash after receiving appendReq for the update])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 1 1 2 crash-after-receiving-append-request-update
AT_CLEANUP

AT_SETUP([OVSDB cluster - txn on follower-2, follower-3 crash after receiving appendReq for the update])
AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update
AT_CLEANUP
449
450 \f
AT_BANNER([OVSDB - cluster tests])

# Torture test.
OVS_START_SHELL_HELPERS
# ovsdb_torture_test N VICTIM VARIANT
#
# Runs many concurrent ovs-vsctl transactions against an N-server cluster.
# Once at least a tenth of the client batches have finished, server VICTIM
# is either stopped (VARIANT "kill") or removed from the cluster (any other
# VARIANT; the callers pass "remove"); two polling iterations later it is
# started again or re-added.  Finally the external_ids written by all
# clients are compared with the expected set and the database files are
# checked.
ovsdb_torture_test () {
    local n=$1              # Number of cluster members
    local victim=$2         # Cluster member to kill or remove
    local variant=$3        # 'kill' and restart or 'remove' and add
    cp $top_srcdir/vswitchd/vswitch.ovsschema schema
    schema=$(ovsdb-tool schema-name schema)
    AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl
ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns
])

    # Join server $1 to the cluster, naming every other server as a remote.
    join_cluster() {
        local i=$1
        others=
        for j in $(seq 1 $n); do
            if test $i != $j; then
                others="$others unix:s$j.raft"
            fi
        done
        AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft $others])
    }

    # Start ovsdb-server number $1 in the background.
    start_server() {
        local i=$1
        printf "\ns$i: starting\n"
        AT_CHECK([ovsdb-server -vjsonrpc -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
    }
    # Ask server number $1 to exit and wait until it is gone.
    stop_server() {
        local i=$1
        printf "\ns$i: stopping\n"
        OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
    }
    # Wait until server number $1 reports the database as connected.
    connect_server() {
        local i=$1
        printf "\ns$i: waiting to connect to storage\n"
        AT_CHECK([ovsdb_client_wait --log-file=connect$i.log unix:s$i.ovsdb $schema connected])
    }
    # Make server number $1 leave the cluster, then stop it.
    remove_server() {
        local i=$1
        printf "\ns$i: removing from cluster\n"
        AT_CHECK([ovs-appctl -t "`pwd`"/s$i cluster/leave Open_vSwitch])
        printf "\ns$i: waiting for removal to complete\n"
        AT_CHECK([ovsdb_client_wait --log-file=remove$i.log unix:s$i.ovsdb $schema removed])
        stop_server $i
    }
    # Re-create server number $1 from scratch and bring it back online.
    add_server() {
        local i=$1
        rm s$i.db
        join_cluster $i
        start_server $i
        connect_server $i
    }

    cid=$(ovsdb-tool db-cid s1.db)
    for i in $(seq 2 $n); do join_cluster $i; done

    on_exit 'kill `cat *.pid`'
    for i in $(seq $n); do start_server $i; done
    for i in $(seq $n); do connect_server $i; done

    # Comma-separated list with the client socket of every server.
    db=unix:s1.ovsdb
    for i in $(seq 2 $n); do
        db=$db,unix:s$i.ovsdb
    done

    # n1 background batches, each running n2 ovs-vsctl invocations in
    # sequence, each invocation adding 1 + n3 external_ids keys.
    n1=10 n2=5 n3=50
    echo "starting $n1*$n2 ovs-vsctl processes..."
    for i in $(seq 0 $((n1 - 1))); do
        (for j in $(seq $n2); do
            : > $i-$j.running
            txn="add Open_vSwitch . external_ids $i-$j=$i-$j"
            for k in $(seq $n3); do
                txn="$txn -- add Open_vSwitch . external_ids $i-$j-$k=$i-$j-$k"
            done
            run_as "ovs-vsctl($i-$j)" ovs-vsctl "-vPATTERN:console:ovs-vsctl($i-$j)|%D{%H:%M:%S}|%05N|%c|%p|%m" --log-file=$i-$j.log -vfile -vsyslog:off -vtimeval:off --timeout=120 --db="$db" --no-leader-only --no-wait $txn
            status=$?
            if test $status != 0; then
                echo "$i-$j exited with status $status" > $i-$j:$status
            fi
            rm $i-$j.running
        done
        : > $i.done)&
    done
    echo "...done"

    echo "waiting for ovs-vsctl processes to exit..."
    # Use file instead of var because code inside "while" runs in a subshell.
    echo 0 > phase
    i=0
    # The left side of the pipe emits one line every 0.1 s; each line
    # drives one polling iteration on the right side.
    (while :; do echo; sleep 0.1; done) | while read REPLY; do
        printf "t=%2d s:" $i
        # Count the batches that have dropped their ".done" marker.
        done=0
        for j in $(seq 0 $((n1 - 1))); do
            if test -f $j.done; then
                printf " $j"
                done=$((done + 1))
            fi
        done
        printf '\n'
        if test $done = $n1; then
            break
        fi

        # Phase 0: victim not yet disturbed.  Phase 1: victim is down or
        # removed.  Phase 2: victim has been brought back.
        case $(cat phase) in # (
        0)
            if test $done -ge $((n1 / 10)); then
                if test $variant = kill; then
                    stop_server $victim
                else
                    remove_server $victim
                fi
                echo 1 > phase
                next=$((i + 2))
            fi
            ;; # (
        1)
            if test $i -ge $next; then
                if test $variant = kill; then
                    start_server $victim
                    connect_server $victim
                else
                    add_server $victim
                fi
                echo 2 > phase
            fi
            ;;
        esac

        i=$((i + 1))
    done
    echo "...done"
    # Skip (exit 77) unless the victim was both taken down and restored
    # before all clients finished.
    AT_CHECK([if test $(cat phase) != 2; then exit 77; fi])

    # Expected external_ids: one "key=key" line per key added above.
    for i in $(seq 0 $((n1 - 1))); do
        for j in $(seq $n2); do
            echo "$i-$j=$i-$j"
            for k in $(seq $n3); do
                echo "$i-$j-$k=$i-$j-$k"
            done
        done
    done | sort > expout
    AT_CHECK([ovs-vsctl --db="$db" --no-wait --log-file=finalize.log -vtimeval:off -vfile -vsyslog:off --bare get Open_vSwitch . external-ids | tr ',' '\n' | sed 's/[[{}"" ]]//g' | sort], [0], [expout])

    for i in $(seq $n); do
        if test $i != $victim || test $(cat phase) != 1; then
            stop_server $i
        fi
    done

    # We ignore stdout because non-fatal warnings get printed there.
    AT_CHECK([ovsdb-tool check-cluster s*.db], [0], [ignore])
}
OVS_END_SHELL_HELPERS
607
# Each group below calls ovsdb_torture_test with
# N (cluster size), VICTIM (server number) and VARIANT (kill or remove).

# --- Variant "kill": stop the victim, then start it again ---

AT_SETUP([OVSDB 3-server torture test - kill/restart leader])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 1 kill
AT_CLEANUP

AT_SETUP([OVSDB 3-server torture test - kill/restart follower 1])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 2 kill
AT_CLEANUP

AT_SETUP([OVSDB 3-server torture test - kill/restart follower 2])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 3 kill
AT_CLEANUP

AT_SETUP([OVSDB 5-server torture test - kill/restart leader])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 1 kill
AT_CLEANUP

AT_SETUP([OVSDB 5-server torture test - kill/restart follower 1])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 2 kill
AT_CLEANUP

AT_SETUP([OVSDB 5-server torture test - kill/restart follower 2])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 3 kill
AT_CLEANUP

AT_SETUP([OVSDB 5-server torture test - kill/restart follower 3])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 4 kill
AT_CLEANUP

AT_SETUP([OVSDB 5-server torture test - kill/restart follower 4])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 5 kill
AT_CLEANUP

# --- Variant "remove": make the victim leave the cluster, then re-add it ---

AT_SETUP([OVSDB 3-server torture test - remove/re-add leader])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 1 remove
AT_CLEANUP

AT_SETUP([OVSDB 3-server torture test - remove/re-add follower 1])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 2 remove
AT_CLEANUP

AT_SETUP([OVSDB 3-server torture test - remove/re-add follower 2])
AT_KEYWORDS([ovsdb server positive unix cluster cluster3])
ovsdb_torture_test 3 3 remove
AT_CLEANUP

AT_SETUP([OVSDB 5-server torture test - remove/re-add leader])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 1 remove
AT_CLEANUP

AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 1])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 2 remove
AT_CLEANUP

AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 2])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 3 remove
AT_CLEANUP

AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 3])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 4 remove
AT_CLEANUP

AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 4])
AT_KEYWORDS([ovsdb server positive unix cluster cluster5])
ovsdb_torture_test 5 5 remove
AT_CLEANUP