]>
Commit | Line | Data |
---|---|---|
1b1d2e6d BP |
1 | OVS_START_SHELL_HELPERS |
2 | # ovsdb_check_cluster N_SERVERS SCHEMA_FUNC OUTPUT TRANSACTION... -- Creates and starts an N_SERVERS-member cluster from the schema printed by SCHEMA_FUNC, runs each TRANSACTION with ovsdb-client, checks that the accumulated replies (after uuidfilt) equal OUTPUT, then stops the servers and runs check-cluster on the databases. Note that the transact remote list is fixed at s1, s2, s3 regardless of N_SERVERS. | |
3 | ovsdb_check_cluster () { | |
4 | local n=$1 schema_func=$2 output=$3 | |
5 | shift; shift; shift | |
6 | ||
7 | $schema_func > schema | |
8 | schema=`ovsdb-tool schema-name schema` | |
9 | AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr]) | |
10 | AT_CHECK([grep -v 'from ephemeral to persistent' stderr], [1]) | |
11 | cid=`ovsdb-tool db-cid s1.db` | |
12 | for i in `seq 2 $n`; do | |
13 | AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft unix:s1.raft]) | |
14 | done | |
15 | ||
16 | on_exit 'kill `cat *.pid`' | |
17 | for i in `seq $n`; do | |
5a0e4aec | 18 | AT_CHECK([ovsdb-server -vraft -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) |
1b1d2e6d BP |
19 | done |
20 | for i in `seq $n`; do | |
d97af428 | 21 | AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema connected]) |
1b1d2e6d BP |
22 | done |
23 | ||
24 | for txn | |
25 | do | |
15394e0f | 26 | AT_CHECK([ovsdb-client -vjsonrpc -vconsole:off -vsyslog:off -vvlog:off --log-file transact unix:s1.ovsdb,unix:s2.ovsdb,unix:s3.ovsdb "$txn"], [0], [stdout]) |
1b1d2e6d BP |
27 | cat stdout >> output |
28 | done | |
29 | AT_CHECK_UNQUOTED([uuidfilt output], [0], [$output]) | |
30 | for i in `seq $n`; do | |
5a0e4aec | 31 | OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) |
1b1d2e6d BP |
32 | done |
33 | ||
34 | AT_CHECK([ovsdb-tool check-cluster s*.db]) | |
35 | } | |
36 | OVS_END_SHELL_HELPERS | |
37 | ||
38 | # Test a 1-server cluster. OVSDB_CHECK_EXECUTION(TITLE, SCHEMA_FUNC, TRANSACTIONS, OUTPUT, [KEYWORDS]) is redefined here to run one example through ovsdb_check_cluster with 1 server; EXECUTION_EXAMPLES is defined outside this section and presumably invokes it once per example. | |
39 | AT_BANNER([OVSDB - clustered transactions (1 server)]) | |
40 | m4_define([OVSDB_CHECK_EXECUTION], | |
41 | [AT_SETUP([$1 - cluster of 1]) | |
42 | AT_KEYWORDS([ovsdb server positive unix cluster cluster1 $5]) | |
43 | ovsdb_check_cluster 1 "$2" '$4' m4_foreach([txn], [$3], ['txn' ]) | |
44 | AT_CLEANUP]) | |
45 | EXECUTION_EXAMPLES | |
46 | ||
47 | # Test a 3-server cluster. OVSDB_CHECK_EXECUTION(TITLE, SCHEMA_FUNC, TRANSACTIONS, OUTPUT, [KEYWORDS]) is redefined here to run one example through ovsdb_check_cluster with 3 servers; EXECUTION_EXAMPLES is defined outside this section and presumably invokes it once per example. | |
48 | AT_BANNER([OVSDB - clustered transactions (3 servers)]) | |
49 | m4_define([OVSDB_CHECK_EXECUTION], | |
50 | [AT_SETUP([$1 - cluster of 3]) | |
51 | AT_KEYWORDS([ovsdb server positive unix cluster cluster3 $5]) | |
52 | ovsdb_check_cluster 3 "$2" '$4' m4_foreach([txn], [$3], ['txn' ]) | |
53 | AT_CLEANUP]) | |
54 | EXECUTION_EXAMPLES | |
55 | ||
56 | # Test a 5-server cluster. OVSDB_CHECK_EXECUTION(TITLE, SCHEMA_FUNC, TRANSACTIONS, OUTPUT, [KEYWORDS]) is redefined here to run one example through ovsdb_check_cluster with 5 servers; EXECUTION_EXAMPLES is defined outside this section and presumably invokes it once per example. | |
57 | AT_BANNER([OVSDB - clustered transactions (5 servers)]) | |
58 | m4_define([OVSDB_CHECK_EXECUTION], | |
59 | [AT_SETUP([$1 - cluster of 5]) | |
60 | AT_KEYWORDS([ovsdb server positive unix cluster cluster5 $5]) | |
61 | ovsdb_check_cluster 5 "$2" '$4' m4_foreach([txn], [$3], ['txn' ]) | |
62 | AT_CLEANUP]) | |
63 | EXECUTION_EXAMPLES | |
eb692258 HZ |
64 | \f |
65 | ||
ca367fa5 HZ |
66 | AT_BANNER([OVSDB - disconnect from cluster]) |
67 | ||
89771c1e | 68 | OVS_START_SHELL_HELPERS |
923f01ca HZ |
69 | # ovsdb_test_cluster_disconnect N_SERVERS LEADER_OR_FOLLOWER [CHECK_FLAPPING] |
70 | # Tests that an IDL client detects when the server it is connected to is disconnected from the cluster. | |
71 | # N_SERVERS: Number of servers in the cluster. | |
72 | # LEADER_OR_FOLLOWER: The role of the server that is disconnected from the | |
73 | # cluster: "leader" or "follower". | |
74 | # CHECK_FLAPPING: "yes" to wait 10 seconds before checking that raft_is_connected did not flap back to true; with any other value the check runs immediately. | |
89771c1e | 75 | ovsdb_test_cluster_disconnect () { |
923f01ca HZ |
76 | n=$1 |
77 | leader_or_follower=$2 | |
78 | check_flapping=$3 | |
89771c1e HZ |
79 | schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` |
80 | ordinal_schema > schema | |
81 | AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) | |
82 | cid=`ovsdb-tool db-cid s1.db` | |
83 | schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` | |
923f01ca | 84 | for i in `seq 2 $n`; do |
89771c1e HZ |
85 | AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) |
86 | done | |
87 | ||
88 | on_exit 'kill `cat *.pid`' | |
923f01ca | 89 | for i in `seq $n`; do |
89771c1e HZ |
90 | AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) |
91 | done | |
923f01ca | 92 | for i in `seq $n`; do |
89771c1e HZ |
93 | AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) |
94 | done | |
95 | ||
96 | AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest", | |
97 | {"op": "insert", | |
98 | "table": "simple", | |
99 | "row": {"i": 1}}]]'], [0], [ignore], [ignore]) | |
100 | ||
101 | # When a node is disconnected from the cluster, the IDL should disconnect | |
102 | # and retry even if it uses a single remote, because the remote IP can be | |
103 | # a VIP on a load balancer. So we use a single remote to test here. | |
104 | if test $leader_or_follower = "leader"; then | |
105 | target=1 | |
923f01ca HZ |
106 | shutdown=`seq $(($n/2 + 1)) $n` |
107 | cleanup=`seq $(($n/2))` | |
89771c1e | 108 | else |
923f01ca | 109 | target=$n |
89771c1e | 110 | |
923f01ca HZ |
111 | # shutdown followers before the leader (s1) so that there is no chance for |
112 | # s$n to become leader during the process. | |
113 | shutdown="`seq 2 $(($n/2 + 1))` 1" | |
114 | cleanup=`seq $(($n/2 + 2)) $n` | |
89771c1e | 115 | fi |
923f01ca HZ |
116 | echo shutdown=$shutdown |
117 | echo cleanup=$cleanup | |
89771c1e HZ |
118 | |
119 | # Connect to $target. Use "wait" to trigger a no-op transaction so | |
120 | # that test-ovsdb will not quit. | |
121 | ||
122 | test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -v -t10 idl unix:s$target.ovsdb '[["idltest", | |
123 | {"op": "wait", | |
124 | "table": "simple", | |
125 | "where": [["i", "==", 1]], | |
126 | "columns": ["i"], | |
127 | "until": "==", | |
128 | "rows": [{"i": 1}]}]]' > test-ovsdb.log 2>&1 & | |
129 | echo $! > test-ovsdb.pid | |
ca367fa5 | 130 | |
89771c1e | 131 | OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log]) |
ca367fa5 | 132 | |
923f01ca HZ |
133 | # Start collecting raft_is_connected logs for $target before shutting down |
134 | # any servers. | |
135 | tail -f s$target.log > raft_is_connected.log & | |
136 | echo $! > tail.pid | |
137 | ||
89771c1e HZ |
138 | # Shutdown the other servers so that $target is disconnected from the cluster. |
139 | for i in $shutdown; do | |
140 | OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) | |
141 | done | |
142 | ||
143 | # The test-ovsdb should detect the disconnect and retry. | |
144 | OVS_WAIT_UNTIL([grep disconnect test-ovsdb.log]) | |
145 | ||
923f01ca HZ |
146 | # The $target debug log should show raft_is_connected: false. |
147 | OVS_WAIT_UNTIL([grep "raft_is_connected: false" raft_is_connected.log]) | |
148 | ||
149 | # Save the current count of "raft_is_connected: true" | |
150 | count_old=`grep "raft_is_connected: true" raft_is_connected.log | wc -l` | |
151 | echo count_old $count_old | |
152 | ||
153 | if test X$check_flapping = X"yes"; then | |
154 | sleep 10 | |
155 | fi | |
156 | # Make sure raft_is_connected didn't flap from false to true. The counts are compared with "=", the portable operator; not every shell's test accepts "==". | |
157 | count_new=`grep "raft_is_connected: true" raft_is_connected.log | wc -l` | |
158 | echo count_new $count_new | |
159 | AT_CHECK([test $count_new = $count_old]) | |
160 | ||
161 | for i in $cleanup; do | |
162 | OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) | |
163 | done | |
89771c1e HZ |
164 | } |
165 | OVS_END_SHELL_HELPERS | |
166 | ||
167 | AT_SETUP([OVSDB cluster - follower disconnect from cluster, single remote]) | |
168 | AT_KEYWORDS([ovsdb server negative unix cluster disconnect]) | |
923f01ca | 169 | ovsdb_test_cluster_disconnect 3 follower |
89771c1e HZ |
170 | AT_CLEANUP |
171 | ||
172 | AT_SETUP([OVSDB cluster - leader disconnect from cluster, single remote]) | |
173 | AT_KEYWORDS([ovsdb server negative unix cluster disconnect]) | |
923f01ca HZ |
174 | ovsdb_test_cluster_disconnect 3 leader |
175 | AT_CLEANUP | |
176 | ||
177 | AT_SETUP([OVSDB cluster - leader disconnect from cluster, check flapping]) | |
178 | AT_KEYWORDS([ovsdb server negative unix cluster disconnect]) | |
179 | ovsdb_test_cluster_disconnect 5 leader yes | |
ca367fa5 | 180 | AT_CLEANUP |
89771c1e | 181 | |
ca367fa5 HZ |
182 | \f |
183 | ||
8e354614 HZ |
184 | AT_BANNER([OVSDB cluster election timer change]) |
185 | ||
186 | AT_SETUP([OVSDB cluster - election timer change]) | |
187 | AT_KEYWORDS([ovsdb server positive unix cluster timer]) | |
188 | ||
189 | n=3 | |
190 | schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` | |
191 | ordinal_schema > schema | |
192 | AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) | |
193 | cid=`ovsdb-tool db-cid s1.db` | |
194 | schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` | |
195 | for i in `seq 2 $n`; do | |
196 | AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) | |
197 | done | |
198 | ||
199 | on_exit 'kill `cat *.pid`' | |
200 | for i in `seq $n`; do | |
201 | AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) | |
202 | done | |
203 | for i in `seq $n`; do | |
204 | AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) | |
205 | done | |
206 | ||
207 | # The election timer cannot be changed through a follower (s2): the command is expected to exit with status 2. | |
208 | AT_CHECK([ovs-appctl -t "`pwd`"/s2 cluster/change-election-timer $schema_name 2000], [2], [], [ignore]) | |
209 | ||
210 | # Timer cannot be changed to more than 2x its current value in one step: 4000 is rejected here but accepted below, after the timer has been raised to 2000. | |
211 | AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/change-election-timer $schema_name 4000], [2], [], [ignore]) | |
212 | ||
213 | AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/change-election-timer $schema_name 2000], [0], [dnl | |
214 | change of election timer initiated. | |
215 | ], []) | |
216 | OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Election timer: 2000"]) | |
217 | OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s2 cluster/status $schema_name | grep "Election timer: 2000"]) | |
218 | ||
219 | AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/change-election-timer $schema_name 4000], [0], [dnl | |
220 | change of election timer initiated. | |
221 | ], []) | |
222 | OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Election timer: 4000"]) | |
223 | OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s2 cluster/status $schema_name | grep "Election timer: 4000"]) | |
224 | ||
225 | # The latest timer value should still be in use after all servers are stopped and restarted. | |
226 | for i in `seq $n`; do | |
227 | printf "\ns$i: stopping\n" | |
228 | OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) | |
229 | done | |
230 | for i in `seq $n`; do | |
231 | AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) | |
232 | done | |
233 | OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Election timer: 4000"]) | |
234 | OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s2 cluster/status $schema_name | grep "Election timer: 4000"]) | |
235 | ||
9bfb280a HZ |
236 | # Latest timer should be restored after DB compact and restart. |
237 | # This is to test the install_snapshot RPC. | |
238 | ||
239 | # XXX: Insert data before compact, because otherwise a transaction will trigger | |
240 | # a busy loop after compact. | |
241 | # poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 (89% CPU usage) | |
242 | AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest", | |
243 | {"op": "insert", | |
244 | "table": "simple", | |
245 | "row": {"i": 1}}]]'], [0], [ignore], [ignore]) | |
246 | ||
247 | # Compact online | |
248 | for i in `seq $n`; do | |
249 | AT_CHECK([ovs-appctl -t "`pwd`"/s$i ovsdb-server/compact]) | |
250 | done | |
251 | ||
252 | # XXX: Insert data after compact, because otherwise vote will fail after | |
253 | # cluster restart after compact. There will be error logs like: | |
254 | # raft|ERR|internal error: deferred vote_request message completed but not ready to send because message index 9 is past last synced index 0: s2 vote_request: term=6 last_log_index=9 last_log_term=4 | |
255 | AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest", | |
256 | {"op": "insert", | |
257 | "table": "simple", | |
258 | "row": {"i": 1}}]]'], [0], [ignore], [ignore]) | |
259 | ||
260 | for i in `seq $n`; do | |
261 | printf "\ns$i: stopping\n" | |
262 | OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) | |
263 | done | |
264 | for i in `seq $n`; do | |
265 | AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) | |
266 | done | |
267 | for i in `seq $n`; do | |
268 | OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "Election timer: 4000"]) | |
269 | done | |
270 | ||
271 | # Wait until cluster is ready | |
272 | for i in `seq $n`; do | |
273 | OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "Leader: unknown"]) | |
274 | done | |
275 | ||
276 | # Newly joined member should use latest timer value | |
277 | AT_CHECK([ovsdb-tool join-cluster s4.db $schema_name unix:s4.raft unix:s1.raft]) | |
278 | AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s4.log --pidfile=s4.pid --unixctl=s4 --remote=punix:s4.ovsdb s4.db]) | |
279 | OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name | grep "Election timer: 4000"]) | |
280 | # for i in `seq 10`; do | |
281 | # ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name | |
282 | # sleep 1 | |
283 | # done | |
284 | ||
8e354614 HZ |
285 | AT_CLEANUP |
286 | ||
287 | \f | |
288 | ||
eb692258 HZ |
289 | OVS_START_SHELL_HELPERS |
290 | # ovsdb_cluster_failure_test REMOTE_1 REMOTE_2 CRASH_NODE CRASH_COMMAND [NEW_LEADER] -- Starts a 3-server cluster, arms CRASH_COMMAND on server CRASH_NODE through cluster/failure-test, runs a transaction against servers REMOTE_1 and REMOTE_2, and checks that the new row can be read back. NEW_LEADER (2 or 3) is only used when CRASH_NODE is 1. | |
291 | ovsdb_cluster_failure_test () { | |
292 | # Initial state: s1 is leader, s2 and s3 are followers | |
293 | remote_1=$1 | |
294 | remote_2=$2 | |
295 | crash_node=$3 | |
296 | crash_command=$4 | |
297 | if test "$crash_node" = "1"; then | |
298 | new_leader=$5 | |
299 | fi | |
300 | ||
817db730 | 301 | cp $top_srcdir/vswitchd/vswitch.ovsschema schema |
eb692258 HZ |
302 | schema=`ovsdb-tool schema-name schema` |
303 | AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl | |
817db730 | 304 | ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns |
eb692258 HZ |
305 | ]) |
306 | ||
307 | n=3 | |
308 | join_cluster() { | |
309 | local i=$1 | |
310 | others= | |
311 | for j in `seq 1 $n`; do | |
312 | if test $i != $j; then | |
313 | others="$others unix:s$j.raft" | |
314 | fi | |
315 | done | |
316 | AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft $others]) | |
317 | } | |
318 | start_server() { | |
319 | local i=$1 | |
320 | printf "\ns$i: starting\n" | |
321 | AT_CHECK([ovsdb-server -vjsonrpc -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) | |
322 | } | |
323 | connect_server() { | |
324 | local i=$1 | |
325 | printf "\ns$i: waiting to connect to storage\n" | |
326 | AT_CHECK([ovsdb_client_wait --log-file=connect$i.log unix:s$i.ovsdb $schema connected]) | |
327 | } | |
328 | cid=`ovsdb-tool db-cid s1.db` | |
329 | for i in `seq 2 $n`; do join_cluster $i; done | |
330 | ||
331 | on_exit 'kill `cat *.pid`' | |
332 | for i in `seq $n`; do start_server $i; done | |
333 | for i in `seq $n`; do connect_server $i; done | |
334 | ||
817db730 | 335 | db=unix:s$remote_1.ovsdb,unix:s$remote_2.ovsdb |
eb692258 HZ |
336 | |
337 | # To ensure that the $new_leader node becomes the new leader, we delay the election timer for | |
338 | # the other follower. | |
339 | if test -n "$new_leader"; then | |
340 | if test "$new_leader" = "2"; then | |
341 | delay_election_node=3 | |
342 | else | |
343 | delay_election_node=2 | |
344 | fi | |
345 | AT_CHECK([ovs-appctl -t "`pwd`"/s$delay_election_node cluster/failure-test delay-election], [0], [ignore]) | |
346 | fi | |
347 | AT_CHECK([ovs-appctl -t "`pwd`"/s$crash_node cluster/failure-test $crash_command], [0], [ignore]) | |
15394e0f | 348 | AT_CHECK([ovs-vsctl -v --db="$db" --no-leader-only --no-shuffle-remotes --no-wait create QoS type=x], [0], [ignore], [ignore]) |
eb692258 HZ |
349 | |
350 | # Make sure that the node really crashed. | |
351 | AT_CHECK([ls s$crash_node.ovsdb], [2], [ignore], [ignore]) | |
352 | # XXX: Client will fail if the remotes contain a unix socket that doesn't exist (killed). |
817db730 BP |
353 | if test "$remote_1" = "$crash_node"; then |
354 | db=unix:s$remote_2.ovsdb | |
eb692258 | 355 | fi |
817db730 | 356 | AT_CHECK([ovs-vsctl --db="$db" --no-leader-only --no-wait --columns=type --bare list QoS], [0], [x |
eb692258 HZ |
357 | ]) |
358 | } | |
359 | OVS_END_SHELL_HELPERS | |
360 | AT_BANNER([OVSDB - cluster failure with pending transaction]) | |
361 | ||
362 | AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending appendReq, follower-2 becomes leader]) | |
363 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
eb692258 HZ |
364 | ovsdb_cluster_failure_test 2 3 1 crash-before-sending-append-request 2 |
365 | AT_CLEANUP | |
366 | ||
367 | AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending appendReq, follower-3 becomes leader]) | |
368 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
369 | ovsdb_cluster_failure_test 2 3 1 crash-before-sending-append-request 3 | |
370 | AT_CLEANUP | |
371 | ||
372 | AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending execRep, follower-2 becomes leader]) | |
373 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
eb692258 HZ |
374 | ovsdb_cluster_failure_test 2 3 1 crash-before-sending-execute-command-reply 2 |
375 | AT_CLEANUP | |
376 | ||
377 | AT_SETUP([OVSDB cluster - txn on follower-2, leader crash before sending execRep, follower-3 becomes leader]) | |
378 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
eb692258 HZ |
379 | ovsdb_cluster_failure_test 2 3 1 crash-before-sending-execute-command-reply 3 |
380 | AT_CLEANUP | |
381 | ||
382 | AT_SETUP([OVSDB cluster - txn on follower-2, leader crash after sending execRep, follower-2 becomes leader]) | |
383 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
eb692258 HZ |
384 | ovsdb_cluster_failure_test 2 3 1 crash-after-sending-execute-command-reply 2 |
385 | AT_CLEANUP | |
386 | ||
387 | AT_SETUP([OVSDB cluster - txn on follower-2, leader crash after sending execRep, follower-3 becomes leader]) | |
388 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
389 | ovsdb_cluster_failure_test 2 3 1 crash-after-sending-execute-command-reply 3 | |
390 | AT_CLEANUP | |
391 | ||
392 | AT_SETUP([OVSDB cluster - txn on leader, leader crash before sending appendReq, follower-2 becomes leader]) | |
393 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
eb692258 HZ |
394 | ovsdb_cluster_failure_test 1 2 1 crash-before-sending-append-request 2 |
395 | AT_CLEANUP | |
396 | ||
397 | AT_SETUP([OVSDB cluster - txn on leader, leader crash before sending appendReq, follower-3 becomes leader]) | |
398 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
399 | ovsdb_cluster_failure_test 1 2 1 crash-before-sending-append-request 3 | |
400 | AT_CLEANUP | |
401 | ||
402 | AT_SETUP([OVSDB cluster - txn on leader, leader crash after sending appendReq, follower-2 becomes leader]) | |
403 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
404 | # XXX: Detect and skip repeated transaction before enabling this test | |
405 | AT_CHECK([exit 77]) | |
406 | ovsdb_cluster_failure_test 1 2 1 crash-after-sending-append-request 2 | |
407 | AT_CLEANUP | |
408 | ||
409 | AT_SETUP([OVSDB cluster - txn on leader, leader crash after sending appendReq, follower-3 becomes leader]) | |
410 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
411 | # XXX: Detect and skip repeated transaction before enabling this test | |
412 | AT_CHECK([exit 77]) | |
413 | ovsdb_cluster_failure_test 1 2 1 crash-after-sending-append-request 3 | |
414 | AT_CLEANUP | |
415 | ||
416 | AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash before sending execReq, reconnect to follower-3]) | |
417 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
418 | ovsdb_cluster_failure_test 2 3 2 crash-before-sending-execute-command-request | |
419 | AT_CLEANUP | |
420 | ||
421 | AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash before sending execReq, reconnect to leader]) | |
422 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
423 | ovsdb_cluster_failure_test 2 1 2 crash-before-sending-execute-command-request | |
424 | AT_CLEANUP | |
425 | ||
426 | AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash after sending execReq, reconnect to follower-3]) | |
427 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
428 | # XXX: Detect and skip repeated transaction before enabling this test | |
429 | AT_CHECK([exit 77]) | |
430 | ovsdb_cluster_failure_test 2 3 2 crash-after-sending-execute-command-request | |
431 | AT_CLEANUP | |
432 | ||
433 | AT_SETUP([OVSDB cluster - txn on follower-2, follower-2 crash after sending execReq, reconnect to leader]) | |
434 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
435 | # XXX: Detect and skip repeated transaction before enabling this test | |
436 | AT_CHECK([exit 77]) | |
437 | ovsdb_cluster_failure_test 2 1 2 crash-after-sending-execute-command-request | |
438 | AT_CLEANUP | |
439 | ||
440 | AT_SETUP([OVSDB cluster - txn on leader, follower-2 crash after receiving appendReq for the update]) | |
441 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
442 | ovsdb_cluster_failure_test 1 1 2 crash-after-receiving-append-request-update | |
443 | AT_CLEANUP | |
444 | ||
445 | AT_SETUP([OVSDB cluster - txn on follower-2, follower-3 crash after receiving appendReq for the update]) | |
446 | AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) | |
447 | ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update | |
448 | AT_CLEANUP | |
449 | ||
1b1d2e6d BP |
450 | \f |
451 | AT_BANNER([OVSDB - cluster tests]) | |
452 | ||
453 | # Torture test: ovsdb_torture_test N VICTIM VARIANT starts an N-member cluster, launches concurrent ovs-vsctl writers in the background, stops or removes member VICTIM once some writers have finished and later brings it back, then verifies that every written external_ids key is present. | |
454 | OVS_START_SHELL_HELPERS | |
455 | ovsdb_torture_test () { | |
5a0e4aec BP |
456 | local n=$1 # Number of cluster members |
457 | local victim=$2 # Cluster member to kill or remove | |
1b1d2e6d | 458 | local variant=$3 # 'kill' and restart or 'remove' and add |
817db730 | 459 | cp $top_srcdir/vswitchd/vswitch.ovsschema schema |
1b1d2e6d BP |
460 | schema=`ovsdb-tool schema-name schema` |
461 | AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl | |
817db730 | 462 | ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns |
1b1d2e6d BP |
463 | ]) |
464 | ||
465 | join_cluster() { | |
466 | local i=$1 | |
5a0e4aec BP |
467 | others= |
468 | for j in `seq 1 $n`; do | |
469 | if test $i != $j; then | |
470 | others="$others unix:s$j.raft" | |
1b1d2e6d | 471 | fi |
5a0e4aec BP |
472 | done |
473 | AT_CHECK([ovsdb-tool join-cluster s$i.db $schema unix:s$i.raft $others]) | |
1b1d2e6d BP |
474 | } |
475 | ||
476 | start_server() { | |
477 | local i=$1 | |
5a0e4aec BP |
478 | printf "\ns$i: starting\n" |
479 | AT_CHECK([ovsdb-server -vjsonrpc -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) | |
1b1d2e6d BP |
480 | } |
481 | stop_server() { | |
482 | local i=$1 | |
5a0e4aec | 483 | printf "\ns$i: stopping\n" |
1b1d2e6d BP |
484 | OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) |
485 | } | |
486 | connect_server() { | |
487 | local i=$1 | |
5a0e4aec | 488 | printf "\ns$i: waiting to connect to storage\n" |
d97af428 | 489 | AT_CHECK([ovsdb_client_wait --log-file=connect$i.log unix:s$i.ovsdb $schema connected]) |
1b1d2e6d BP |
490 | } |
491 | remove_server() { | |
492 | local i=$1 | |
5a0e4aec | 493 | printf "\ns$i: removing from cluster\n" |
15394e0f | 494 | AT_CHECK([ovs-appctl -t "`pwd`"/s$i cluster/leave Open_vSwitch]) |
5a0e4aec | 495 | printf "\ns$i: waiting for removal to complete\n" |
d97af428 | 496 | AT_CHECK([ovsdb_client_wait --log-file=remove$i.log unix:s$i.ovsdb $schema removed]) |
c7b5c534 | 497 | stop_server $i |
1b1d2e6d BP |
498 | } |
499 | add_server() { | |
500 | local i=$1 | |
5a0e4aec | 501 | rm s$i.db |
1b1d2e6d | 502 | join_cluster $i |
5a0e4aec BP |
503 | start_server $i |
504 | connect_server $i | |
1b1d2e6d BP |
505 | } |
506 | ||
507 | cid=`ovsdb-tool db-cid s1.db` | |
508 | for i in `seq 2 $n`; do join_cluster $i; done | |
509 | ||
510 | on_exit 'kill `cat *.pid`' | |
511 | for i in `seq $n`; do start_server $i; done | |
512 | for i in `seq $n`; do connect_server $i; done | |
513 | ||
817db730 | 514 | db=unix:s1.ovsdb |
1b1d2e6d | 515 | for i in `seq 2 $n`; do |
817db730 | 516 | db=$db,unix:s$i.ovsdb |
1b1d2e6d | 517 | done |
1b1d2e6d | 518 | |
0f954f32 | 519 | n1=10 n2=5 n3=50 |
817db730 | 520 | echo "starting $n1*$n2 ovs-vsctl processes..." |
1b1d2e6d | 521 | for i in $(seq 0 $(expr $n1 - 1) ); do |
5a0e4aec | 522 | (for j in $(seq $n2); do |
1b1d2e6d | 523 | : > $i-$j.running |
817db730 | 524 | txn="add Open_vSwitch . external_ids $i-$j=$i-$j" |
0f954f32 | 525 | for k in $(seq $n3); do |
817db730 | 526 | txn="$txn -- add Open_vSwitch . external_ids $i-$j-$k=$i-$j-$k" |
0f954f32 | 527 | done |
817db730 | 528 | run_as "ovs-vsctl($i-$j)" ovs-vsctl "-vPATTERN:console:ovs-vsctl($i-$j)|%D{%H:%M:%S}|%05N|%c|%p|%m" --log-file=$i-$j.log -vfile -vsyslog:off -vtimeval:off --timeout=120 --db="$db" --no-leader-only --no-wait $txn |
5a0e4aec BP |
529 | status=$? |
530 | if test $status != 0; then | |
531 | echo "$i-$j exited with status $status" > $i-$j:$status | |
1b1d2e6d BP |
532 | fi |
533 | rm $i-$j.running | |
5a0e4aec BP |
534 | done |
535 | : > $i.done)& | |
1b1d2e6d BP |
536 | done |
537 | echo "...done" | |
1b1d2e6d | 538 | |
817db730 | 539 | echo "waiting for ovs-vsctl processes to exit..." |
6c8dd8ca | 540 | # Track the phase in a file rather than a variable: the "while" loop below is the tail of a pipeline and so runs in a subshell, and the phase is read again after the loop ends. |
7ee9c6e0 | 541 | echo 0 > phase |
0f03ae37 | 542 | i=0 |
0f954f32 | 543 | (while :; do echo; sleep 0.1; done) | while read REPLY; do |
1b1d2e6d | 544 | printf "t=%2d s:" $i |
5a0e4aec | 545 | done=0 |
1b1d2e6d | 546 | for j in $(seq 0 $(expr $n1 - 1)); do |
5a0e4aec BP |
547 | if test -f $j.done; then |
548 | printf " $j" | |
549 | done=$(expr $done + 1) | |
1b1d2e6d | 550 | fi |
5a0e4aec BP |
551 | done |
552 | printf '\n' | |
553 | if test $done = $n1; then | |
554 | break | |
1b1d2e6d BP |
555 | fi |
556 | ||
7ee9c6e0 | 557 | case $(cat phase) in # ( |
5a0e4aec | 558 | 0) |
0f954f32 | 559 | if test $done -ge $(expr $n1 / 10); then |
5a0e4aec BP |
560 | if test $variant = kill; then |
561 | stop_server $victim | |
562 | else | |
563 | remove_server $victim | |
564 | fi | |
7ee9c6e0 | 565 | echo 1 > phase |
5a0e4aec | 566 | next=$(expr $i + 2) |
1b1d2e6d | 567 | fi |
5a0e4aec | 568 | ;; # ( |
1b1d2e6d | 569 | 1) |
5a0e4aec BP |
570 | if test $i -ge $next; then |
571 | if test $variant = kill; then | |
572 | start_server $victim | |
573 | connect_server $victim | |
574 | else | |
575 | add_server $victim | |
576 | fi | |
7ee9c6e0 | 577 | echo 2 > phase |
5a0e4aec BP |
578 | fi |
579 | ;; | |
580 | esac | |
581 | ||
582 | i=$(expr $i + 1) | |
1b1d2e6d BP |
583 | done |
584 | echo "...done" | |
7ee9c6e0 | 585 | AT_CHECK([if test $(cat phase) != 2; then exit 77; fi]) |
1b1d2e6d | 586 | |
dff60a1e BP |
587 | for i in $(seq 0 $(expr $n1 - 1) ); do |
588 | for j in `seq $n2`; do | |
589 | echo "$i-$j=$i-$j" | |
0f954f32 HZ |
590 | for k in `seq $n3`; do |
591 | echo "$i-$j-$k=$i-$j-$k" | |
592 | done | |
dff60a1e BP |
593 | done |
594 | done | sort > expout | |
15394e0f | 595 | AT_CHECK([ovs-vsctl --db="$db" --no-wait --log-file=finalize.log -vtimeval:off -vfile -vsyslog:off --bare get Open_vSwitch . external-ids | tr ',' '\n' | sed 's/[[{}"" ]]//g' | sort], [0], [expout]) |
1b1d2e6d BP |
596 | |
597 | for i in `seq $n`; do | |
7ee9c6e0 | 598 | if test $i != $victim || test $(cat phase) != 1; then |
5a0e4aec BP |
599 | stop_server $i |
600 | fi | |
1b1d2e6d BP |
601 | done |
602 | ||
603 | # We ignore stdout because non-fatal warnings get printed there. | |
604 | AT_CHECK([ovsdb-tool check-cluster s*.db], [0], [ignore]) | |
605 | } | |
606 | OVS_END_SHELL_HELPERS | |
607 | ||
608 | AT_SETUP([OVSDB 3-server torture test - kill/restart leader]) | |
609 | AT_KEYWORDS([ovsdb server positive unix cluster cluster3]) | |
610 | ovsdb_torture_test 3 1 kill | |
611 | AT_CLEANUP | |
612 | AT_SETUP([OVSDB 3-server torture test - kill/restart follower 1]) | |
613 | AT_KEYWORDS([ovsdb server positive unix cluster cluster3]) | |
614 | ovsdb_torture_test 3 2 kill | |
615 | AT_CLEANUP | |
616 | AT_SETUP([OVSDB 3-server torture test - kill/restart follower 2]) | |
617 | AT_KEYWORDS([ovsdb server positive unix cluster cluster3]) | |
618 | ovsdb_torture_test 3 3 kill | |
619 | AT_CLEANUP | |
620 | AT_SETUP([OVSDB 5-server torture test - kill/restart leader]) | |
621 | AT_KEYWORDS([ovsdb server positive unix cluster cluster5]) | |
622 | ovsdb_torture_test 5 1 kill | |
623 | AT_CLEANUP | |
624 | AT_SETUP([OVSDB 5-server torture test - kill/restart follower 1]) | |
625 | AT_KEYWORDS([ovsdb server positive unix cluster cluster5]) | |
626 | ovsdb_torture_test 5 2 kill | |
627 | AT_CLEANUP | |
628 | AT_SETUP([OVSDB 5-server torture test - kill/restart follower 2]) | |
629 | AT_KEYWORDS([ovsdb server positive unix cluster cluster5]) | |
630 | ovsdb_torture_test 5 3 kill | |
631 | AT_CLEANUP | |
632 | AT_SETUP([OVSDB 5-server torture test - kill/restart follower 3]) | |
633 | AT_KEYWORDS([ovsdb server positive unix cluster cluster5]) | |
634 | ovsdb_torture_test 5 4 kill | |
635 | AT_CLEANUP | |
636 | AT_SETUP([OVSDB 5-server torture test - kill/restart follower 4]) | |
637 | AT_KEYWORDS([ovsdb server positive unix cluster cluster5]) | |
638 | ovsdb_torture_test 5 5 kill | |
639 | AT_CLEANUP | |
640 | ||
641 | AT_SETUP([OVSDB 3-server torture test - remove/re-add leader]) | |
642 | AT_KEYWORDS([ovsdb server positive unix cluster cluster3]) | |
643 | ovsdb_torture_test 3 1 remove | |
644 | AT_CLEANUP | |
645 | AT_SETUP([OVSDB 3-server torture test - remove/re-add follower 1]) | |
646 | AT_KEYWORDS([ovsdb server positive unix cluster cluster3]) | |
647 | ovsdb_torture_test 3 2 remove | |
648 | AT_CLEANUP | |
649 | AT_SETUP([OVSDB 3-server torture test - remove/re-add follower 2]) | |
650 | AT_KEYWORDS([ovsdb server positive unix cluster cluster3]) | |
651 | ovsdb_torture_test 3 3 remove | |
652 | AT_CLEANUP | |
653 | AT_SETUP([OVSDB 5-server torture test - remove/re-add leader]) | |
654 | AT_KEYWORDS([ovsdb server positive unix cluster cluster5]) | |
655 | ovsdb_torture_test 5 1 remove | |
656 | AT_CLEANUP | |
657 | AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 1]) | |
658 | AT_KEYWORDS([ovsdb server positive unix cluster cluster5]) | |
659 | ovsdb_torture_test 5 2 remove | |
660 | AT_CLEANUP | |
661 | AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 2]) | |
662 | AT_KEYWORDS([ovsdb server positive unix cluster cluster5]) | |
663 | ovsdb_torture_test 5 3 remove | |
664 | AT_CLEANUP | |
665 | AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 3]) | |
666 | AT_KEYWORDS([ovsdb server positive unix cluster cluster5]) | |
667 | ovsdb_torture_test 5 4 remove | |
668 | AT_CLEANUP | |
669 | AT_SETUP([OVSDB 5-server torture test - remove/re-add follower 4]) | |
670 | AT_KEYWORDS([ovsdb server positive unix cluster cluster5]) | |
671 | ovsdb_torture_test 5 5 remove | |
672 | AT_CLEANUP |