]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #!/bin/bash |
2 | # vim: ts=8 sw=2 smarttab | |
3 | # | |
4 | # run_seed_to.sh - Run ceph_test_filestore_idempotent_sequence up until an | |
5 | # injection point, generating a sequence of operations based on a | |
6 | # provided seed. | |
7 | # | |
8 | # We also perform three additional tests, focused on assessing if | |
9 | # replaying a larger chunk of the journal affects the expected store | |
10 | # behavior. These tests will be performed by increasing the store's | |
11 | # journal sync interval to a very large value, allowing the store to | |
12 | # finish execution before the first sync (unless the store runs for | |
13 | # over 10 hours, in which case the interval variables must be changed | |
14 | # to an appropriate value). Unless the '--no-journal-test' option is | |
15 | # specified, we will run the 3 following scenarios: | |
16 | # | |
17 | # 1) journal sync'ing for both stores is as good as disabled | |
18 | # (we call it '00', for store naming purposes) | |
19 | # 2) journal sync'ing for store A is as good as disabled | |
20 | # (we call it '01', for store naming purposes) | |
21 | # 3) journal sync'ing for store B is as good as disabled | |
22 | # (we call it '10', for store naming purposes) | |
23 | # | |
24 | # All log files are also named accordingly (e.g., a.00.fail, | |
25 | # a.10.recover, or b.01.clean). | |
26 | # | |
27 | # By default, the test will not exit on error, although it will show the | |
28 | # fail message. This behavior is so defined so we run the whole battery of | |
29 | # tests, and obtain as many mismatches as possible in one go. We may force | |
30 | # the test to exit on error by specifying the '--exit-on-error' option. | |
31 | # | |
32 | # | |
# Abort the script as soon as any unguarded command fails.
set -o errexit

# Options passed to both stores; extended while parsing the command line.
test_opts=
36 | ||
#######################################
# Print the command-line help text.
# Arguments: $1 - program name shown on the "usage:" line
# Outputs:   help text on stdout
#######################################
usage() {
  # Unquoted delimiter on purpose: $1 must be expanded in the first line.
  cat <<EOF
usage: $1 [options..] <seed> <kill-at>

options:
 -c, --colls <VAL> # of collections
 -o, --objs <VAL> # of objects
 -b, --btrfs <VAL> seq number for btrfs stores
 --no-journal-test don't perform journal replay tests
 -e, --exit-on-error exit with 1 on error
 -v, --valgrind run commands through valgrind
 -h, --help show this help and exit

env vars:
 OPTS_STORE additional opts for both stores
 OPTS_STORE_A additional opts for store A
 OPTS_STORE_B additional opts for store B

EOF
}
54 | ||
#######################################
# Abort when an option that takes a value was given none.
# Arguments: $1 - option name as typed (used in the message)
#            $2 - the option's value; an empty string counts as missing
# Outputs:   error message on stderr when the value is missing
# Returns:   0 when a value is present; otherwise exits the shell with 1
#######################################
die_on_missing_arg() {
  if [[ -z "$2" ]]; then
    # Diagnostics belong on stderr, like the unknown-option message.
    printf '%s: missing required parameter\n' "$1" >&2
    exit 1
  fi
}
61 | ||
62 | ||
# Positional-argument bookkeeping: how many are required and how many the
# command-line parser has collected so far.
required_args=2
obtained_args=0

# Positional arguments, filled in by set_arg.
seed=
killat=

# Option defaults; each one can be changed by a command-line switch.
on_btrfs=0
on_btrfs_seq=0
journal_test=1
exit_on_error=0
v=

# Sync intervals (seconds) used by the journal replay runs.
min_sync_interval=36000 # ten hours, yes.
max_sync_interval=36001
75 | ||
#######################################
# Recursively remove the given paths, unless running in btrfs mode.
# Globals:   on_btrfs (read) - removal happens only when this is 0
# Arguments: paths to remove; each is passed to rm as a single operand
# Returns:   rm's status when removal is attempted, 0 otherwise
#######################################
do_rm() {
  if [[ $on_btrfs -eq 0 ]]; then
    # "$@" keeps every path intact (no word splitting or globbing) and
    # '--' stops a name starting with '-' from being read as an option.
    rm -fr -- "$@"
  fi
}
81 | ||
#######################################
# Store a positional argument according to its position on the command line.
# Globals:   seed (written for position 1), killat (written for position 2)
# Arguments: $1 - 1-based position of the argument
#            $2 - the argument's value
# Outputs:   for any other position, an error on stderr followed by the
#            usage text
# Returns:   0 on success; exits the shell with 1 for an unexpected position
#######################################
set_arg() {
  if [[ $1 -eq 1 ]]; then
    seed=$2
  elif [[ $1 -eq 2 ]]; then
    killat=$2
  else
    echo "error: unknown purpose for '$2'" >&2
    # Quoted so a script path containing spaces reaches usage as one word.
    usage "$0"
    exit 1
  fi
}
93 | ||
# Parse the command line.  Options may appear anywhere; words that are not
# options are handed to set_arg in order of appearance (first <seed>, then
# <kill-at>).
while [[ $# -gt 0 ]]; do
  case "$1" in
    -c | --colls)
      die_on_missing_arg "$1" "$2"
      test_opts="$test_opts --test-num-colls $2"
      shift 2
      ;;
    -o | --objs)
      die_on_missing_arg "$1" "$2"
      test_opts="$test_opts --test-num-objs $2"
      shift 2
      ;;
    -h | --help)
      usage "$0"
      exit 0
      ;;
    -b | --btrfs)
      die_on_missing_arg "$1" "$2"
      on_btrfs=1
      on_btrfs_seq=$2
      shift 2
      ;;
    --no-journal-test)
      journal_test=0
      shift
      ;;
    -e | --exit-on-error)
      exit_on_error=1
      shift
      ;;
    -v | --valgrind)
      v="valgrind --leak-check=full"
      shift
      ;;
    --)
      shift
      # Everything after '--' is positional, even when it starts with '-'.
      # Consume it here; merely leaving the loop would drop these words and
      # the argument-count check below would then report them as missing.
      while [[ $# -gt 0 ]]; do
        obtained_args=$((obtained_args + 1))
        set_arg "$obtained_args" "$1"
        shift
      done
      ;;
    -*)
      echo "$1: unknown option" >&2
      usage "$0"
      exit 1
      ;;
    *)
      obtained_args=$((obtained_args + 1))
      # Quoted so a value containing whitespace stays a single argument.
      set_arg "$obtained_args" "$1"
      shift
      ;;
  esac
done
145 | ||
# Both positional arguments (<seed> and <kill-at>) are mandatory.
if ((obtained_args != required_args)); then
  echo "error: missing argument" >&2
  usage "$0"
  exit 1
fi

# Extra options from the environment.  OPTS_STORE applies to both stores, so
# it is appended before the per-store lists are derived; OPTS_STORE_A and
# OPTS_STORE_B are appended to their own store's list only.  The ':+' form
# adds nothing (not even the separating space) when a variable is unset or
# empty.
test_opts="$test_opts${OPTS_STORE:+ $OPTS_STORE}"
test_opts_a="$test_opts${OPTS_STORE_A:+ $OPTS_STORE_A}"
test_opts_b="$test_opts${OPTS_STORE_B:+ $OPTS_STORE_B}"

# Quoted so the values are printed verbatim instead of being glob-expanded.
echo "seed $seed"
echo "kill at $killat"
168 | ||
# Upper bound handed to 'run-sequence-to'; large enough that the run is
# expected to end at $killat instead.
to=1000000000

#
# Store names
#
# The names are derived rather than fixed, for two reasons:
#  1) On a btrfs volume each run needs its own sequence number.  Removing
#     the store directories fails there, so it is far simpler to use
#     different store names such as 'a.$seq' or 'b.$seq'.
#
#  2) Unless '--no-journal-test' is specified, three additional tests are
#     run for each store.  They reuse the same command, varying only the
#     store's name and arguments.
#
store_a="a"
store_b="b"

if ((on_btrfs == 1)); then
  store_a+=".$on_btrfs_seq"
  store_b+=".$on_btrfs_seq"
fi

# One baseline run, plus three journal replay runs when they are enabled.
if ((journal_test == 1)); then
  total_runs=4
else
  total_runs=1
fi

num_runs=0

# Sync-interval options added to a store's arguments in the journal runs.
opt_min_sync="--filestore-min-sync-interval $min_sync_interval"
opt_max_sync="--filestore-max-sync-interval $max_sync_interval"

# Exit status of the whole script.
ret=0
206 | ||
# Matches an optionally signed integer with optional surrounding whitespace.
# The number itself is captured so it can be reused without the whitespace.
numeric_re='^[[:space:]]*(-?[0-9]+)[[:space:]]*$'

# Run the kill / replay / compare cycle once per scenario.
while [[ $num_runs -lt $total_runs ]]; do
  tmp_name_a=$store_a
  tmp_name_b=$store_b
  tmp_opts_a=$test_opts_a
  tmp_opts_b=$test_opts_b

  #
  # Run 0 compares the stores with their journals configured as given.
  # The remaining runs cover three other scenarios:
  #  1) journal sync'ing for both stores is as good as disabled
  #     (we call it '00')
  #  2) journal sync'ing for store A is as good as disabled
  #     (we call it '01')
  #  3) journal sync'ing for store B is as good as disabled
  #     (we call it '10')
  #
  if [[ $num_runs -gt 0 && $journal_test -eq 1 ]]; then
    echo "run #$num_runs"
    case "$num_runs" in
      1)
        tmp_name_a="$tmp_name_a.00"
        tmp_name_b="$tmp_name_b.00"
        tmp_opts_a="$tmp_opts_a $opt_min_sync $opt_max_sync"
        tmp_opts_b="$tmp_opts_b $opt_min_sync $opt_max_sync"
        ;;
      2)
        tmp_name_a="$tmp_name_a.01"
        tmp_name_b="$tmp_name_b.01"
        tmp_opts_a="$tmp_opts_a $opt_min_sync $opt_max_sync"
        ;;
      3)
        tmp_name_a="$tmp_name_a.10"
        tmp_name_b="$tmp_name_b.10"
        tmp_opts_b="$tmp_opts_b $opt_min_sync $opt_max_sync"
        ;;
    esac
  fi

  # Store A: run the sequence with a kill point configured.  '|| true'
  # keeps a non-zero exit from aborting the script (presumably the kill
  # itself makes the command fail).
  # $v and $tmp_opts_a hold several words each and must be word-split.
  do_rm "$tmp_name_a" "$tmp_name_a.fail" "$tmp_name_a.recover"
  # shellcheck disable=SC2086
  $v ceph_test_filestore_idempotent_sequence run-sequence-to "$to" \
    "$tmp_name_a" "$tmp_name_a/journal" \
    --test-seed "$seed" --osd-journal-size 100 \
    --filestore-kill-at "$killat" $tmp_opts_a \
    --log-file "$tmp_name_a.fail" --debug-filestore 20 || true

  # Ask store A for the last operation it holds.
  stop_at=$(ceph_test_filestore_idempotent_sequence get-last-op \
    "$tmp_name_a" "$tmp_name_a/journal" \
    --log-file "$tmp_name_a.recover" \
    --debug-filestore 20 --debug-journal 20)

  # The answer must be a number; anything else means get-last-op failed.
  if [[ ! $stop_at =~ $numeric_re ]]; then
    echo "error: get-last-op returned '$stop_at'" >&2
    exit 1
  fi
  stop_at=${BASH_REMATCH[1]}

  echo "stopped at $stop_at"

  # Store B: the clean reference, run up to the same operation without a
  # kill point.
  do_rm "$tmp_name_b" "$tmp_name_b.clean"
  # shellcheck disable=SC2086
  $v ceph_test_filestore_idempotent_sequence run-sequence-to \
    "$stop_at" "$tmp_name_b" "$tmp_name_b/journal" \
    --test-seed "$seed" --osd-journal-size 100 \
    --log-file "$tmp_name_b.clean" --debug-filestore 20 $tmp_opts_b

  # Compare the two stores and report the result.
  # shellcheck disable=SC2086
  if $v ceph_test_filestore_idempotent_sequence diff \
    "$tmp_name_a" "$tmp_name_a/journal" "$tmp_name_b" "$tmp_name_b/journal"; then
    echo OK
  else
    echo "FAIL"
    echo " see:"
    echo " $tmp_name_a.fail -- leading up to failure"
    echo " $tmp_name_a.recover -- journal replay"
    echo " $tmp_name_b.clean -- the clean reference"

    # Remember the failure but keep going, unless asked to stop at once.
    ret=1
    if [[ $exit_on_error -eq 1 ]]; then
      exit 1
    fi
  fi

  num_runs=$((num_runs + 1))
done

exit "${ret:-0}"