]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #!/bin/bash |
2 | # | |
3 | # Copyright (C) 2015 Red Hat <contact@redhat.com> | |
4 | # | |
5 | # | |
6 | # Author: Kefu Chai <kchai@redhat.com> | |
7 | # | |
8 | # This program is free software; you can redistribute it and/or modify | |
9 | # it under the terms of the GNU Library Public License as published by | |
10 | # the Free Software Foundation; either version 2, or (at your option) | |
11 | # any later version. | |
12 | # | |
13 | # This program is distributed in the hope that it will be useful, | |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | # GNU Library Public License for more details. | |
17 | # | |
18 | ||
c07f9fc5 | 19 | source $CEPH_ROOT/qa/standalone/ceph-helpers.sh |
7c673cae FG |
20 | |
function run() {
    # Standalone test driver: bring up a mon + mgr, run each TEST_*
    # function in a fresh cluster, and tear down between tests.
    #
    # $1: scratch directory for daemon data/logs
    # $@: optional list of TEST_* function names; defaults to every
    #     TEST_* function currently defined in the shell
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7112" # git grep '\<7112\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    # scrape TEST_* names from the shell's symbol table when none given
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        run_mon $dir a || return 1
        run_mgr $dir x || return 1
        create_rbd_pool || return 1
        # check that erasure code plugins are preloaded
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
        grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}
44 | ||
function setup_osds() {
    # Start $1 OSDs (osd.0 .. osd.N-1) and wait for the cluster to
    # become clean.
    #
    # NOTE(review): this function references $dir without declaring or
    # receiving it — it resolves via bash dynamic scoping to the
    # caller's "local dir". Every caller in this file sets that up;
    # confirm before reusing elsewhere.
    local count=$1
    shift

    for id in $(seq 0 $(expr $count - 1)) ; do
        run_osd $dir $id || return 1
    done
    wait_for_clean || return 1

    # check that erasure code plugins are preloaded
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
    grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1
}
58 | ||
b32b8144 FG |
function get_state() {
    # Print the current state string of PG $1 (e.g. "active+clean"),
    # extracted from the JSON pg dump.
    local pgid=$1
    local field=state
    local filter=".[] | select(.pgid==\"$pgid\") | .$field"
    ceph --format json pg dump pgs 2>/dev/null | jq -r "$filter"
}
65 | ||
7c673cae FG |
function create_erasure_coded_pool() {
    # Create an EC pool using a jerasure profile and wait until clean.
    #
    # $1: pool name
    # $2: k (number of data chunks)
    # $3: m (number of coding chunks)
    #
    # Registers the profile under the fixed name "myprofile";
    # delete_erasure_coded_pool removes it again.
    local poolname=$1
    shift
    local k=$1
    shift
    local m=$1
    shift

    # failure domain "osd" so all shards can live on one host
    ceph osd erasure-code-profile set myprofile \
        plugin=jerasure \
        k=$k m=$m \
        crush-failure-domain=osd || return 1
    create_pool $poolname 1 1 erasure myprofile \
        || return 1
    wait_for_clean || return 1
}
82 | ||
28e407b8 | 83 | function delete_erasure_coded_pool() { |
7c673cae | 84 | local poolname=$1 |
7c673cae FG |
85 | ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it |
86 | ceph osd erasure-code-profile rm myprofile | |
87 | } | |
88 | ||
function rados_put() {
    # Generate a known 4KB payload in $dir/ORIGINAL and store it as an
    # object; rados_get later compares retrieved data against it.
    #
    # $1: test directory
    # $2: pool name
    # $3: object name (default SOMETHING)
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}

    # four markers, each right-padded with spaces to 1024 bytes
    for marker in AAA BBB CCCC DDDD ; do
        printf "%*s" 1024 $marker
    done > $dir/ORIGINAL
    #
    # put the object; $dir/ORIGINAL is kept so rados_get can verify
    # a later read returns identical bytes
    #
    rados --pool $poolname put $objname $dir/ORIGINAL || return 1
}
102 | ||
function rados_get() {
    # Read an object back and compare it with $dir/ORIGINAL (written by
    # rados_put). With $4 == "fail", instead assert that the read fails.
    #
    # $1: test directory
    # $2: pool name
    # $3: object name (default SOMETHING)
    # $4: "ok" (default) to expect success, "fail" to expect failure
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local expect=${4:-ok}

    #
    # Expect a failure to get object
    #
    # FIX: quote $expect so the test is well-formed even if the caller
    # passes an empty or whitespace-containing value
    if [ "$expect" = "fail" ];
    then
        ! rados --pool $poolname get $objname $dir/COPY
        return
    fi
    #
    # get an object, compare with $dir/ORIGINAL
    #
    rados --pool $poolname get $objname $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1
    # -f: don't fail the test on cleanup if COPY somehow vanished
    rm -f $dir/COPY
}
124 | ||
b32b8144 FG |
125 | |
function inject_remove() {
    # Remove the object's shard from the OSD that currently serves
    # shard $shard_id, using the offline objectstore tool.
    #
    # NOTE(review): $pooltype and $which are accepted but unused here —
    # presumably the signature mirrors inject_eio in ceph-helpers.sh so
    # callers can dispatch via "inject_$inject"; confirm against that
    # helper.
    local pooltype=$1
    shift
    local which=$1
    shift
    local poolname=$1
    shift
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift

    # map the shard index to the OSD id holding that shard right now
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    objectstore_tool $dir $osd_id $objname remove || return 1
}
144 | ||
# Test with an inject error
function rados_put_get_data() {
    # Store an object, inject an error into one shard, and verify the
    # EC read path: one bad shard must still be readable, two bad
    # shards must make the read fail.
    #
    # $1: injection type, "eio" or "remove" (dispatched as inject_$1)
    # $2: test directory
    # $3: shard index to inject into first (the next index is injected
    #     at the end to force the failing read)
    # $4: optional "recovery" — additionally cycle the last OSD
    #     out/in and check recovery completes with the injection active
    local inject=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local arg=$1

    # inject error into the specified shard
    #
    local poolname=pool-jerasure
    local objname=obj-$inject-$$-$shard_id
    rados_put $dir $poolname $objname || return 1
    inject_$inject ec data $poolname $objname $dir $shard_id || return 1
    # one bad shard: the object must still be readable
    rados_get $dir $poolname $objname || return 1

    if [ "$arg" = "recovery" ];
    then
        #
        # take out the last OSD used to store the object,
        # bring it back, and check for clean PGs which means
        # recovery didn't crash the primary.
        #
        local -a initial_osds=($(get_osds $poolname $objname))
        local last_osd=${initial_osds[-1]}
        # Kill OSD
        kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
        ceph osd out ${last_osd} || return 1
        ! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1
        ceph osd in ${last_osd} || return 1
        run_osd $dir ${last_osd} || return 1
        wait_for_clean || return 1
    fi

    shard_id=$(expr $shard_id + 1)
    inject_$inject ec data $poolname $objname $dir $shard_id || return 1
    # Now 2 out of 3 shards get an error, so should fail
    rados_get $dir $poolname $objname fail || return 1
    rm $dir/ORIGINAL
}
187 | ||
# Change the size of the specified shard
#
function set_size() {
    # Corrupt the on-disk size of one shard of an EC object via the
    # offline objectstore tool:
    #   mode "add":  append $bytes random bytes to the existing shard
    #   $bytes == 0: truncate the shard to zero bytes
    #   otherwise:   replace the shard with $bytes random bytes
    #
    # $1: object name
    # $2: test directory
    # $3: shard index to corrupt
    # $4: byte count (see above)
    # $5: optional mode ("add" to append instead of replace)
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1}

    local poolname=pool-jerasure
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    # keep the cluster from marking the manipulated OSD out while the
    # objectstore tool has it offline
    ceph osd set noout
    if [ "$mode" = "add" ];
    then
        objectstore_tool $dir $osd_id $objname get-bytes $dir/CORRUPT || return 1
        dd if=/dev/urandom bs=$bytes count=1 >> $dir/CORRUPT
    elif [ "$bytes" = "0" ];
    then
        touch $dir/CORRUPT
    else
        dd if=/dev/urandom bs=$bytes count=1 of=$dir/CORRUPT
    fi
    objectstore_tool $dir $osd_id $objname set-bytes $dir/CORRUPT || return 1
    rm -f $dir/CORRUPT
    ceph osd unset noout
}
219 | ||
function rados_get_data_bad_size() {
    # Store an object, corrupt the size of one shard (see set_size) and
    # verify the object is still readable; corrupt the next shard too
    # and verify the read now fails.
    #
    # $1: test directory
    # $2: first shard index to corrupt
    # $3: byte count passed through to set_size
    # $4: set_size mode (default "set", i.e. replace)
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1:-set}

    local poolname=pool-jerasure
    local objname=obj-size-$$-$shard_id-$bytes
    rados_put $dir $poolname $objname || return 1

    # Change the size of the specified shard
    #
    set_size $objname $dir $shard_id $bytes $mode || return 1

    # one corrupted shard: still readable
    rados_get $dir $poolname $objname || return 1

    # Leave objname and modify another shard
    shard_id=$(expr $shard_id + 1)
    set_size $objname $dir $shard_id $bytes $mode || return 1
    rados_get $dir $poolname $objname fail || return 1
    rm $dir/ORIGINAL
}
245 | ||
#
# These two test cases try to validate the following behavior:
# For an object on an EC pool, if one shard has a read error
# (either primary or replica), the client can still read the object.
#
# If 2 shards have read errors the client will get an error.
#
function TEST_rados_get_subread_eio_shard_0() {
    local dir=$1
    setup_osds 4 || return 1

    local pool=pool-jerasure
    create_erasure_coded_pool $pool 2 1 || return 1
    # inject eio on primary OSD (0); the helper then also injects
    # into the next shard (1) to force the failing read
    rados_put_get_data eio $dir 0 || return 1
    delete_erasure_coded_pool $pool
}
264 | ||
function TEST_rados_get_subread_eio_shard_1() {
    local dir=$1
    setup_osds 4 || return 1

    local pool=pool-jerasure
    create_erasure_coded_pool $pool 2 1 || return 1
    # inject eio into replica shard (1); the helper then also injects
    # into shard (2) to force the failing read
    rados_put_get_data eio $dir 1 || return 1
    delete_erasure_coded_pool $pool
}
276 | ||
# We don't remove the object from the primary because
# that just causes it to appear to be missing

function TEST_rados_get_subread_missing() {
    local dir=$1
    setup_osds 4 || return 1

    local pool=pool-jerasure
    create_erasure_coded_pool $pool 2 1 || return 1
    # remove the shard from replica OSD (1); the helper then removes
    # the next shard (2) as well to force the failing read
    rados_put_get_data remove $dir 1 || return 1
    delete_erasure_coded_pool $pool
}
291 | ||
#
#
# These two test cases try to validate that following behavior:
# For an object on an EC pool, one shard with an incorrect size causes
# an internal read error, but the client can still read the object.
#
# If 2 shards have incorrect size the client will get an error.
#
function TEST_rados_get_bad_size_shard_0() {
    local dir=$1
    setup_osds 4 || return 1

    local pool=pool-jerasure
    create_erasure_coded_pool $pool 2 1 || return 1
    # corrupt shard 0 (primary) three ways: replace with 10 bytes,
    # truncate to 0 bytes, append 256 random bytes
    local shard=0
    rados_get_data_bad_size $dir $shard 10 || return 1
    rados_get_data_bad_size $dir $shard 0 || return 1
    rados_get_data_bad_size $dir $shard 256 add || return 1
    delete_erasure_coded_pool $pool
}
313 | ||
function TEST_rados_get_bad_size_shard_1() {
    local dir=$1
    setup_osds 4 || return 1

    local pool=pool-jerasure
    create_erasure_coded_pool $pool 2 1 || return 1
    # corrupt replica shard 1 three ways: replace with 10 bytes,
    # truncate to 0 bytes, append 256 random bytes
    local shard=1
    rados_get_data_bad_size $dir $shard 10 || return 1
    rados_get_data_bad_size $dir $shard 0 || return 1
    rados_get_data_bad_size $dir $shard 256 add || return 1
    delete_erasure_coded_pool $pool
}
327 | ||
function TEST_rados_get_with_subreadall_eio_shard_0() {
    local dir=$1

    setup_osds 4 || return 1

    local pool=pool-jerasure
    create_erasure_coded_pool $pool 2 1 || return 1
    # inject eio on primary shard (0) and exercise the recovery path
    # (OSD out/in cycle) with the injection active
    rados_put_get_data eio $dir 0 recovery || return 1

    delete_erasure_coded_pool $pool
}
341 | ||
function TEST_rados_get_with_subreadall_eio_shard_1() {
    local dir=$1

    setup_osds 4 || return 1

    local pool=pool-jerasure
    create_erasure_coded_pool $pool 2 1 || return 1
    # inject eio on replica shard (1) and exercise the recovery path
    # (OSD out/in cycle) with the injection active
    rados_put_get_data eio $dir 1 recovery || return 1

    delete_erasure_coded_pool $pool
}
355 | ||
# Test recovery when the first k copies aren't all available
function TEST_ec_single_recovery_error() {
    # k=3 m=2: inject eio into shard 0, take an OSD out, and verify
    # recovery still completes and the object remains readable.
    local dir=$1
    local objname=myobject

    setup_osds 7 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill OSD
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    # FIX: use $objname instead of the hard-coded "myobject" so the
    # check stays correct if the object name above ever changes
    rados_get $dir $poolname $objname || return 1

    delete_erasure_coded_pool $poolname
}
383 | ||
# Test recovery when repeated reads are needed due to EIO
function TEST_ec_recovery_multiple_errors() {
    # k=4 m=4: inject eio into several shards so the recovery read has
    # to retry past multiple failures, then verify recovery succeeds.
    local dir=$1
    local objname=myobject

    setup_osds 9 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 4 4 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1
    # first read will try shards 0,1,2 when 0 gets EIO, shard 3 gets
    # tried as well. Make that fail to test multiple-EIO handling.
    inject_eio ec data $poolname $objname $dir 3 || return 1
    inject_eio ec data $poolname $objname $dir 4 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill OSD
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    # FIX: use $objname instead of the hard-coded "myobject" so the
    # check stays correct if the object name above ever changes
    rados_get $dir $poolname $objname || return 1

    delete_erasure_coded_pool $poolname
}
415 | ||
# Test recovery when there's only one shard to recover, but multiple
# objects recovering in one RecoveryOp
function TEST_ec_recovery_multiple_objects() {
    local dir=$1

    # allow several objects per RecoveryOp while the OSDs start up
    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    # FIX: the put/get helpers previously ran unchecked, so a failed
    # write or read would not fail the test — propagate their status
    rados_put $dir $poolname test1 || return 1
    rados_put $dir $poolname test2 || return 1
    rados_put $dir $poolname test3 || return 1

    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1 || return 1
    rados_get $dir $poolname test2 || return 1
    rados_get $dir $poolname test3 || return 1

    delete_erasure_coded_pool $poolname
}
445 | ||
# test multi-object recovery when the one missing shard gets EIO
function TEST_ec_recovery_multiple_objects_eio() {
    local dir=$1
    local objname=myobject

    # allow several objects per RecoveryOp while the OSDs start up
    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    # FIX: the put/get helpers previously ran unchecked, so a failed
    # write or read would not fail the test — propagate their status
    rados_put $dir $poolname test1 || return 1
    rados_put $dir $poolname test2 || return 1
    rados_put $dir $poolname test3 || return 1

    # can't read from this shard anymore
    # NOTE(review): the eio is injected for $objname ("myobject"), which
    # is never written here — only test1..test3 are. Confirm whether the
    # injection was meant to target one of the test objects.
    inject_eio ec data $poolname $objname $dir 0 || return 1
    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1 || return 1
    rados_get $dir $poolname test2 || return 1
    rados_get $dir $poolname test3 || return 1

    delete_erasure_coded_pool $poolname
}
476 | ||
# Test backfill with unfound object
function TEST_ec_backfill_unfound() {
    # Force backfill (short pg log) with one object unreadable on two
    # shards; the PG must report backfill_unfound, reads of that object
    # must hang, and mark_unfound_lost must let the PG go clean while
    # every other object survives intact.
    local dir=$1
    local objname=myobject
    local lastobj=300
    # Must be between 1 and $lastobj
    local testobj=obj250

    # short pg log so the restarted OSD falls out of log range and
    # must backfill instead of log-based recovery
    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # FIX: was "2>&2", a no-op self-redirect typo; route output to
    # stderr as the other kill_daemons call sites in this file do
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    # make $testobj unreadable on two shards so backfill can't find it
    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    run_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # wait (up to ~100s) for the PG to report backfill_unfound
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        echo $state | grep backfill_unfound
        if [ "$?" = "0" ]; then
            break
        fi
        echo $state
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_missing | grep -q $testobj || return 1

    # Command should hang because object is unfound
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    # the deleted unfound object must be gone; all others intact
    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}
555 | ||
# Test recovery with unfound object
function TEST_ec_recovery_unfound() {
    # Same scenario as TEST_ec_backfill_unfound but tuned so the PG
    # goes through log-based recovery and reports recovery_unfound.
    local dir=$1
    local objname=myobject
    local lastobj=100
    # Must be between 1 and $lastobj
    local testobj=obj75

    # batch recovery and keep the pg log short
    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # FIX: was "2>&2", a no-op self-redirect typo; route output to
    # stderr as the other kill_daemons call sites in this file do
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    # make $testobj unreadable on two shards so recovery can't find it
    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    run_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # wait (up to ~100s) for the PG to report recovery_unfound
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        echo $state | grep recovery_unfound
        if [ "$?" = "0" ]; then
            break
        fi
        echo "$state "
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_missing | grep -q $testobj || return 1

    # Command should hang because object is unfound
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    # the deleted unfound object must be gone; all others intact
    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}
635 | ||
# Entry point: ceph-helpers' main() creates a scratch directory and
# invokes run() above, which executes every TEST_* function.
main test-erasure-eio "$@"
637 | ||
638 | # Local Variables: | |
639 | # compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-eio.sh" | |
640 | # End: |