#!/usr/bin/env bash
#
# Copyright (C) 2015 Red Hat <contact@redhat.com>
#
# Author: Kefu Chai <kchai@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#
# Pull in the standalone-test helper functions (setup, run_mon, run_osd,
# create_pool, kill_daemons, wait_for_clean, inject_eio, ...).
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
# Top-level driver: bring up a mon and mgr in $dir, then run every TEST_*
# function (or only those named on the command line), tearing down between
# tests.  Returns non-zero on the first failure.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7112" # git grep '\<7112\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    # Default to every TEST_* function defined in this file.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        run_mon $dir a || return 1
        run_mgr $dir x || return 1
        create_pool rbd 4 || return 1

        # check that erasure code plugins are preloaded
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
        grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}
# Start $1 OSDs (osd.0 .. osd.$1-1) in $dir and verify the erasure code
# plugins were preloaded by inspecting osd.0's log.
function setup_osds() {
    local count=$1
    shift
    local id

    for id in $(seq 0 $(expr $count - 1)) ; do
        run_osd $dir $id || return 1
    done

    # check that erasure code plugins are preloaded
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
    grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1
}
# Print the "state" field of pg $1 (e.g. "active+clean") by filtering the
# JSON pg dump with jq.  stderr from ceph is discarded on purpose: this is
# polled in a loop and transient errors are expected.
function get_state() {
    local pgid=$1
    local sname=state
    ceph --format json pg dump pgs 2>/dev/null | \
        jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname"
}
# Create erasure coded pool $1 with k=$2 data chunks and m=$3 coding chunks
# using a throwaway profile "myprofile", and wait for the cluster to settle.
function create_erasure_coded_pool() {
    local poolname=$1
    shift
    local k=$1
    shift
    local m=$1
    shift

    ceph osd erasure-code-profile set myprofile \
        plugin=jerasure \
        k=$k m=$m \
        crush-failure-domain=osd || return 1
    create_pool $poolname 1 1 erasure myprofile \
        || return 1
    wait_for_clean || return 1
}
# Remove pool $1 and the "myprofile" erasure-code profile created by
# create_erasure_coded_pool.  Best-effort: failures are not fatal here.
function delete_erasure_coded_pool() {
    local poolname=$1
    ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it
    ceph osd erasure-code-profile rm myprofile
}
# Write a 4KB test object $3 (default SOMETHING) into pool $2, keeping the
# payload in $dir/ORIGINAL so later reads can be diffed against it.
function rados_put() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}

    # Four distinct 1KB space-padded chunks so shard corruption is visible.
    for marker in AAA BBB CCCC DDDD ; do
        printf "%*s" 1024 $marker
    done > $dir/ORIGINAL
    #
    # get and put an object, compare they are equal
    #
    rados --pool $poolname put $objname $dir/ORIGINAL || return 1
}
# Read object $3 (default SOMETHING) from pool $2 and compare it with
# $dir/ORIGINAL.  If $4 is "fail", expect the read itself to fail instead.
function rados_get() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local expect=${4:-ok}

    #
    # Expect a failure to get object
    #
    if [ $expect = "fail" ];
    then
        ! rados --pool $poolname get $objname $dir/COPY
        return
    fi
    #
    # get an object, compare with $dir/ORIGINAL
    #
    rados --pool $poolname get $objname $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1
    rm $dir/COPY
}
# Remove the shard of $objname held by the OSD serving shard $shard_id.
# Signature mirrors inject_eio (from ceph-helpers.sh) so callers can invoke
# either via "inject_$inject ec data $poolname $objname $dir $shard_id";
# $pooltype and $which are accepted but unused here.
function inject_remove() {
    local pooltype=$1
    shift
    local which=$1
    shift
    local poolname=$1
    shift
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift

    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    objectstore_tool $dir $osd_id $objname remove || return 1
}
# Test with an inject error.
# Write an object, inject an error ($1 = "eio" or "remove") into shard
# $shard_id, and verify the object is still readable.  With $4 = "recovery",
# additionally cycle the last OSD storing the object to make sure recovery
# does not crash the primary.  Finally inject a second error so reads fail.
function rados_put_get_data() {
    local inject=$1
    local dir=$2
    local shard_id=$3
    local arg=$4

    # inject error to specified shard
    #
    local poolname=pool-jerasure
    local objname=obj-$inject-$$-$shard_id
    rados_put $dir $poolname $objname || return 1
    inject_$inject ec data $poolname $objname $dir $shard_id || return 1
    rados_get $dir $poolname $objname || return 1

    if [ "$arg" = "recovery" ];
    then
        #
        # take out the last OSD used to store the object,
        # bring it back, and check for clean PGs which means
        # recovery didn't crash the primary.
        #
        local -a initial_osds=($(get_osds $poolname $objname))
        local last_osd=${initial_osds[-1]}
        # Kill OSD
        kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
        ceph osd out ${last_osd} || return 1
        ! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1
        ceph osd in ${last_osd} || return 1
        run_osd $dir ${last_osd} || return 1
        wait_for_clean || return 1
    fi

    shard_id=$(expr $shard_id + 1)
    inject_$inject ec data $poolname $objname $dir $shard_id || return 1
    # Now 2 out of 3 shards get an error, so should fail
    rados_get $dir $poolname $objname fail || return 1
    rm $dir/ORIGINAL
}
# Change the size of specified shard.
# Corrupt the on-disk shard of $objname served by shard $shard_id:
#   mode "add"  : append $bytes of random data to the existing shard
#   bytes == 0  : replace the shard contents with an empty file
#   otherwise   : replace the shard with $bytes of random data
function set_size() {
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1}

    local poolname=pool-jerasure
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}

    if [ "$mode" = "add" ];
    then
        objectstore_tool $dir $osd_id $objname get-bytes $dir/CORRUPT || return 1
        dd if=/dev/urandom bs=$bytes count=1 >> $dir/CORRUPT
    elif [ "$bytes" = "0" ];
    then
        # NOTE(review): zero-byte branch reconstructed — the original body was
        # lost in extraction; an empty replacement file matches the callers'
        # "bad size 0" intent.  Confirm against upstream.
        touch $dir/CORRUPT
    else
        dd if=/dev/urandom bs=$bytes count=1 of=$dir/CORRUPT
    fi
    objectstore_tool $dir $osd_id $objname set-bytes $dir/CORRUPT || return 1
    rm -f $dir/CORRUPT
}
# Write an object, corrupt the size of shard $shard_id with set_size, and
# verify the object is still readable; then corrupt the next shard too and
# verify the read now fails (2 bad shards out of k+m=3 is unrecoverable).
function rados_get_data_bad_size() {
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1:-set}

    local poolname=pool-jerasure
    local objname=obj-size-$$-$shard_id-$bytes
    rados_put $dir $poolname $objname || return 1

    # Change the size of specified shard
    #
    set_size $objname $dir $shard_id $bytes $mode || return 1

    rados_get $dir $poolname $objname || return 1

    # Leave objname and modify another shard
    shard_id=$(expr $shard_id + 1)
    set_size $objname $dir $shard_id $bytes $mode || return 1
    rados_get $dir $poolname $objname fail || return 1
    rm $dir/ORIGINAL
}
#
# These two test cases try to validate the following behavior:
# For an object on an EC pool, if there is one shard having a read error
# (either primary or replica), the client can still read the object.
#
# If 2 shards have read errors the client will get an error.
#
# Read succeeds with EIO injected into shard 0 (primary), fails once the
# next shard is also bad (handled inside rados_put_get_data).
function TEST_rados_get_subread_eio_shard_0() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio on primary OSD (0) and replica OSD (1)
    local shard_id=0
    rados_put_get_data eio $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}
# Same as shard_0 variant but starting the EIO injection at shard 1.
function TEST_rados_get_subread_eio_shard_1() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio into replicas OSD (1) and OSD (2)
    local shard_id=1
    rados_put_get_data eio $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}
# We don't remove the object from the primary because
# that just causes it to appear to be missing.
# Read succeeds with the shard removed from one replica, fails once a
# second replica's shard is removed too.
function TEST_rados_get_subread_missing() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject remove into replicas OSD (1) and OSD (2)
    local shard_id=1
    rados_put_get_data remove $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}
#
# These two test cases try to validate the following behavior:
# For an object on an EC pool, if there is one shard with an incorrect
# size this will cause an internal read error; the client can still read
# the object.
#
# If 2 shards have an incorrect size the client will get an error.
#
# Corrupt shard sizes starting at shard 0 (primary): shrink to 10 bytes,
# truncate to 0, and append 256 bytes — each must be tolerated for one bad
# shard and fail for two.
function TEST_rados_get_bad_size_shard_0() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # Set incorrect size into primary OSD (0) and replica OSD (1)
    local shard_id=0
    rados_get_data_bad_size $dir $shard_id 10 || return 1
    rados_get_data_bad_size $dir $shard_id 0 || return 1
    rados_get_data_bad_size $dir $shard_id 256 add || return 1
    delete_erasure_coded_pool $poolname
}
# Same as shard_0 variant but starting the size corruption at shard 1.
function TEST_rados_get_bad_size_shard_1() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # Set incorrect size into replicas OSD (1) and OSD (2)
    local shard_id=1
    rados_get_data_bad_size $dir $shard_id 10 || return 1
    rados_get_data_bad_size $dir $shard_id 0 || return 1
    rados_get_data_bad_size $dir $shard_id 256 add || return 1
    delete_erasure_coded_pool $poolname
}
# EIO on the primary shard plus an OSD restart ("recovery" mode): recovery
# must not crash the primary and the object must stay readable.
function TEST_rados_get_with_subreadall_eio_shard_0() {
    local dir=$1
    local shard_id=0

    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio on primary OSD (0)
    rados_put_get_data eio $dir $shard_id recovery || return 1

    delete_erasure_coded_pool $poolname
}
# Same as shard_0 variant but injecting EIO on replica shard 1.
function TEST_rados_get_with_subreadall_eio_shard_1() {
    local dir=$1
    local shard_id=1

    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio on replica OSD (1)
    rados_put_get_data eio $dir $shard_id recovery || return 1

    delete_erasure_coded_pool $poolname
}
# Test recovery when the object attr read fails.
function TEST_ec_object_attr_read_error() {
    local dir=$1
    local objname=myobject

    setup_osds 7 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    local primary_osd=$(get_primary $poolname $objname)
    #
    # kill the primary so the object is written while it is down,
    # forcing recovery when it returns.
    #
    kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1

    rados_put $dir $poolname $objname || return 1

    # Inject eio, shard 1 is the one read attr
    inject_eio ec mdata $poolname $objname $dir 1 || return 1

    run_osd $dir ${primary_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname myobject || return 1

    delete_erasure_coded_pool $poolname
}
# Test recovery when the first k copies aren't all available.
function TEST_ec_single_recovery_error() {
    local dir=$1
    local objname=myobject

    setup_osds 7 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill the OSD holding the last shard so its shard must be recovered.
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname myobject || return 1

    delete_erasure_coded_pool $poolname
}
# Test recovery when repeated reads are needed due to EIO.
function TEST_ec_recovery_multiple_errors() {
    local dir=$1
    local objname=myobject

    setup_osds 9 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 4 4 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1
    # first read will try shards 0,1,2 when 0 gets EIO, shard 3 gets
    # tried as well. Make that fail to test multiple-EIO handling.
    inject_eio ec data $poolname $objname $dir 3 || return 1
    inject_eio ec data $poolname $objname $dir 4 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill the OSD holding the last shard so its shard must be recovered.
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname myobject || return 1

    delete_erasure_coded_pool $poolname
}
# Test recovery when there's only one shard to recover, but multiple
# objects recovering in one RecoveryOp.
function TEST_ec_recovery_multiple_objects() {
    local dir=$1
    local objname=myobject

    # Batch recovery so all three objects land in a single RecoveryOp.
    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname test1
    rados_put $dir $poolname test2
    rados_put $dir $poolname test3

    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1
    rados_get $dir $poolname test2
    rados_get $dir $poolname test3

    delete_erasure_coded_pool $poolname
}
# Test multi-object recovery when the one missing shard gets EIO.
function TEST_ec_recovery_multiple_objects_eio() {
    local dir=$1
    local objname=myobject

    # Batch recovery so all three objects land in a single RecoveryOp.
    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname test1
    rados_put $dir $poolname test2
    rados_put $dir $poolname test3

    # can't read from this shard anymore
    inject_eio ec data $poolname $objname $dir 0 || return 1
    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1
    rados_get $dir $poolname test2
    rados_get $dir $poolname test3

    delete_erasure_coded_pool $poolname
}
# Test backfill with unfound object.
# Create many objects while one OSD is down, make one object unfound by
# injecting EIO on two of its shards, and verify the PG reports
# backfill_unfound, reads on the object hang, and mark_unfound_lost delete
# restores a clean cluster with every other object intact.
function TEST_ec_backfill_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=300
    # Must be between 1 and $lastobj
    local testobj=obj250

    # Short pg log so the down OSD must backfill instead of log-recover.
    export CEPH_ARGS
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # NOTE: original had "2>&2" here, a no-op self-redirect; ">&2" matches
    # every other kill_daemons call in this file.
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    run_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    # Poll until the PG reports the unfound object during backfill.
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        echo $state | grep backfill_unfound
        if [ "$?" = "0" ]; then
            break
        fi
        echo $state
        sleep 1
    done

    ceph pg 2.0 list_unfound | grep -q $testobj || return 1

    # Command should hang because object is unfound
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}
# Test recovery with unfound object.
# Same shape as TEST_ec_backfill_unfound, but with recovery batching enabled
# so the PG goes through recovery_unfound rather than backfill_unfound.
function TEST_ec_recovery_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=3
    # Must be between 1 and $lastobj
    local testobj=obj1

    export CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    # Short pg log to limit how much can be log-recovered.
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # NOTE: original had "2>&2" here, a no-op self-redirect; ">&2" matches
    # every other kill_daemons call in this file.
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    run_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    # Poll until the PG reports the unfound object during recovery.
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        echo $state | grep recovery_unfound
        if [ "$?" = "0" ]; then
            break
        fi
        echo $state
        sleep 1
    done

    ceph pg 2.0 list_unfound | grep -q $testobj || return 1

    # Command should hang because object is unfound
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}
# Entry point: main (from ceph-helpers.sh) creates the test dir and calls run.
main test-erasure-eio "$@"
# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-eio.sh"
# End: