}
function setup_osds() {
- for id in $(seq 0 3) ; do
+ local count=$1
+ shift
+
+ for id in $(seq 0 $(expr $count - 1)) ; do
run_osd $dir $id || return 1
done
wait_for_clean || return 1
grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1
}
+function get_state() {
+ local pgid=$1
+ local sname=state
+ ceph --format json pg dump pgs 2>/dev/null | \
+ jq -r ".[] | select(.pgid==\"$pgid\") | .$sname"
+}
+
function create_erasure_coded_pool() {
local poolname=$1
+ shift
+ local k=$1
+ shift
+ local m=$1
+ shift
ceph osd erasure-code-profile set myprofile \
plugin=jerasure \
- k=2 m=1 \
+ k=$k m=$m \
crush-failure-domain=osd || return 1
create_pool $poolname 1 1 erasure myprofile \
|| return 1
wait_for_clean || return 1
}
-function delete_pool() {
+function delete_erasure_coded_pool() {
local poolname=$1
-
ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it
ceph osd erasure-code-profile rm myprofile
}
rm $dir/COPY
}
-function rados_put_get() {
+
+function inject_remove() {
+ local pooltype=$1
+ shift
+ local which=$1
+ shift
+ local poolname=$1
+ shift
+ local objname=$1
+ shift
local dir=$1
- local poolname=$2
- local objname=${3:-SOMETHING}
- local recovery=$4
+ shift
+ local shard_id=$1
+ shift
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local osd_id=${initial_osds[$shard_id]}
+ objectstore_tool $dir $osd_id $objname remove || return 1
+}
+
+# Test with an inject error
+function rados_put_get_data() {
+ local inject=$1
+ shift
+ local dir=$1
+ shift
+ local shard_id=$1
+ shift
+ local arg=$1
+
+ # inject eio to speificied shard
#
- # get and put an object, compare they are equal
- #
+ local poolname=pool-jerasure
+ local objname=obj-$inject-$$-$shard_id
rados_put $dir $poolname $objname || return 1
- # We can read even though caller injected read error on one of the shards
+ inject_$inject ec data $poolname $objname $dir $shard_id || return 1
rados_get $dir $poolname $objname || return 1
- if [ -n "$recovery" ];
+ if [ "$arg" = "recovery" ];
then
#
# take out the last OSD used to store the object,
# recovery didn't crash the primary.
#
local -a initial_osds=($(get_osds $poolname $objname))
- local last=$((${#initial_osds[@]} - 1))
+ local last_osd=${initial_osds[-1]}
# Kill OSD
- kill_daemons $dir TERM osd.${initial_osds[$last]} >&2 < /dev/null || return 1
- ceph osd out ${initial_osds[$last]} || return 1
- ! get_osds $poolname $objname | grep '\<'${initial_osds[$last]}'\>' || return 1
- ceph osd in ${initial_osds[$last]} || return 1
- run_osd $dir ${initial_osds[$last]} || return 1
+ kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+ ceph osd out ${last_osd} || return 1
+ ! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1
+ ceph osd in ${last_osd} || return 1
+ run_osd $dir ${last_osd} || return 1
wait_for_clean || return 1
fi
- rm $dir/ORIGINAL
-}
-
-function rados_get_data_eio() {
- local dir=$1
- shift
- local shard_id=$1
- shift
- local recovery=$1
- shift
-
- # inject eio to speificied shard
- #
- local poolname=pool-jerasure
- local objname=obj-eio-$$-$shard_id
- inject_eio ec data $poolname $objname $dir $shard_id || return 1
- rados_put_get $dir $poolname $objname $recovery || return 1
-
shard_id=$(expr $shard_id + 1)
- inject_eio ec data $poolname $objname $dir $shard_id || return 1
- # Now 2 out of 3 shards get EIO, so should fail
+ inject_$inject ec data $poolname $objname $dir $shard_id || return 1
+ # Now 2 out of 3 shards get an error, so should fail
rados_get $dir $poolname $objname fail || return 1
+ rm $dir/ORIGINAL
}
# Change the size of speificied shard
shard_id=$(expr $shard_id + 1)
set_size $objname $dir $shard_id $bytes $mode || return 1
rados_get $dir $poolname $objname fail || return 1
+ rm $dir/ORIGINAL
}
#
#
function TEST_rados_get_subread_eio_shard_0() {
local dir=$1
- setup_osds || return 1
+ setup_osds 4 || return 1
local poolname=pool-jerasure
- create_erasure_coded_pool $poolname || return 1
+ create_erasure_coded_pool $poolname 2 1 || return 1
# inject eio on primary OSD (0) and replica OSD (1)
local shard_id=0
- rados_get_data_eio $dir $shard_id || return 1
- delete_pool $poolname
+ rados_put_get_data eio $dir $shard_id || return 1
+ delete_erasure_coded_pool $poolname
}
function TEST_rados_get_subread_eio_shard_1() {
local dir=$1
- setup_osds || return 1
+ setup_osds 4 || return 1
local poolname=pool-jerasure
- create_erasure_coded_pool $poolname || return 1
+ create_erasure_coded_pool $poolname 2 1 || return 1
# inject eio into replicas OSD (1) and OSD (2)
local shard_id=1
- rados_get_data_eio $dir $shard_id || return 1
- delete_pool $poolname
+ rados_put_get_data eio $dir $shard_id || return 1
+ delete_erasure_coded_pool $poolname
+}
+
+# We don't remove the object from the primary because
+# that just causes it to appear to be missing
+
+function TEST_rados_get_subread_missing() {
+ local dir=$1
+ setup_osds 4 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 2 1 || return 1
+ # inject remove into replicas OSD (1) and OSD (2)
+ local shard_id=1
+ rados_put_get_data remove $dir $shard_id || return 1
+ delete_erasure_coded_pool $poolname
}
+#
#
# These two test cases try to validate that following behavior:
# For object on EC pool, if there is one shard which an incorrect
#
function TEST_rados_get_bad_size_shard_0() {
local dir=$1
- setup_osds || return 1
+ setup_osds 4 || return 1
local poolname=pool-jerasure
- create_erasure_coded_pool $poolname || return 1
+ create_erasure_coded_pool $poolname 2 1 || return 1
# Set incorrect size into primary OSD (0) and replica OSD (1)
local shard_id=0
rados_get_data_bad_size $dir $shard_id 10 || return 1
rados_get_data_bad_size $dir $shard_id 0 || return 1
rados_get_data_bad_size $dir $shard_id 256 add || return 1
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
function TEST_rados_get_bad_size_shard_1() {
local dir=$1
- setup_osds || return 1
+ setup_osds 4 || return 1
local poolname=pool-jerasure
- create_erasure_coded_pool $poolname || return 1
+ create_erasure_coded_pool $poolname 2 1 || return 1
# Set incorrect size into replicas OSD (1) and OSD (2)
local shard_id=1
rados_get_data_bad_size $dir $shard_id 10 || return 1
rados_get_data_bad_size $dir $shard_id 0 || return 1
rados_get_data_bad_size $dir $shard_id 256 add || return 1
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
function TEST_rados_get_with_subreadall_eio_shard_0() {
local dir=$1
local shard_id=0
- setup_osds || return 1
+ setup_osds 4 || return 1
local poolname=pool-jerasure
- create_erasure_coded_pool $poolname || return 1
+ create_erasure_coded_pool $poolname 2 1 || return 1
# inject eio on primary OSD (0)
- local shard_id=0
- rados_get_data_eio $dir $shard_id recovery || return 1
+ rados_put_get_data eio $dir $shard_id recovery || return 1
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
function TEST_rados_get_with_subreadall_eio_shard_1() {
local dir=$1
- local shard_id=0
+ local shard_id=1
- setup_osds || return 1
+ setup_osds 4 || return 1
local poolname=pool-jerasure
- create_erasure_coded_pool $poolname || return 1
+ create_erasure_coded_pool $poolname 2 1 || return 1
# inject eio on replica OSD (1)
- local shard_id=1
- rados_get_data_eio $dir $shard_id recovery || return 1
+ rados_put_get_data eio $dir $shard_id recovery || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery the object attr read error
+function TEST_ec_object_attr_read_error() {
+ local dir=$1
+ local objname=myobject
+
+ setup_osds 7 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ local primary_osd=$(get_primary $poolname $objname)
+ # Kill primary OSD
+ kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1
+
+ # Write data
+ rados_put $dir $poolname $objname || return 1
+
+ # Inject eio, shard 1 is the one read attr
+ inject_eio ec mdata $poolname $objname $dir 1 || return 1
+
+ # Restart OSD
+ run_osd $dir ${primary_osd} || return 1
+
+ # Cluster should recover this object
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname myobject || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery the first k copies aren't all available
+function TEST_ec_single_recovery_error() {
+ local dir=$1
+ local objname=myobject
+
+ setup_osds 7 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ rados_put $dir $poolname $objname || return 1
+ inject_eio ec data $poolname $objname $dir 0 || return 1
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ # Kill OSD
+ kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+ ceph osd down ${last_osd} || return 1
+ ceph osd out ${last_osd} || return 1
+
+ # Cluster should recover this object
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname myobject || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery when repeated reads are needed due to EIO
+function TEST_ec_recovery_multiple_errors() {
+ local dir=$1
+ local objname=myobject
+
+ setup_osds 9 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 4 4 || return 1
+
+ rados_put $dir $poolname $objname || return 1
+ inject_eio ec data $poolname $objname $dir 0 || return 1
+ # first read will try shards 0,1,2 when 0 gets EIO, shard 3 gets
+ # tried as well. Make that fail to test multiple-EIO handling.
+ inject_eio ec data $poolname $objname $dir 3 || return 1
+ inject_eio ec data $poolname $objname $dir 4 || return 1
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ # Kill OSD
+ kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+ ceph osd down ${last_osd} || return 1
+ ceph osd out ${last_osd} || return 1
+
+ # Cluster should recover this object
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname myobject || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery when there's only one shard to recover, but multiple
+# objects recovering in one RecoveryOp
+function TEST_ec_recovery_multiple_objects() {
+ local dir=$1
+ local objname=myobject
+
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
+ setup_osds 7 || return 1
+ CEPH_ARGS=$ORIG_ARGS
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ rados_put $dir $poolname test1
+ rados_put $dir $poolname test2
+ rados_put $dir $poolname test3
+
+ ceph osd out 0 || return 1
+
+ # Cluster should recover these objects all at once
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname test1
+ rados_get $dir $poolname test2
+ rados_get $dir $poolname test3
+
+ delete_erasure_coded_pool $poolname
+}
+
+# test multi-object recovery when the one missing shard gets EIO
+function TEST_ec_recovery_multiple_objects_eio() {
+ local dir=$1
+ local objname=myobject
+
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
+ setup_osds 7 || return 1
+ CEPH_ARGS=$ORIG_ARGS
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ rados_put $dir $poolname test1
+ rados_put $dir $poolname test2
+ rados_put $dir $poolname test3
+
+ # can't read from this shard anymore
+ inject_eio ec data $poolname $objname $dir 0 || return 1
+ ceph osd out 0 || return 1
+
+ # Cluster should recover these objects all at once
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname test1
+ rados_get $dir $poolname test2
+ rados_get $dir $poolname test3
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test backfill with unfound object
+function TEST_ec_backfill_unfound() {
+ local dir=$1
+ local objname=myobject
+ local lastobj=300
+ # Must be between 1 and $lastobj
+ local testobj=obj250
+
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
+ setup_osds 5 || return 1
+ CEPH_ARGS=$ORIG_ARGS
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ ceph pg dump pgs
+
+ rados_put $dir $poolname $objname || return 1
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1
+ ceph osd down ${last_osd} || return 1
+ ceph osd out ${last_osd} || return 1
+
+ ceph pg dump pgs
+
+ dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
+ for i in $(seq 1 $lastobj)
+ do
+ rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
+ done
+
+ inject_eio ec data $poolname $testobj $dir 0 || return 1
+ inject_eio ec data $poolname $testobj $dir 1 || return 1
+
+ run_osd $dir ${last_osd} || return 1
+ ceph osd in ${last_osd} || return 1
+
+ sleep 15
+
+ for tmp in $(seq 1 100); do
+ state=$(get_state 2.0)
+ echo $state | grep backfill_unfound
+ if [ "$?" = "0" ]; then
+ break
+ fi
+ echo $state
+ sleep 1
+ done
+
+ ceph pg dump pgs
+ ceph pg 2.0 list_missing | grep -q $testobj || return 1
+
+ # Command should hang because object is unfound
+ timeout 5 rados -p $poolname get $testobj $dir/CHECK
+ test $? = "124" || return 1
+
+ ceph pg 2.0 mark_unfound_lost delete
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $lastobj)
+ do
+ if [ obj${i} = "$testobj" ]; then
+ # Doesn't exist anymore
+ ! rados -p $poolname get $testobj $dir/CHECK || return 1
+ else
+ rados --pool $poolname get obj${i} $dir/CHECK || return 1
+ diff -q $dir/ORIGINAL $dir/CHECK || return 1
+ fi
+ done
+
+ rm -f ${dir}/ORIGINAL ${dir}/CHECK
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery with unfound object
+function TEST_ec_recovery_unfound() {
+ local dir=$1
+ local objname=myobject
+ local lastobj=100
+ # Must be between 1 and $lastobj
+ local testobj=obj75
+
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
+ CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
+ setup_osds 5 || return 1
+ CEPH_ARGS=$ORIG_ARGS
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ ceph pg dump pgs
+
+ rados_put $dir $poolname $objname || return 1
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1
+ ceph osd down ${last_osd} || return 1
+ ceph osd out ${last_osd} || return 1
+
+ ceph pg dump pgs
+
+ dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
+ for i in $(seq 1 $lastobj)
+ do
+ rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
+ done
+
+ inject_eio ec data $poolname $testobj $dir 0 || return 1
+ inject_eio ec data $poolname $testobj $dir 1 || return 1
+
+ run_osd $dir ${last_osd} || return 1
+ ceph osd in ${last_osd} || return 1
+
+ sleep 15
+
+ for tmp in $(seq 1 100); do
+ state=$(get_state 2.0)
+ echo $state | grep recovery_unfound
+ if [ "$?" = "0" ]; then
+ break
+ fi
+ echo "$state "
+ sleep 1
+ done
+
+ ceph pg dump pgs
+ ceph pg 2.0 list_missing | grep -q $testobj || return 1
+
+ # Command should hang because object is unfound
+ timeout 5 rados -p $poolname get $testobj $dir/CHECK
+ test $? = "124" || return 1
+
+ ceph pg 2.0 mark_unfound_lost delete
+
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $lastobj)
+ do
+ if [ obj${i} = "$testobj" ]; then
+ # Doesn't exist anymore
+ ! rados -p $poolname get $testobj $dir/CHECK || return 1
+ else
+ rados --pool $poolname get obj${i} $dir/CHECK || return 1
+ diff -q $dir/ORIGINAL $dir/CHECK || return 1
+ fi
+ done
+
+ rm -f ${dir}/ORIGINAL ${dir}/CHECK
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
main test-erasure-eio "$@"