wait_for_clean || return 1
}
-function delete_pool() {
+# Delete the named pool and remove the "myprofile" erasure-code profile
+# along with it.  Renamed from delete_pool to make clear it also tears
+# down the EC profile, not just the pool.
+function delete_erasure_coded_pool() {
local poolname=$1
-
ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it
ceph osd erasure-code-profile rm myprofile
}
# inject eio on primary OSD (0) and replica OSD (1)
local shard_id=0
rados_put_get_data eio $dir $shard_id || return 1
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
function TEST_rados_get_subread_eio_shard_1() {
# inject eio into replicas OSD (1) and OSD (2)
local shard_id=1
rados_put_get_data eio $dir $shard_id || return 1
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
# We don't remove the object from the primary because
# inject remove into replicas OSD (1) and OSD (2)
local shard_id=1
rados_put_get_data remove $dir $shard_id || return 1
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
#
rados_get_data_bad_size $dir $shard_id 10 || return 1
rados_get_data_bad_size $dir $shard_id 0 || return 1
rados_get_data_bad_size $dir $shard_id 256 add || return 1
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
function TEST_rados_get_bad_size_shard_1() {
rados_get_data_bad_size $dir $shard_id 10 || return 1
rados_get_data_bad_size $dir $shard_id 0 || return 1
rados_get_data_bad_size $dir $shard_id 256 add || return 1
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
function TEST_rados_get_with_subreadall_eio_shard_0() {
# inject eio on primary OSD (0)
rados_put_get_data eio $dir $shard_id recovery || return 1
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
function TEST_rados_get_with_subreadall_eio_shard_1() {
# inject eio on replica OSD (1)
rados_put_get_data eio $dir $shard_id recovery || return 1
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
# Test recovery when the first k copies aren't all available
-function TEST_ec_recovery_errors() {
+function TEST_ec_single_recovery_error() {
local dir=$1
local objname=myobject
# Cluster should recover this object
wait_for_clean || return 1
- delete_pool $poolname
+ rados_get $dir $poolname myobject || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery when repeated reads are needed due to EIO: several shards
+# return EIO so a single round of reads cannot reconstruct the object.
+function TEST_ec_recovery_multiple_errors() {
+ local dir=$1
+ local objname=myobject
+
+ setup_osds 9 || return 1
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 4 4 || return 1
+
+ rados_put $dir $poolname $objname || return 1
+ inject_eio ec data $poolname $objname $dir 0 || return 1
+ # first read will try shards 0,1,2; when 0 gets EIO, shard 3 gets
+ # tried as well. Make that fail too (and shard 4) to test
+ # multiple-EIO handling.
+ inject_eio ec data $poolname $objname $dir 3 || return 1
+ inject_eio ec data $poolname $objname $dir 4 || return 1
+
+ # Take the last acting OSD down and out so the object must recover.
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local last_osd=${initial_osds[-1]}
+ # Kill OSD
+ kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+ ceph osd down ${last_osd} || return 1
+ ceph osd out ${last_osd} || return 1
+
+ # Cluster should recover this object
+ wait_for_clean || return 1
+
+ # Use $objname (was a literal "myobject") for consistency with the
+ # rest of the function.
+ rados_get $dir $poolname $objname || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test recovery when there's only one shard to recover, but multiple
+# objects recovering in one RecoveryOp
+function TEST_ec_recovery_multiple_objects() {
+ local dir=$1
+
+ # Allow several objects per recovery start so they batch into one op.
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
+ setup_osds 7 || return 1
+ CEPH_ARGS=$ORIG_ARGS
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ # Propagate put/get failures with || return 1, matching the error
+ # handling used everywhere else in this file.
+ rados_put $dir $poolname test1 || return 1
+ rados_put $dir $poolname test2 || return 1
+ rados_put $dir $poolname test3 || return 1
+
+ ceph osd out 0 || return 1
+
+ # Cluster should recover these objects all at once
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname test1 || return 1
+ rados_get $dir $poolname test2 || return 1
+ rados_get $dir $poolname test3 || return 1
+
+ delete_erasure_coded_pool $poolname
+}
+
+# Test multi-object recovery when the one missing shard gets EIO
+function TEST_ec_recovery_multiple_objects_eio() {
+ local dir=$1
+ local objname=myobject
+
+ # Allow several objects per recovery start so they batch into one op.
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
+ setup_osds 7 || return 1
+ CEPH_ARGS=$ORIG_ARGS
+
+ local poolname=pool-jerasure
+ create_erasure_coded_pool $poolname 3 2 || return 1
+
+ # Propagate put/get failures with || return 1, matching the error
+ # handling used everywhere else in this file.
+ rados_put $dir $poolname test1 || return 1
+ rados_put $dir $poolname test2 || return 1
+ rados_put $dir $poolname test3 || return 1
+
+ # can't read from this shard anymore
+ inject_eio ec data $poolname $objname $dir 0 || return 1
+ ceph osd out 0 || return 1
+
+ # Cluster should recover these objects all at once
+ wait_for_clean || return 1
+
+ rados_get $dir $poolname test1 || return 1
+ rados_get $dir $poolname test2 || return 1
+ rados_get $dir $poolname test3 || return 1
+
+ delete_erasure_coded_pool $poolname
+}
# Test backfill with unfound object
# Must be between 1 and $lastobj
local testobj=obj250
- export CEPH_ARGS
+ ORIG_ARGS=$CEPH_ARGS
CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
setup_osds 5 || return 1
+ CEPH_ARGS=$ORIG_ARGS
local poolname=pool-jerasure
create_erasure_coded_pool $poolname 3 2 || return 1
rm -f ${dir}/ORIGINAL ${dir}/CHECK
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
# Test recovery with unfound object
# Must be between 1 and $lastobj
local testobj=obj75
+ ORIG_ARGS=$CEPH_ARGS
+ CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
+ CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
setup_osds 5 || return 1
+ CEPH_ARGS=$ORIG_ARGS
local poolname=pool-jerasure
create_erasure_coded_pool $poolname 3 2 || return 1
rm -f ${dir}/ORIGINAL ${dir}/CHECK
- delete_pool $poolname
+ delete_erasure_coded_pool $poolname
}
main test-erasure-eio "$@"