3 # Copyright (C) 2015 Red Hat <contact@redhat.com>
6 # Author: Kefu Chai <kchai@redhat.com>
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU Library Public License as published by
10 # the Free Software Foundation; either version 2, or (at your option)
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU Library Public License for more details.
# Pull in the build environment detection and the shared test helpers.
# Quoted so the script survives a checkout path containing spaces.
source "$(dirname "$0")/../detect-build-env-vars.sh"
source "$CEPH_ROOT/qa/workunits/ceph-helpers.sh"
# Harness entry point: bring up a mon+mgr, then run each TEST_* function
# (all of them by default, or only those named on the command line).
# NOTE(review): the function header and argument preamble were not visible
# in the reviewed chunk; reconstructed per the standard ceph-helpers shape.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7112" # git grep '\<7112\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    # Intentionally unquoted: $funcs is a whitespace separated list of names.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    for func in $funcs ; do
        setup "$dir" || return 1
        run_mon "$dir" a || return 1
        run_mgr "$dir" x || return 1
        # check that erasure code plugins are preloaded
        CEPH_ARGS='' ceph --admin-daemon "$dir/ceph-mon.a.asok" log flush || return 1
        grep 'load: jerasure.*lrc' "$dir/mon.a.log" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}
# Start osd.0 through osd.3 and wait for the cluster to go clean.
# Takes no arguments: it reads $dir from the calling TEST_* function
# (bash 'local' variables are dynamically scoped).
function setup_osds() {
    local id
    for id in 0 1 2 3 ; do
        run_osd "$dir" "$id" || return 1
    done
    wait_for_clean || return 1

    # check that erasure code plugins are preloaded
    CEPH_ARGS='' ceph --admin-daemon "$dir/ceph-osd.0.asok" log flush || return 1
    grep 'load: jerasure.*lrc' "$dir/osd.0.log" || return 1
}
# Create erasure coded pool $1 (1 PG) backed by the "myprofile" EC profile,
# then wait for the cluster to go clean.
function create_erasure_coded_pool() {
    local poolname=$1

    # NOTE(review): the plugin/k/m lines of the profile were not visible in
    # the reviewed chunk; jerasure k=2 m=1 matches the 'load: jerasure'
    # checks and the "2 out of 3 shards" comments elsewhere — confirm.
    ceph osd erasure-code-profile set myprofile \
        plugin=jerasure \
        k=2 m=1 \
        crush-failure-domain=osd || return 1
    ceph osd pool create "$poolname" 1 1 erasure myprofile \
        || return 1
    wait_for_clean || return 1
}
# Delete pool $1 and remove the "myprofile" EC profile created for it.
# Best-effort teardown: failures are deliberately not treated as errors.
function delete_pool() {
    local poolname=$1

    ceph osd pool delete "$poolname" "$poolname" --yes-i-really-really-mean-it
    ceph osd erasure-code-profile rm myprofile
}
# Generate a deterministic 4 KiB payload in $dir/ORIGINAL and store it as
# object $objname (default SOMETHING) in pool $poolname.
function rados_put() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}

    # Four markers, each right-padded to 1024 bytes -> 4096 byte payload.
    local marker
    for marker in AAA BBB CCCC DDDD ; do
        printf "%*s" 1024 "$marker"
    done > "$dir/ORIGINAL"

    #
    # get and put an object, compare they are equal
    #
    rados --pool "$poolname" put "$objname" "$dir/ORIGINAL" || return 1
}
# Read object $objname from $poolname into $dir/COPY and compare it with
# $dir/ORIGINAL.  When $4 is "fail" the read is expected to fail instead.
function rados_get() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    # NOTE(review): default for $expect not visible in reviewed chunk;
    # any value other than "fail" selects the success path.
    local expect=${4:-ok}

    #
    # Expect a failure to get object
    #
    if [ "$expect" = "fail" ]; then
        ! rados --pool "$poolname" get "$objname" "$dir/COPY"
        return
    fi
    #
    # get an object, compare with $dir/ORIGINAL
    #
    rados --pool "$poolname" get "$objname" "$dir/COPY" || return 1
    diff "$dir/ORIGINAL" "$dir/COPY" || return 1
    rm "$dir/COPY"
}
# Store an object then read it back and verify it round-trips.  When $4
# (recovery) is non-empty, additionally cycle the last OSD that holds the
# object out and back in, and require the PGs to go clean again — proving
# that recovery does not crash the primary.
function rados_put_get() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local recovery=$4

    # get and put an object, compare they are equal
    rados_put "$dir" "$poolname" "$objname" || return 1
    # We can read even though caller injected read error on one of the shards
    rados_get "$dir" "$poolname" "$objname" || return 1

    if [ -n "$recovery" ]; then
        #
        # take out the last OSD used to store the object,
        # bring it back, and check for clean PGs which means
        # recovery didn't crash the primary.
        #
        local -a initial_osds=($(get_osds $poolname $objname))
        local last=$((${#initial_osds[@]} - 1))
        kill_daemons "$dir" TERM osd."${initial_osds[$last]}" >&2 < /dev/null || return 1
        ceph osd out "${initial_osds[$last]}" || return 1
        # the stopped OSD must no longer appear in the acting set
        ! get_osds $poolname $objname | grep '\<'${initial_osds[$last]}'\>' || return 1
        ceph osd in "${initial_osds[$last]}" || return 1
        run_osd "$dir" "${initial_osds[$last]}" || return 1
        wait_for_clean || return 1
    fi

    rm "$dir/ORIGINAL"
}
# Make the OSD that holds shard $3 of object $1 return EIO when that shard
# is read: enable filestore_debug_inject_read_err on it, then arm
# injectdataerr for this pool/object/shard via the admin socket.
function inject_eio() {
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift

    local poolname=pool-jerasure
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    set_config osd "$osd_id" filestore_debug_inject_read_err true || return 1
    CEPH_ARGS='' ceph --admin-daemon "$dir/ceph-osd.$osd_id.asok" \
             injectdataerr "$poolname" "$objname" "$shard_id" || return 1
}
# Inject EIO on shard $2 of a fresh object, verify the object is still
# readable (and optionally, when $3 is non-empty, that recovery survives),
# then inject EIO on the next shard too and expect the read to fail.
function rados_get_data_eio() {
    local dir=$1
    shift
    local shard_id=$1
    shift
    local recovery=$1
    shift

    #
    # inject eio to specified shard
    #
    local poolname=pool-jerasure
    local objname=obj-eio-$$-$shard_id
    inject_eio "$objname" "$dir" "$shard_id" || return 1
    rados_put_get "$dir" "$poolname" "$objname" "$recovery" || return 1

    shard_id=$((shard_id + 1))
    inject_eio "$objname" "$dir" "$shard_id" || return 1
    # Now 2 out of 3 shards get EIO, so should fail
    rados_get "$dir" "$poolname" "$objname" fail || return 1
}
#
# Change the size of specified shard
#
# Corrupt the on-disk size of shard $3 of object $1:
#   mode "add"  — append $4 random bytes to the existing shard payload
#   $4 == 0     — truncate the shard to zero bytes
#   otherwise   — replace the shard with $4 random bytes
function set_size() {
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1}

    local poolname=pool-jerasure
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}

    if [ "$mode" = "add" ]; then
        objectstore_tool "$dir" "$osd_id" "$objname" get-bytes "$dir/CORRUPT" || return 1
        dd if=/dev/urandom bs="$bytes" count=1 >> "$dir/CORRUPT"
    elif [ "$bytes" = "0" ]; then
        touch "$dir/CORRUPT"
    else
        dd if=/dev/urandom bs="$bytes" count=1 of="$dir/CORRUPT"
    fi
    objectstore_tool "$dir" "$osd_id" "$objname" set-bytes "$dir/CORRUPT" || return 1
    rm -f "$dir/CORRUPT"
}
# Write an object, corrupt the size of shard $2 with set_size ($3 bytes,
# mode $4), verify reads still succeed, then corrupt the next shard the
# same way and expect reads to fail.
function rados_get_data_bad_size() {
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1}

    local poolname=pool-jerasure
    local objname=obj-size-$$-$shard_id-$bytes
    rados_put "$dir" "$poolname" "$objname" || return 1

    #
    # Change the size of specified shard
    #
    set_size "$objname" "$dir" "$shard_id" "$bytes" "$mode" || return 1

    rados_get "$dir" "$poolname" "$objname" || return 1

    # Leave objname and modify another shard
    shard_id=$((shard_id + 1))
    set_size "$objname" "$dir" "$shard_id" "$bytes" "$mode" || return 1
    rados_get "$dir" "$poolname" "$objname" fail || return 1
}
#
# These two test cases try to validate the following behavior:
# For object on EC pool, if there is one shard having read error (
# either primary or replica), client can still read object.
#
# If 2 shards have read errors the client will get an error.
#
function TEST_rados_get_subread_eio_shard_0() {
    local dir=$1
    setup_osds || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool "$poolname" || return 1
    # inject eio on primary OSD (0) and replica OSD (1)
    local shard_id=0
    rados_get_data_eio "$dir" "$shard_id" || return 1
    delete_pool "$poolname"
}
# Same as TEST_rados_get_subread_eio_shard_0 but the first EIO is injected
# on a replica shard instead of the primary.
function TEST_rados_get_subread_eio_shard_1() {
    local dir=$1
    setup_osds || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool "$poolname" || return 1
    # inject eio into replicas OSD (1) and OSD (2)
    local shard_id=1
    rados_get_data_eio "$dir" "$shard_id" || return 1
    delete_pool "$poolname"
}
#
# These two test cases try to validate the following behavior:
# For an object on an EC pool, if one shard has an incorrect size this
# causes an internal read error, but the client can still read the object.
#
# If 2 shards have incorrect size the client will get an error.
#
function TEST_rados_get_bad_size_shard_0() {
    local dir=$1
    setup_osds || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool "$poolname" || return 1
    # Set incorrect size into primary OSD (0) and replica OSD (1)
    local shard_id=0
    rados_get_data_bad_size "$dir" "$shard_id" 10 || return 1
    rados_get_data_bad_size "$dir" "$shard_id" 0 || return 1
    rados_get_data_bad_size "$dir" "$shard_id" 256 add || return 1
    delete_pool "$poolname"
}
# Same as TEST_rados_get_bad_size_shard_0 but the first corrupted shard is
# a replica instead of the primary.
function TEST_rados_get_bad_size_shard_1() {
    local dir=$1
    setup_osds || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool "$poolname" || return 1
    # Set incorrect size into replicas OSD (1) and OSD (2)
    local shard_id=1
    rados_get_data_bad_size "$dir" "$shard_id" 10 || return 1
    rados_get_data_bad_size "$dir" "$shard_id" 0 || return 1
    rados_get_data_bad_size "$dir" "$shard_id" 256 add || return 1
    delete_pool "$poolname"
}
# EIO on the primary shard plus the recovery path of rados_get_data_eio:
# also cycles an OSD out/in and requires recovery to complete cleanly.
function TEST_rados_get_with_subreadall_eio_shard_0() {
    local dir=$1
    setup_osds || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool "$poolname" || return 1
    # inject eio on primary OSD (0)
    local shard_id=0
    rados_get_data_eio "$dir" "$shard_id" recovery || return 1

    delete_pool "$poolname"
}
# EIO on a replica shard plus the recovery path of rados_get_data_eio.
function TEST_rados_get_with_subreadall_eio_shard_1() {
    local dir=$1
    setup_osds || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool "$poolname" || return 1
    # inject eio on replica OSD (1)
    local shard_id=1
    rados_get_data_eio "$dir" "$shard_id" recovery || return 1

    delete_pool "$poolname"
}
334 main test-erasure-eio
"$@"
337 # compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-eio.sh"