]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #!/bin/bash |
2 | # | |
3 | # Copyright (C) 2015 Red Hat <contact@redhat.com> | |
4 | # | |
5 | # | |
6 | # Author: Kefu Chai <kchai@redhat.com> | |
7 | # | |
8 | # This program is free software; you can redistribute it and/or modify | |
9 | # it under the terms of the GNU Library Public License as published by | |
10 | # the Free Software Foundation; either version 2, or (at your option) | |
11 | # any later version. | |
12 | # | |
13 | # This program is distributed in the hope that it will be useful, | |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | # GNU Library Public License for more details. | |
17 | # | |
18 | ||
# Pull in the build-environment detection script that lives one directory
# above this script, then the shared ceph test helpers.
# NOTE(review): CEPH_ROOT is presumably set by detect-build-env-vars.sh, and
# the helper functions used below (setup, run_mon, run_osd, ...) presumably
# come from ceph-helpers.sh -- neither is defined in this file.
# Paths are quoted so a checkout located under a directory containing
# whitespace still works.
source "$(dirname "$0")/../detect-build-env-vars.sh"
source "$CEPH_ROOT/qa/workunits/ceph-helpers.sh"
21 | ||
#
# Run the selected test functions, each against a fresh monitor and manager.
#
# $1   - working directory handed to setup/teardown and to every test
# $2.. - optional names of the test functions to run; when none are given,
#        every shell function whose name matches TEST_[0-9a-z_]* is run
#
# Returns 1 as soon as any step fails.
# NOTE(review): presumably invoked by the `main` helper called at the bottom
# of this script -- main is not defined in this file.
#
function run() {
    local dir=$1
    shift

    # git grep '\<7112\>' : there must be only one
    export CEPH_MON="127.0.0.1:7112"
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir && run_mon $dir a && run_mgr $dir x || return 1
        # the monitor log must show that the erasure code plugins were
        # preloaded; flush the log first so the line is on disk
        CEPH_ARGS='' ceph --admin-daemon $dir/ceph-mon.a.asok log flush || return 1
        grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1
        $func $dir && teardown $dir || return 1
    done
}
43 | ||
#
# Start OSDs 0..3, wait for the cluster to become clean and verify from the
# log of osd.0 that the erasure code plugins were preloaded.
#
# $1 - (optional) test directory. When omitted, the `dir` variable of the
#      calling function is used (bash dynamic scoping), which is how the
#      TEST_* functions in this file call it.
#
# Returns 1 if any step fails.
#
function setup_osds() {
    # the right-hand side is expanded before the new local exists, so $dir
    # here still refers to the caller's value
    local dir=${1:-$dir}
    # keep the loop counter out of the global namespace
    local id

    for id in 0 1 2 3 ; do
        run_osd "$dir" $id || return 1
    done
    wait_for_clean || return 1

    # check that erasure code plugins are preloaded
    CEPH_ARGS='' ceph --admin-daemon "$dir/ceph-osd.0.asok" log flush || return 1
    grep 'load: jerasure.*lrc' "$dir/osd.0.log" || return 1
}
54 | ||
#
# Define the jerasure erasure code profile "myprofile" (k=2, m=1, failure
# domain osd), create an erasure coded pool using it and wait for the
# cluster to be clean.
#
# $1 - name of the pool to create
#
# Returns 1 if any step fails.
#
function create_erasure_coded_pool() {
    local poolname=$1

    if ! ceph osd erasure-code-profile set myprofile \
        plugin=jerasure k=2 m=1 ruleset-failure-domain=osd ; then
        return 1
    fi
    if ! ceph osd pool create $poolname 1 1 erasure myprofile ; then
        return 1
    fi
    wait_for_clean || return 1
}
66 | ||
#
# Delete the given pool, then remove the "myprofile" erasure code profile.
#
# $1 - name of the pool to delete
#
# The result of the pool deletion is not checked; the function returns the
# status of the profile removal.
#
function delete_pool() {
    local poolname=$1

    # the pool name has to be given twice, together with the confirmation flag
    ceph osd pool delete $poolname $poolname \
        --yes-i-really-really-mean-it
    ceph osd erasure-code-profile rm myprofile
}
73 | ||
#
# Generate the reference payload $dir/ORIGINAL and store it as an object.
#
# $1 - directory in which ORIGINAL is created
# $2 - pool to write to
# $3 - (optional) object name, defaults to SOMETHING
#
# ORIGINAL is 4096 bytes: four 1024 byte fields, each holding one marker
# right-aligned and padded with spaces. It is left in place so that
# rados_get can compare against it later.
#
# Returns 1 if the object cannot be written.
#
function rados_put() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    # keep the loop variable out of the global namespace
    local marker

    for marker in AAA BBB CCCC DDDD ; do
        printf "%*s" 1024 $marker
    done > "$dir/ORIGINAL"
    #
    # put the object; reading it back and comparing is done by rados_get
    #
    rados --pool "$poolname" put "$objname" "$dir/ORIGINAL" || return 1
}
87 | ||
#
# Read an object into $dir/COPY and check the outcome.
#
# $1 - directory holding ORIGINAL and receiving COPY
# $2 - pool to read from
# $3 - (optional) object name, defaults to SOMETHING
# $4 - (optional) "fail" when the read is expected to fail, defaults to ok
#
# With "fail" the function succeeds only if the read fails. Otherwise the
# read must succeed and COPY must be identical to $dir/ORIGINAL; COPY is
# removed after a successful comparison.
#
function rados_get() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local expect=${4:-ok}

    if [ $expect = "fail" ]; then
        #
        # the caller expects the read to fail: invert the status
        #
        ! rados --pool $poolname get $objname $dir/COPY
    else
        #
        # fetch the object and compare it with $dir/ORIGINAL
        #
        rados --pool $poolname get $objname $dir/COPY || return 1
        diff $dir/ORIGINAL $dir/COPY || return 1
        rm $dir/COPY
    fi
}
109 | ||
#
# Write an object, read it back and compare; optionally exercise recovery.
#
# $1 - directory used for the ORIGINAL / COPY files
# $2 - pool name
# $3 - (optional) object name, defaults to SOMETHING
# $4 - (optional) any non-empty value enables the recovery check
#
# Recovery check: the last OSD reported by get_osds for the object is
# killed and marked out, the object must then map to a set of OSDs that no
# longer contains it, and finally the OSD is marked in and restarted and
# the cluster must become clean again.
#
# $dir/ORIGINAL is removed on success. Returns 1 if any step fails.
#
function rados_put_get() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local recovery=$4

    #
    # put an object, then get it and compare: they must be equal
    #
    rados_put "$dir" "$poolname" "$objname" || return 1
    # We can read even though caller injected read error on one of the shards
    rados_get "$dir" "$poolname" "$objname" || return 1

    if [ -n "$recovery" ]; then
        #
        # take out the last OSD used to store the object,
        # bring it back, and check for clean PGs which means
        # recovery didn't crash the primary.
        #
        local -a initial_osds=($(get_osds "$poolname" "$objname"))
        # without this guard an empty mapping would yield a negative array
        # index and kill_daemons would be called with a bare "osd." name
        if [ ${#initial_osds[@]} -eq 0 ]; then
            echo "rados_put_get: no OSDs found for $poolname/$objname" >&2
            return 1
        fi
        local last_osd=${initial_osds[${#initial_osds[@]} - 1]}
        # Kill OSD
        kill_daemons "$dir" TERM osd.$last_osd >&2 < /dev/null || return 1
        ceph osd out $last_osd || return 1
        # the object must no longer map to the OSD that was taken out
        ! get_osds "$poolname" "$objname" | grep '\<'$last_osd'\>' || return 1
        ceph osd in $last_osd || return 1
        run_osd "$dir" $last_osd || return 1
        wait_for_clean || return 1
    fi

    rm "$dir/ORIGINAL"
}
143 | ||
#
# Arrange for reads of one shard of an object in pool-jerasure to fail.
#
# $1 - object name
# $2 - directory containing the OSD admin sockets
# $3 - index of the shard; it is used both to pick the OSD from the list
#      printed by get_osds and as the shard argument of injectdataerr
#
# Returns 1 if the configuration change or the injection fails.
#
function inject_eio() {
    local objname=$1 dir=$2 shard_id=$3

    local poolname=pool-jerasure
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    local asok=$dir/ceph-osd.$osd_id.asok

    # read error injection has to be enabled on the OSD first
    if ! set_config osd $osd_id filestore_debug_inject_read_err true ; then
        return 1
    fi
    # talk to the OSD admin socket directly, without the cluster arguments
    CEPH_ARGS='' ceph --admin-daemon $asok \
        injectdataerr $poolname $objname $shard_id || return 1
}
159 | ||
#
# Check reads of an object in pool-jerasure with one, then two, shards
# returning EIO.
#
# $1 - test directory
# $2 - index of the first shard to inject an error into; the second
#      injection goes to the following shard
# $3 - (optional) non-empty to also run the recovery check of rados_put_get
#
# With one bad shard the object must be written and read back correctly;
# with two bad shards the read must fail. Returns 1 otherwise.
#
function rados_get_data_eio() {
    local dir=$1
    local shard_id=$2
    local recovery=$3

    # inject eio to the specified shard
    #
    local poolname=pool-jerasure
    local objname=obj-eio-$$-$shard_id
    inject_eio $objname $dir $shard_id || return 1
    # $recovery is deliberately unquoted: when empty no argument is passed
    rados_put_get $dir $poolname $objname $recovery || return 1

    # shell arithmetic instead of spawning expr
    local next_shard_id=$((shard_id + 1))
    inject_eio $objname $dir $next_shard_id || return 1
    # Now 2 out of 3 shards get EIO, so should fail
    rados_get $dir $poolname $objname fail || return 1
}
180 | ||
#
# Change the size of the specified shard of an object in pool-jerasure.
#
# $1 - object name
# $2 - test directory; $dir/CORRUPT is used as scratch file
# $3 - index of the shard, used to pick the OSD from the get_osds output
# $4 - number of bytes
# $5 - "add" appends $4 random bytes to the current content of the shard;
#      any other value replaces the content with $4 random bytes (with an
#      empty payload when $4 is 0)
#
# noout is set around the manipulation.
# NOTE(review): presumably because objectstore_tool takes the OSD offline
# while it runs -- confirm against the helper's definition.
#
# The scratch file is removed and noout is unset on every path, including
# failures. Returns 1 if any step fails.
#
function set_size() {
    local objname=$1
    local dir=$2
    local shard_id=$3
    local bytes=$4
    local mode=$5

    local poolname=pool-jerasure
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    local status=0

    ceph osd set noout
    if [ "$mode" = "add" ]; then
        objectstore_tool "$dir" "$osd_id" "$objname" get-bytes "$dir/CORRUPT" &&
            dd if=/dev/urandom bs=$bytes count=1 >> "$dir/CORRUPT" || status=1
    elif [ "$bytes" = "0" ]; then
        # truncate instead of touch: a leftover CORRUPT file must not
        # leak its old content into the shard
        : > "$dir/CORRUPT" || status=1
    else
        dd if=/dev/urandom bs=$bytes count=1 of="$dir/CORRUPT" || status=1
    fi
    if [ $status -eq 0 ]; then
        objectstore_tool "$dir" "$osd_id" "$objname" set-bytes "$dir/CORRUPT" || status=1
    fi
    # always clean up, whether or not the steps above succeeded
    rm -f "$dir/CORRUPT"
    ceph osd unset noout || status=1
    return $status
}
212 | ||
#
# Check reads of an object in pool-jerasure with one, then two, shards of
# the wrong size.
#
# $1 - test directory
# $2 - index of the first shard to modify; the second modification goes
#      to the following shard
# $3 - number of bytes handed to set_size
# $4 - (optional) mode handed to set_size, defaults to "set"
#
# With one modified shard the read must still return the original data;
# with two modified shards the read must fail. Returns 1 otherwise.
#
function rados_get_data_bad_size() {
    local dir=$1
    local shard_id=$2
    local bytes=$3
    local mode=${4:-set}

    local poolname=pool-jerasure
    local objname=obj-size-$$-$shard_id-$bytes
    rados_put "$dir" $poolname "$objname" || return 1

    # Change the size of the specified shard
    #
    set_size "$objname" "$dir" "$shard_id" "$bytes" "$mode" || return 1

    rados_get "$dir" $poolname "$objname" || return 1

    # Leave objname and modify another shard
    # (shell arithmetic instead of spawning expr)
    local next_shard_id=$((shard_id + 1))
    set_size "$objname" "$dir" "$next_shard_id" "$bytes" "$mode" || return 1
    rados_get "$dir" $poolname "$objname" fail || return 1
}
237 | ||
238 | # | |
239 | # These two test cases try to validate the following behavior: | |
240 | # For object on EC pool, if there is one shard having read error ( | |
241 | # either primary or replica), client can still read object. | |
242 | # | |
243 | # If 2 shards have read errors the client will get an error. | |
244 | # | |
#
# Read errors starting at shard 0: the object must stay readable with one
# failing shard and become unreadable with two.
#
# $1 - test directory
#
function TEST_rados_get_subread_eio_shard_0() {
    local dir=$1
    setup_osds || return 1

    local poolname=pool-jerasure
    # inject eio on primary OSD (0) and replica OSD (1)
    local shard_id=0
    create_erasure_coded_pool $poolname &&
        rados_get_data_eio $dir $shard_id || return 1
    delete_pool $poolname
}
256 | ||
#
# Read errors starting at shard 1: the object must stay readable with one
# failing shard and become unreadable with two.
#
# $1 - test directory
#
function TEST_rados_get_subread_eio_shard_1() {
    local dir=$1
    setup_osds || return 1

    local poolname=pool-jerasure
    # inject eio into replicas OSD (1) and OSD (2)
    local shard_id=1
    create_erasure_coded_pool $poolname &&
        rados_get_data_eio $dir $shard_id || return 1
    delete_pool $poolname
}
268 | ||
269 | # | |
270 | # These two test cases try to validate the following behavior: | |
271 | # For an object on an EC pool, if one shard has an incorrect size, | |
272 | # which causes an internal read error, the client can still read the object. | |
273 | # | |
274 | # If 2 shards have incorrect size the client will get an error. | |
275 | # | |
#
# Wrong shard sizes starting at shard 0, exercised with three variants:
# 10 bytes, 0 bytes and 256 bytes appended.
#
# $1 - test directory
#
function TEST_rados_get_bad_size_shard_0() {
    local dir=$1
    setup_osds || return 1

    local poolname=pool-jerasure
    # Set incorrect size into primary OSD (0) and replica OSD (1)
    local shard_id=0
    create_erasure_coded_pool $poolname &&
        rados_get_data_bad_size $dir $shard_id 10 &&
        rados_get_data_bad_size $dir $shard_id 0 &&
        rados_get_data_bad_size $dir $shard_id 256 add || return 1
    delete_pool $poolname
}
289 | ||
#
# Wrong shard sizes starting at shard 1, exercised with three variants:
# 10 bytes, 0 bytes and 256 bytes appended.
#
# $1 - test directory
#
function TEST_rados_get_bad_size_shard_1() {
    local dir=$1
    setup_osds || return 1

    local poolname=pool-jerasure
    # Set incorrect size into replicas OSD (1) and OSD (2)
    local shard_id=1
    create_erasure_coded_pool $poolname &&
        rados_get_data_bad_size $dir $shard_id 10 &&
        rados_get_data_bad_size $dir $shard_id 0 &&
        rados_get_data_bad_size $dir $shard_id 256 add || return 1
    delete_pool $poolname
}
303 | ||
#
# Read errors starting at shard 0, with the recovery check of
# rados_put_get enabled.
#
# $1 - test directory
#
function TEST_rados_get_with_subreadall_eio_shard_0() {
    local dir=$1
    # inject eio on primary OSD (0)
    # (declared once; the former second declaration was redundant)
    local shard_id=0

    setup_osds || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname || return 1
    rados_get_data_eio $dir $shard_id recovery || return 1

    delete_pool $poolname
}
318 | ||
#
# Read errors starting at shard 1, with the recovery check of
# rados_put_get enabled.
#
# $1 - test directory
#
function TEST_rados_get_with_subreadall_eio_shard_1() {
    local dir=$1
    # inject eio on replica OSD (1)
    # (declared once; the former initial "shard_id=0" was misleading and
    # overridden before use)
    local shard_id=1

    setup_osds || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname || return 1
    rados_get_data_eio $dir $shard_id recovery || return 1

    delete_pool $poolname
}
333 | ||
# Hand control to the shared test driver, forwarding the command line.
# NOTE(review): main is not defined in this file; presumably it comes from
# the sourced ceph-helpers.sh and ends up calling run() -- confirm.
main test-erasure-eio "$@"
335 | ||
336 | # Local Variables: | |
337 | # compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-eio.sh" | |
338 | # End: |