#!/usr/bin/env bash
#
# Copyright (C) 2015 Red Hat <contact@redhat.com>
#
#
# Author: Kefu Chai <kchai@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#
c07f9fc5 | 19 | source $CEPH_ROOT/qa/standalone/ceph-helpers.sh |
7c673cae FG |
20 | |
# Test harness entry point: set up CEPH_ARGS, then run each selected
# TEST_* function in a freshly set-up cluster (mon + mgr + rbd pool).
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7112" # git grep '\<7112\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    # With no explicit selection on the command line, run every TEST_*
    # function defined in this file.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        run_mon $dir a || return 1
        run_mgr $dir x || return 1
        create_pool rbd 4 || return 1

        # check that erasure code plugins are preloaded by the mon
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
        grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}
44 | ||
# Start $1 OSDs (ids 0..count-1) and verify the erasure code plugins
# were preloaded by osd.0.
# NOTE(review): $dir is not a parameter here — it is inherited through
# bash dynamic scoping from the calling TEST_* function's "local dir".
function setup_osds() {
    local count=$1
    shift

    for id in $(seq 0 $((count - 1))) ; do
        run_osd $dir $id || return 1
    done

    # check that erasure code plugins are preloaded
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
    grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1
}
57 | ||
b32b8144 FG |
# Print the current state string (e.g. "active+clean") of PG $1 on stdout.
function get_state() {
    local pgid=$1
    local sname=state
    ceph --format json pg dump pgs 2>/dev/null | \
        jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname"
}
64 | ||
7c673cae FG |
# Create erasure coded pool $1 using a jerasure profile named
# "myprofile" with k=$2 and m=$3, then wait until the cluster is clean.
function create_erasure_coded_pool() {
    local poolname=$1
    shift
    local k=$1
    shift
    local m=$1
    shift

    ceph osd erasure-code-profile set myprofile \
        plugin=jerasure \
        k=$k m=$m \
        crush-failure-domain=osd || return 1
    create_pool $poolname 1 1 erasure myprofile || return 1
    wait_for_clean || return 1
}
81 | ||
# Remove pool $1 and the "myprofile" erasure code profile that
# create_erasure_coded_pool created for it.
function delete_erasure_coded_pool() {
    local poolname=$1
    ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it
    ceph osd erasure-code-profile rm myprofile
}
87 | ||
# Build a 4KB payload (four markers, each space-padded to 1024 bytes),
# save it as $dir/ORIGINAL for later comparison, and put it into
# $poolname as $objname (default SOMETHING).
function rados_put() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}

    for marker in AAA BBB CCCC DDDD ; do
        printf "%*s" 1024 $marker
    done > $dir/ORIGINAL
    # store the object; readers compare against $dir/ORIGINAL
    rados --pool $poolname put $objname $dir/ORIGINAL || return 1
}
101 | ||
# Read $objname from $poolname and diff it against $dir/ORIGINAL.
# With "fail" as the 4th argument the read itself is expected to fail,
# and this function succeeds only when it does.
function rados_get() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local expect=${4:-ok}

    if [ $expect = "fail" ];
    then
        # invert the status: success means rados could NOT read it
        ! rados --pool $poolname get $objname $dir/COPY
        return
    fi
    # fetch the object and verify it matches what rados_put stored
    rados --pool $poolname get $objname $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1
    rm $dir/COPY
}
123 | ||
b32b8144 FG |
124 | |
# Remove the shard of $objname held by the OSD at position $shard_id in
# the acting set, via the objectstore tool.  The first two arguments
# (pooltype, which) are unused here but keep the signature parallel to
# inject_eio so callers can do "inject_$inject ec data ...".
function inject_remove() {
    local pooltype=$1
    local which=$2
    local poolname=$3
    local objname=$4
    local dir=$5
    local shard_id=$6

    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    objectstore_tool $dir $osd_id $objname remove || return 1
}
143 | ||
# Write an object, corrupt one shard with inject_$1 (eio or remove) and
# check the client can still read it.  With "recovery" as the optional
# 4th argument, also take the last OSD out and back in, and verify the
# cluster goes clean (recovery didn't crash the primary).  Finally
# corrupt a second shard and check the read now fails (2 of 3 bad).
function rados_put_get_data() {
    local inject=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local arg=$1

    # inject the error into the specified shard
    local poolname=pool-jerasure
    local objname=obj-$inject-$$-$shard_id
    rados_put $dir $poolname $objname || return 1
    inject_$inject ec data $poolname $objname $dir $shard_id || return 1
    rados_get $dir $poolname $objname || return 1

    if [ "$arg" = "recovery" ];
    then
        #
        # take out the last OSD used to store the object,
        # bring it back, and check for clean PGs which means
        # recovery didn't crash the primary.
        #
        local -a initial_osds=($(get_osds $poolname $objname))
        local last_osd=${initial_osds[-1]}
        # Kill OSD
        kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
        ceph osd out ${last_osd} || return 1
        ! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1
        ceph osd in ${last_osd} || return 1
        run_osd $dir ${last_osd} || return 1
        wait_for_clean || return 1
    fi

    shard_id=$(expr $shard_id + 1)
    inject_$inject ec data $poolname $objname $dir $shard_id || return 1
    # Now 2 out of 3 shards get an error, so the read should fail
    rados_get $dir $poolname $objname fail || return 1
    rm $dir/ORIGINAL
}
186 | ||
# Change the size of the specified shard of $objname.
#   mode "add"  : append $bytes of random data to the existing shard.
#   $bytes == 0 : replace the shard with an empty file.
#   otherwise   : replace the shard with $bytes of random data.
# Sets noout while tampering so the cluster doesn't rebalance.
# FIX: the original returned early on objectstore_tool failure, which
# skipped "ceph osd unset noout" and left the flag set for every
# subsequent test; noout is now always cleared before returning.
function set_size() {
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1}

    local poolname=pool-jerasure
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    ceph osd set noout
    local rc=0
    if [ "$mode" = "add" ];
    then
        if objectstore_tool $dir $osd_id $objname get-bytes $dir/CORRUPT ;
        then
            dd if=/dev/urandom bs=$bytes count=1 >> $dir/CORRUPT
        else
            rc=1
        fi
    elif [ "$bytes" = "0" ];
    then
        touch $dir/CORRUPT
    else
        dd if=/dev/urandom bs=$bytes count=1 of=$dir/CORRUPT
    fi
    if [ $rc = 0 ];
    then
        objectstore_tool $dir $osd_id $objname set-bytes $dir/CORRUPT || rc=1
    fi
    rm -f $dir/CORRUPT
    # always clear noout, even on failure
    ceph osd unset noout
    return $rc
}
218 | ||
# Write an object, corrupt the size of shard $2 ($3 bytes, mode $4,
# default "set") and verify it is still readable; then corrupt the next
# shard too and verify the read fails (2 of 3 bad shards).
function rados_get_data_bad_size() {
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1:-set}

    local poolname=pool-jerasure
    local objname=obj-size-$$-$shard_id-$bytes
    rados_put $dir $poolname $objname || return 1

    # resize the specified shard; one bad shard is still readable
    set_size $objname $dir $shard_id $bytes $mode || return 1
    rados_get $dir $poolname $objname || return 1

    # leave objname alone and corrupt the next shard as well
    shard_id=$(expr $shard_id + 1)
    set_size $objname $dir $shard_id $bytes $mode || return 1
    rados_get $dir $poolname $objname fail || return 1
    rm $dir/ORIGINAL
}
244 | ||
#
# These two test cases try to validate the following behavior:
# For an object on an EC pool, if there is one shard with a read error
# (either primary or replica), the client can still read the object.
#
# If 2 shards have read errors the client will get an error.
#
function TEST_rados_get_subread_eio_shard_0() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio on primary OSD (0) and replica OSD (1)
    local shard_id=0
    rados_put_get_data eio $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}
263 | ||
# Same as shard_0 above, but starting the EIO injection at a replica.
function TEST_rados_get_subread_eio_shard_1() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio into replicas OSD (1) and OSD (2)
    local shard_id=1
    rados_put_get_data eio $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}
275 | ||
b32b8144 FG |
# We don't remove the object from the primary because
# that just causes it to appear to be missing
function TEST_rados_get_subread_missing() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject remove into replicas OSD (1) and OSD (2)
    local shard_id=1
    rados_put_get_data remove $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}
290 | ||
#
# These two test cases try to validate the following behavior:
# For an object on an EC pool, if one shard has an incorrect size this
# causes an internal read error, but the client can still read the
# object.
#
# If 2 shards have incorrect sizes the client will get an error.
#
function TEST_rados_get_bad_size_shard_0() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # set incorrect size on primary OSD (0) and replica OSD (1)
    local shard_id=0
    rados_get_data_bad_size $dir $shard_id 10 || return 1
    rados_get_data_bad_size $dir $shard_id 0 || return 1
    rados_get_data_bad_size $dir $shard_id 256 add || return 1
    delete_erasure_coded_pool $poolname
}
312 | ||
# Same as shard_0 above, but starting the size corruption at a replica.
function TEST_rados_get_bad_size_shard_1() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # set incorrect size on replicas OSD (1) and OSD (2)
    local shard_id=1
    rados_get_data_bad_size $dir $shard_id 10 || return 1
    rados_get_data_bad_size $dir $shard_id 0 || return 1
    rados_get_data_bad_size $dir $shard_id 256 add || return 1
    delete_erasure_coded_pool $poolname
}
326 | ||
# EIO on the primary shard, plus the "recovery" branch of
# rados_put_get_data (OSD out/in cycle must leave the PGs clean).
function TEST_rados_get_with_subreadall_eio_shard_0() {
    local dir=$1
    local shard_id=0

    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio on primary OSD (0)
    rados_put_get_data eio $dir $shard_id recovery || return 1

    delete_erasure_coded_pool $poolname
}
340 | ||
# Same as shard_0 above, but the EIO lands on a replica shard.
function TEST_rados_get_with_subreadall_eio_shard_1() {
    local dir=$1
    local shard_id=1

    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio on replica OSD (1)
    rados_put_get_data eio $dir $shard_id recovery || return 1

    delete_erasure_coded_pool $poolname
}
354 | ||
91327a77 AA |
# Test recovery when the object attr read returns an error.
# FIX: the final rados_get used a hard-coded "myobject" instead of
# $objname; same value today, but it would silently diverge if objname
# changed — use the variable for consistency.
function TEST_ec_object_attr_read_error() {
    local dir=$1
    local objname=myobject

    setup_osds 7 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    local primary_osd=$(get_primary $poolname $objname)
    # Kill primary OSD
    kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1

    # Write data
    rados_put $dir $poolname $objname || return 1

    # Inject eio, shard 1 is the one read attr
    inject_eio ec mdata $poolname $objname $dir 1 || return 1

    # Restart OSD
    run_osd $dir ${primary_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname $objname || return 1

    delete_erasure_coded_pool $poolname
}
385 | ||
b32b8144 | 386 | # Test recovery the first k copies aren't all available |
28e407b8 | 387 | function TEST_ec_single_recovery_error() { |
b32b8144 FG |
388 | local dir=$1 |
389 | local objname=myobject | |
390 | ||
391 | setup_osds 7 || return 1 | |
392 | ||
393 | local poolname=pool-jerasure | |
394 | create_erasure_coded_pool $poolname 3 2 || return 1 | |
395 | ||
396 | rados_put $dir $poolname $objname || return 1 | |
397 | inject_eio ec data $poolname $objname $dir 0 || return 1 | |
398 | ||
399 | local -a initial_osds=($(get_osds $poolname $objname)) | |
400 | local last_osd=${initial_osds[-1]} | |
401 | # Kill OSD | |
402 | kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1 | |
403 | ceph osd down ${last_osd} || return 1 | |
404 | ceph osd out ${last_osd} || return 1 | |
405 | ||
406 | # Cluster should recover this object | |
407 | wait_for_clean || return 1 | |
408 | ||
28e407b8 AA |
409 | rados_get $dir $poolname myobject || return 1 |
410 | ||
411 | delete_erasure_coded_pool $poolname | |
412 | } | |
413 | ||
# Test recovery when repeated reads are needed due to EIO.
# FIX: the final rados_get used a hard-coded "myobject" instead of
# $objname; use the variable for consistency.
function TEST_ec_recovery_multiple_errors() {
    local dir=$1
    local objname=myobject

    setup_osds 9 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 4 4 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1
    # first read will try shards 0,1,2 when 0 gets EIO, shard 3 gets
    # tried as well. Make that fail to test multiple-EIO handling.
    inject_eio ec data $poolname $objname $dir 3 || return 1
    inject_eio ec data $poolname $objname $dir 4 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill OSD
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname $objname || return 1

    delete_erasure_coded_pool $poolname
}
445 | ||
# Test recovery when there's only one shard to recover, but multiple
# objects recovering in one RecoveryOp.
# FIX: the rados_put and rados_get calls ignored their exit status, so
# the test could never fail on the very reads it is meant to verify;
# each now propagates failure.  Also dropped the unused objname local.
function TEST_ec_recovery_multiple_objects() {
    local dir=$1

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname test1 || return 1
    rados_put $dir $poolname test2 || return 1
    rados_put $dir $poolname test3 || return 1

    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1 || return 1
    rados_get $dir $poolname test2 || return 1
    rados_get $dir $poolname test3 || return 1

    delete_erasure_coded_pool $poolname
}
475 | ||
# Test multi-object recovery when the one missing shard gets EIO.
# FIX: the rados_put and rados_get calls ignored their exit status, so
# the test could never fail on the very reads it is meant to verify;
# each now propagates failure.
function TEST_ec_recovery_multiple_objects_eio() {
    local dir=$1
    local objname=myobject

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname test1 || return 1
    rados_put $dir $poolname test2 || return 1
    rados_put $dir $poolname test3 || return 1

    # can't read from this shard anymore
    inject_eio ec data $poolname $objname $dir 0 || return 1
    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1 || return 1
    rados_get $dir $poolname test2 || return 1
    rados_get $dir $poolname test3 || return 1

    delete_erasure_coded_pool $poolname
}
506 | ||
# Test backfill with an unfound object.
# FIX: kill_daemons was redirected with "2>&2" — a no-op self-redirect
# typo; ">&2" matches every other kill_daemons call site in this file.
function TEST_ec_backfill_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=300
    # Must be between 1 and $lastobj
    local testobj=obj250

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    # write $lastobj more objects while the OSD is down, forcing backfill
    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    run_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # wait (up to 100s) for the PG to report backfill_unfound
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        echo $state | grep backfill_unfound
        if [ "$?" = "0" ]; then
            break
        fi
        echo $state
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_unfound | grep -q $testobj || return 1

    # Command should hang because object is unfound
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    # every object except the deleted unfound one must still be intact
    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}
585 | ||
# Test recovery with an unfound object.
# FIX: kill_daemons was redirected with "2>&2" — a no-op self-redirect
# typo; ">&2" matches every other kill_daemons call site in this file.
function TEST_ec_recovery_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=100
    # Must be between 1 and $lastobj
    local testobj=obj75

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    # write $lastobj more objects while the OSD is down
    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    run_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # wait (up to 100s) for the PG to report recovery_unfound
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        echo $state | grep recovery_unfound
        if [ "$?" = "0" ]; then
            break
        fi
        echo "$state "
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_unfound | grep -q $testobj || return 1

    # Command should hang because object is unfound
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    # every object except the deleted unfound one must still be intact
    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}
665 | ||
main test-erasure-eio "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-eio.sh"
# End: