]>
Commit | Line | Data |
---|---|---|
11fdf7f2 | 1 | #!/usr/bin/env bash |
7c673cae FG |
2 | # |
3 | # Copyright (C) 2015 Red Hat <contact@redhat.com> | |
4 | # | |
5 | # | |
6 | # Author: Kefu Chai <kchai@redhat.com> | |
7 | # | |
8 | # This program is free software; you can redistribute it and/or modify | |
9 | # it under the terms of the GNU Library Public License as published by | |
10 | # the Free Software Foundation; either version 2, or (at your option) | |
11 | # any later version. | |
12 | # | |
13 | # This program is distributed in the hope that it will be useful, | |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | # GNU Library Public License for more details. | |
17 | # | |
18 | ||
c07f9fc5 | 19 | source $CEPH_ROOT/qa/standalone/ceph-helpers.sh |
7c673cae FG |
20 | |
# Top-level driver: run every TEST_* function (or just those named on the
# command line) against a fresh one-mon/one-mgr cluster rooted at $1.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7112" # git grep '\<7112\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd-mclock-profile=high_recovery_ops "

    # Default to every TEST_* function currently defined in the shell.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local test_function
    for test_function in $funcs ; do
        setup $dir || return 1
        run_mon $dir a || return 1
        run_mgr $dir x || return 1
        create_pool rbd 4 || return 1

        # Check that erasure code plugins are preloaded by the mon.
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
        grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1
        $test_function $dir || return 1
        teardown $dir || return 1
    done
}
45 | ||
# Start $1 OSDs (ids 0 .. count-1) and verify osd.0 preloaded the
# erasure code plugins.
# NOTE(review): reads $dir without declaring it -- this relies on bash
# dynamic scoping picking up the caller's "local dir"; confirm all
# callers define dir before calling.
function setup_osds() {
    local count=$1
    shift

    for id in $(seq 0 $((count - 1))) ; do
        run_osd $dir $id || return 1
    done

    # Check that erasure code plugins are preloaded by osd.0.
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
    grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1
}
58 | ||
# Print the "state" field of pg $1, as reported by "ceph pg dump pgs".
function get_state() {
    local pgid=$1
    ceph --format json pg dump pgs 2>/dev/null | \
        jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .state"
}
65 | ||
# Create EC profile "myprofile" (jerasure, k=$2, m=$3) and an erasure
# coded pool $1 on top of it, then wait for the cluster to go clean.
function create_erasure_coded_pool() {
    local poolname=$1
    local k=$2
    local m=$3

    ceph osd erasure-code-profile set myprofile \
        plugin=jerasure \
        k=$k m=$m \
        crush-failure-domain=osd || return 1
    create_pool $poolname 1 1 erasure myprofile \
        || return 1
    wait_for_clean || return 1
}
82 | ||
# Tear down pool $1 and the "myprofile" EC profile created for it.
function delete_erasure_coded_pool() {
    local poolname=$1
    ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it
    ceph osd erasure-code-profile rm myprofile
}
88 | ||
# Write a 4KiB reference object ($dir/ORIGINAL, four 1024-byte padded
# markers) into pool $2 under name $3 (default SOMETHING).
function rados_put() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}

    for marker in AAA BBB CCCC DDDD ; do
        printf "%*s" 1024 $marker
    done > $dir/ORIGINAL
    #
    # get and put an object, compare they are equal
    #
    rados --pool $poolname put $objname $dir/ORIGINAL || return 1
}
102 | ||
# Read object $3 from pool $2 into $dir/COPY and compare it with the
# reference $dir/ORIGINAL. When $4 is "fail", expect the read itself
# to fail instead.
function rados_get() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local expect=${4:-ok}

    # Expected failure: succeed only if the get does NOT.
    if [ $expect = "fail" ]; then
        ! rados --pool $poolname get $objname $dir/COPY
        return
    fi

    # Expected success: fetched data must match the reference file.
    rados --pool $poolname get $objname $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1
    rm $dir/COPY
}
124 | ||
# Remove shard $6 of object $4 from its OSD with objectstore_tool.
# $1 (pooltype) and $2 (which) are accepted but unused so the signature
# matches inject_eio, letting callers dispatch via "inject_$inject".
function inject_remove() {
    local pooltype=$1
    local which=$2
    local poolname=$3
    local objname=$4
    local dir=$5
    local shard_id=$6

    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    objectstore_tool $dir $osd_id $objname remove || return 1
}
144 | ||
# Write an object, inject an error ($1: "eio" or "remove") on shard $3,
# and verify a single bad shard is still readable. With $4 = "recovery",
# additionally cycle the last OSD and check recovery completes; otherwise
# break a second shard and verify the read now fails.
function rados_put_get_data() {
    local inject=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local arg=$1

    # Inject the error into the specified shard.
    local poolname=pool-jerasure
    local objname=obj-$inject-$$-$shard_id
    rados_put $dir $poolname $objname || return 1
    inject_$inject ec data $poolname $objname $dir $shard_id || return 1
    rados_get $dir $poolname $objname || return 1

    if [ "$arg" = "recovery" ]; then
        # Take out the last OSD used to store the object, bring it
        # back, and check for clean PGs, which means recovery didn't
        # crash the primary.
        local -a initial_osds=($(get_osds $poolname $objname))
        local last_osd=${initial_osds[-1]}
        # Kill OSD
        kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
        ceph osd out ${last_osd} || return 1
        ! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1
        ceph osd in ${last_osd} || return 1
        activate_osd $dir ${last_osd} || return 1
        wait_for_clean || return 1
        # Won't check for eio on get here -- recovery above might have fixed it
    else
        # Break a second shard: the read must now fail.
        shard_id=$(expr $shard_id + 1)
        inject_$inject ec data $poolname $objname $dir $shard_id || return 1
        rados_get $dir $poolname $objname fail || return 1
        rm $dir/ORIGINAL
    fi
}
188 | ||
# Corrupt the size of shard $3 of object $1: mode "add" appends $4 random
# bytes to the shard; otherwise the shard is replaced with $4 random bytes
# (or truncated to empty when $4 is 0). noout is set for the duration so
# the cluster does not react to the manipulation.
function set_size() {
    local objname=$1
    local dir=$2
    local shard_id=$3
    local bytes=$4
    local mode=$5

    local poolname=pool-jerasure
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    ceph osd set noout
    if [ "$mode" = "add" ]; then
        # Append $bytes of random data to the existing shard payload.
        objectstore_tool $dir $osd_id $objname get-bytes $dir/CORRUPT || return 1
        dd if=/dev/urandom bs=$bytes count=1 >> $dir/CORRUPT
    elif [ "$bytes" = "0" ]; then
        # Truncate the shard to zero length.
        touch $dir/CORRUPT
    else
        # Replace the shard with $bytes of random data.
        dd if=/dev/urandom bs=$bytes count=1 of=$dir/CORRUPT
    fi
    objectstore_tool $dir $osd_id $objname set-bytes $dir/CORRUPT || return 1
    rm -f $dir/CORRUPT
    ceph osd unset noout
}
220 | ||
# Write an object, corrupt the size of shard $2 (by $3 bytes, mode $4,
# default "set") and verify one bad shard is still readable; then corrupt
# the next shard as well and verify the read fails.
function rados_get_data_bad_size() {
    local dir=$1
    local shard_id=$2
    local bytes=$3
    local mode=${4:-set}

    local poolname=pool-jerasure
    local objname=obj-size-$$-$shard_id-$bytes
    rados_put $dir $poolname $objname || return 1

    # Corrupt the size of the specified shard; object must still read.
    set_size $objname $dir $shard_id $bytes $mode || return 1
    rados_get $dir $poolname $objname || return 1

    # Leave objname and corrupt another shard; the read must now fail.
    shard_id=$(expr $shard_id + 1)
    set_size $objname $dir $shard_id $bytes $mode || return 1
    rados_get $dir $poolname $objname fail || return 1
    rm $dir/ORIGINAL
}
246 | ||
#
# These two test cases try to validate the following behavior:
# For object on EC pool, if there is one shard having read error (
# either primary or replica), client can still read object.
#
# If 2 shards have read errors the client will get an error.
#
function TEST_rados_get_subread_eio_shard_0() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # Inject eio on primary OSD (0) and replica OSD (1).
    local shard_id=0
    rados_put_get_data eio $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}
265 | ||
# Same as shard_0 variant, but starting the eio injection at a replica.
function TEST_rados_get_subread_eio_shard_1() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # Inject eio into replicas OSD (1) and OSD (2).
    local shard_id=1
    rados_put_get_data eio $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}
277 | ||
# We don't remove the object from the primary because
# that just causes it to appear to be missing

function TEST_rados_get_subread_missing() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # Inject remove into replicas OSD (1) and OSD (2).
    local shard_id=1
    rados_put_get_data remove $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}
292 | ||
#
#
# These two test cases try to validate that following behavior:
# For object on EC pool, if there is one shard which an incorrect
# size this will cause an internal read error, client can still read object.
#
# If 2 shards have incorrect size the client will get an error.
#
function TEST_rados_get_bad_size_shard_0() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # Set incorrect size into primary OSD (0) and replica OSD (1):
    # replaced with 10 bytes, truncated to 0, and 256 bytes appended.
    local shard_id=0
    rados_get_data_bad_size $dir $shard_id 10 || return 1
    rados_get_data_bad_size $dir $shard_id 0 || return 1
    rados_get_data_bad_size $dir $shard_id 256 add || return 1
    delete_erasure_coded_pool $poolname
}
314 | ||
# Same as shard_0 variant, but starting the size corruption at a replica.
function TEST_rados_get_bad_size_shard_1() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # Set incorrect size into replicas OSD (1) and OSD (2).
    local shard_id=1
    rados_get_data_bad_size $dir $shard_id 10 || return 1
    rados_get_data_bad_size $dir $shard_id 0 || return 1
    rados_get_data_bad_size $dir $shard_id 256 add || return 1
    delete_erasure_coded_pool $poolname
}
328 | ||
# Read-all path: eio on the primary shard, then exercise the recovery
# branch of rados_put_get_data (OSD out/in cycle must end clean).
function TEST_rados_get_with_subreadall_eio_shard_0() {
    local dir=$1
    local shard_id=0

    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # Inject eio on primary OSD (0).
    rados_put_get_data eio $dir $shard_id recovery || return 1

    delete_erasure_coded_pool $poolname
}
342 | ||
# Read-all path: eio on a replica shard, then exercise the recovery
# branch of rados_put_get_data (OSD out/in cycle must end clean).
function TEST_rados_get_with_subreadall_eio_shard_1() {
    local dir=$1
    local shard_id=1

    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # Inject eio on replica OSD (1).
    rados_put_get_data eio $dir $shard_id recovery || return 1

    delete_erasure_coded_pool $poolname
}
356 | ||
# Test recovery of an object whose attribute read returns an error.
function TEST_ec_object_attr_read_error() {
    local dir=$1
    local objname=myobject

    setup_osds 7 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    local primary_osd=$(get_primary $poolname $objname)
    # Kill primary OSD
    kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1

    # Write data
    rados_put $dir $poolname $objname || return 1

    # Inject eio, shard 1 is the one read attr
    inject_eio ec mdata $poolname $objname $dir 1 || return 1

    # Restart OSD
    activate_osd $dir ${primary_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname myobject || return 1

    delete_erasure_coded_pool $poolname
}
387 | ||
# Test recovery when the first k copies aren't all available.
function TEST_ec_single_recovery_error() {
    local dir=$1
    local objname=myobject

    setup_osds 7 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill OSD
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname myobject || return 1

    delete_erasure_coded_pool $poolname
}
415 | ||
# Test recovery when repeated reads are needed due to EIO.
function TEST_ec_recovery_multiple_errors() {
    local dir=$1
    local objname=myobject

    setup_osds 9 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 4 4 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1
    # first read will try shards 0,1,2 when 0 gets EIO, shard 3 gets
    # tried as well. Make that fail to test multiple-EIO handling.
    inject_eio ec data $poolname $objname $dir 3 || return 1
    inject_eio ec data $poolname $objname $dir 4 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill OSD
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname myobject || return 1

    delete_erasure_coded_pool $poolname
}
447 | ||
# Test recovery when there's only one shard to recover, but multiple
# objects recovering in one RecoveryOp.
function TEST_ec_recovery_multiple_objects() {
    local dir=$1
    local objname=myobject

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    # Fixed: these puts/gets previously discarded their exit status, so
    # the test could never fail on a bad write or read; check each one
    # like the sibling tests do.
    rados_put $dir $poolname test1 || return 1
    rados_put $dir $poolname test2 || return 1
    rados_put $dir $poolname test3 || return 1

    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1 || return 1
    rados_get $dir $poolname test2 || return 1
    rados_get $dir $poolname test3 || return 1

    delete_erasure_coded_pool $poolname
}
477 | ||
# Test multi-object recovery when the one missing shard gets EIO.
function TEST_ec_recovery_multiple_objects_eio() {
    local dir=$1
    local objname=myobject

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    # Fixed: these puts/gets previously discarded their exit status, so
    # the test could never fail on a bad write or read; check each one
    # like the sibling tests do.
    rados_put $dir $poolname test1 || return 1
    rados_put $dir $poolname test2 || return 1
    rados_put $dir $poolname test3 || return 1

    # can't read from this shard anymore
    # NOTE(review): eio is injected on $objname ("myobject"), which is
    # never written by this test -- confirm the injection is meant to be
    # keyed on that name rather than on test1..test3.
    inject_eio ec data $poolname $objname $dir 0 || return 1
    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1 || return 1
    rados_get $dir $poolname test2 || return 1
    rados_get $dir $poolname test3 || return 1

    delete_erasure_coded_pool $poolname
}
508 | ||
# Test backfill with unfound object
function TEST_ec_backfill_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=300
    # Must be between 1 and $lastobj
    local testobj=obj250

    # Short pg log so the restarted OSD must backfill, not log-recover.
    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1
    local primary=$(get_primary $poolname $objname)

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Fixed: was "2>&2", a no-op self-redirect of stderr; ">&2" matches
    # the other kill_daemons call sites and sends output to stderr.
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    # Write enough objects that the returning OSD needs a real backfill.
    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    # Make two shards of the test object unreadable so backfill cannot
    # reconstruct it and it becomes unfound.
    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    activate_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # Wait (up to 240s) for the pg to report backfill_unfound.
    for tmp in $(seq 1 240); do
        state=$(get_state 2.0)
        echo $state | grep backfill_unfound
        if [ "$?" = "0" ]; then
            break
        fi
        echo $state
        sleep 1
    done

    ceph pg dump pgs
    # Fixed: same "2>&2" no-op redirect as above.
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    sleep 5

    ceph pg dump pgs
    ceph pg 2.0 list_unfound
    ceph pg 2.0 query

    ceph pg 2.0 list_unfound | grep -q $testobj || return 1

    check=$(ceph pg 2.0 list_unfound | jq ".available_might_have_unfound")
    test "$check" == "true" || return 1

    eval check=$(ceph pg 2.0 list_unfound | jq .might_have_unfound[0].status)
    test "$check" == "osd is down" || return 1

    eval check=$(ceph pg 2.0 list_unfound | jq .might_have_unfound[0].osd)
    test "$check" == "2(4)" || return 1

    activate_osd $dir ${last_osd} || return 1

    # Command should hang because object is unfound
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    # Everything except the deleted unfound object must read back intact.
    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}
606 | ||
# Test recovery with unfound object
function TEST_ec_recovery_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=100
    # Must be between 1 and $lastobj
    local testobj=obj75

    # Short pg log plus batched recovery settings.
    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Fixed: was "2>&2", a no-op self-redirect of stderr; ">&2" matches
    # the other kill_daemons call sites and sends output to stderr.
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    # Make two shards of the test object unreadable so recovery cannot
    # reconstruct it and it becomes unfound.
    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    activate_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # Wait (up to 100s) for the pg to report recovery_unfound.
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        echo $state | grep recovery_unfound
        if [ "$?" = "0" ]; then
            break
        fi
        echo "$state "
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_unfound
    ceph pg 2.0 query

    ceph pg 2.0 list_unfound | grep -q $testobj || return 1

    check=$(ceph pg 2.0 list_unfound | jq ".available_might_have_unfound")
    test "$check" == "true" || return 1

    check=$(ceph pg 2.0 list_unfound | jq ".might_have_unfound | length")
    test $check == 0 || return 1

    # Command should hang because object is unfound
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    # Everything except the deleted unfound object must read back intact.
    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}
695 | ||
main test-erasure-eio "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-eio.sh"
# End: