#!/usr/bin/env bash
#
# Copyright (C) 2015 Red Hat <contact@redhat.com>
#
#
# Author: Kefu Chai <kchai@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7112" # git grep '\<7112\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd-mclock-profile=high_recovery_ops "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        run_mon $dir a || return 1
        run_mgr $dir x || return 1
        create_pool rbd 4 || return 1

        # check that erasure code plugins are preloaded
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
        grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}
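
# Usage sketch (assumed harness convention, not defined in this file): the
# standalone wrapper runs every TEST_* function by default, or only those
# named on the command line, e.g.
#   ../qa/run-standalone.sh "test-erasure-eio.sh TEST_ec_single_recovery_error"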

function setup_osds() {
    local count=$1
    shift

    for id in $(seq 0 $(expr $count - 1)) ; do
        run_osd $dir $id || return 1
    done

    # check that erasure code plugins are preloaded
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
    grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1
}

function get_state() {
    local pgid=$1
    local sname=state
    ceph --format json pg dump pgs 2>/dev/null | \
        jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname"
}
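
# Example (illustrative values only): querying PG 2.0 mid-recovery might print
#   get_state 2.0   # -> "active+recovery_unfound+undersized+degraded"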

function create_erasure_coded_pool() {
    local poolname=$1
    shift
    local k=$1
    shift
    local m=$1
    shift

    ceph osd erasure-code-profile set myprofile \
        plugin=jerasure \
        k=$k m=$m \
        crush-failure-domain=osd || return 1
    create_pool $poolname 1 1 erasure myprofile \
        || return 1
    wait_for_clean || return 1
}

function delete_erasure_coded_pool() {
    local poolname=$1
    ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it
    ceph osd erasure-code-profile rm myprofile
}

function rados_put() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    for marker in AAA BBB CCCC DDDD ; do
        printf "%*s" 1024 $marker
    done > $dir/ORIGINAL
    #
    # put the object; rados_get fetches it back and compares with $dir/ORIGINAL
    #
    rados --pool $poolname put $objname $dir/ORIGINAL || return 1
}

function rados_get() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local expect=${4:-ok}

    #
    # Expect a failure to get object
    #
    if [ $expect = "fail" ];
    then
        ! rados --pool $poolname get $objname $dir/COPY
        return
    fi
    #
    # get an object, compare with $dir/ORIGINAL
    #
    rados --pool $poolname get $objname $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1
    rm $dir/COPY
}
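
# Example call with illustrative names:
#   rados_get $dir pool-jerasure obj-foo fail   # assert that the read fails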


function inject_remove() {
    local pooltype=$1
    shift
    local which=$1
    shift
    local poolname=$1
    shift
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift

    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    objectstore_tool $dir $osd_id $objname remove || return 1
}
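
# Example (mirrors how rados_put_get_data invokes it, names illustrative):
#   inject_remove ec data pool-jerasure obj-foo $dir 1
# removes the on-disk copy of shard 1 via the objectstore tool on the OSD
# holding that shard.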

# Test rados get with an injected error
function rados_put_get_data() {
    local inject=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local arg=$1

    # inject eio to the specified shard
    #
    local poolname=pool-jerasure
    local objname=obj-$inject-$$-$shard_id
    rados_put $dir $poolname $objname || return 1
    inject_$inject ec data $poolname $objname $dir $shard_id || return 1
    rados_get $dir $poolname $objname || return 1

    if [ "$arg" = "recovery" ];
    then
        #
        # take out the last OSD used to store the object,
        # bring it back, and check for clean PGs which means
        # recovery didn't crash the primary.
        #
        local -a initial_osds=($(get_osds $poolname $objname))
        local last_osd=${initial_osds[-1]}
        # Kill OSD
        kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
        ceph osd out ${last_osd} || return 1
        ! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1
        ceph osd in ${last_osd} || return 1
        activate_osd $dir ${last_osd} || return 1
        wait_for_clean || return 1
        # Won't check for eio on get here -- recovery above might have fixed it
    else
        shard_id=$(expr $shard_id + 1)
        inject_$inject ec data $poolname $objname $dir $shard_id || return 1
        rados_get $dir $poolname $objname fail || return 1
        rm $dir/ORIGINAL
    fi
}
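
# Example calls (as the tests below use them):
#   rados_put_get_data eio $dir 0            # inject a second eio, expect the read to fail
#   rados_put_get_data eio $dir 0 recovery   # expect recovery to repair the shard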

# Change the size of the specified shard
#
function set_size() {
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1}

    local poolname=pool-jerasure
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    ceph osd set noout
    if [ "$mode" = "add" ];
    then
        objectstore_tool $dir $osd_id $objname get-bytes $dir/CORRUPT || return 1
        dd if=/dev/urandom bs=$bytes count=1 >> $dir/CORRUPT
    elif [ "$bytes" = "0" ];
    then
        touch $dir/CORRUPT
    else
        dd if=/dev/urandom bs=$bytes count=1 of=$dir/CORRUPT
    fi
    objectstore_tool $dir $osd_id $objname set-bytes $dir/CORRUPT || return 1
    rm -f $dir/CORRUPT
    ceph osd unset noout
}
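
# The three corruption modes the bad-size tests below exercise:
#   set_size obj $dir 1 10        # replace shard 1 with 10 random bytes
#   set_size obj $dir 1 0         # truncate shard 1 to zero bytes
#   set_size obj $dir 1 256 add   # append 256 random bytes to shard 1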

function rados_get_data_bad_size() {
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1:-set}

    local poolname=pool-jerasure
    local objname=obj-size-$$-$shard_id-$bytes
    rados_put $dir $poolname $objname || return 1

    # Change the size of the specified shard
    #
    set_size $objname $dir $shard_id $bytes $mode || return 1

    rados_get $dir $poolname $objname || return 1

    # Leave objname and modify another shard
    shard_id=$(expr $shard_id + 1)
    set_size $objname $dir $shard_id $bytes $mode || return 1
    rados_get $dir $poolname $objname fail || return 1
    rm $dir/ORIGINAL
}

#
# These two test cases try to validate the following behavior:
# for an object on an EC pool, if one shard hits a read error
# (on either the primary or a replica), the client can still read the object.
#
# If 2 shards have read errors the client will get an error.
#
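# (With the k=2 m=1 profile used below an object has 3 shards and any 2
# suffice for a read: one bad shard still leaves k=2 readable ones, while
# two bad shards leave only 1 < k, so reconstruction is impossible.)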
function TEST_rados_get_subread_eio_shard_0() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio on primary OSD (0) and replica OSD (1)
    local shard_id=0
    rados_put_get_data eio $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}

function TEST_rados_get_subread_eio_shard_1() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio into replicas OSD (1) and OSD (2)
    local shard_id=1
    rados_put_get_data eio $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}

# We don't remove the object from the primary because
# that just causes it to appear to be missing

function TEST_rados_get_subread_missing() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject remove into replicas OSD (1) and OSD (2)
    local shard_id=1
    rados_put_get_data remove $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}

#
#
# These two test cases try to validate the following behavior:
# for an object on an EC pool, a shard with an incorrect size causes an
# internal read error, but the client can still read the object.
#
# If 2 shards have incorrect sizes the client will get an error.
#
function TEST_rados_get_bad_size_shard_0() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # Set incorrect size into primary OSD (0) and replica OSD (1)
    local shard_id=0
    rados_get_data_bad_size $dir $shard_id 10 || return 1
    rados_get_data_bad_size $dir $shard_id 0 || return 1
    rados_get_data_bad_size $dir $shard_id 256 add || return 1
    delete_erasure_coded_pool $poolname
}

function TEST_rados_get_bad_size_shard_1() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # Set incorrect size into replicas OSD (1) and OSD (2)
    local shard_id=1
    rados_get_data_bad_size $dir $shard_id 10 || return 1
    rados_get_data_bad_size $dir $shard_id 0 || return 1
    rados_get_data_bad_size $dir $shard_id 256 add || return 1
    delete_erasure_coded_pool $poolname
}

function TEST_rados_get_with_subreadall_eio_shard_0() {
    local dir=$1
    local shard_id=0

    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio on primary OSD (0)
    rados_put_get_data eio $dir $shard_id recovery || return 1

    delete_erasure_coded_pool $poolname
}

function TEST_rados_get_with_subreadall_eio_shard_1() {
    local dir=$1
    local shard_id=1

    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio on replica OSD (1)
    rados_put_get_data eio $dir $shard_id recovery || return 1

    delete_erasure_coded_pool $poolname
}

# Test recovery when the object attr read returns an error
function TEST_ec_object_attr_read_error() {
    local dir=$1
    local objname=myobject

    setup_osds 7 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    local primary_osd=$(get_primary $poolname $objname)
    # Kill primary OSD
    kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1

    # Write data
    rados_put $dir $poolname $objname || return 1

    # Inject eio on shard 1, the shard the attr is read from
    inject_eio ec mdata $poolname $objname $dir 1 || return 1

    # Restart OSD
    activate_osd $dir ${primary_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname myobject || return 1

    delete_erasure_coded_pool $poolname
}

# Test recovery when the first k shards aren't all available
function TEST_ec_single_recovery_error() {
    local dir=$1
    local objname=myobject

    setup_osds 7 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill OSD
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname myobject || return 1

    delete_erasure_coded_pool $poolname
}

# Test recovery when repeated reads are needed due to EIO
function TEST_ec_recovery_multiple_errors() {
    local dir=$1
    local objname=myobject

    setup_osds 9 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 4 4 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1
    # The first read will try shards 0,1,2; when 0 gets EIO, shard 3 gets
    # tried as well. Make that fail too, to test multiple-EIO handling.
    inject_eio ec data $poolname $objname $dir 3 || return 1
    inject_eio ec data $poolname $objname $dir 4 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill OSD
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname myobject || return 1

    delete_erasure_coded_pool $poolname
}

# Test recovery when there's only one shard to recover, but multiple
# objects recovering in one RecoveryOp
function TEST_ec_recovery_multiple_objects() {
    local dir=$1
    local objname=myobject

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname test1
    rados_put $dir $poolname test2
    rados_put $dir $poolname test3

    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1
    rados_get $dir $poolname test2
    rados_get $dir $poolname test3

    delete_erasure_coded_pool $poolname
}

# Test multi-object recovery when the one missing shard gets EIO
function TEST_ec_recovery_multiple_objects_eio() {
    local dir=$1
    local objname=myobject

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname test1
    rados_put $dir $poolname test2
    rados_put $dir $poolname test3

    # can't read from this shard anymore
    inject_eio ec data $poolname $objname $dir 0 || return 1
    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1
    rados_get $dir $poolname test2
    rados_get $dir $poolname test3

    delete_erasure_coded_pool $poolname
}

# Test backfill with unfound object
function TEST_ec_backfill_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=300
    # Must be between 1 and $lastobj
    local testobj=obj250

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1
    local primary=$(get_primary $poolname $objname)

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    activate_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    for tmp in $(seq 1 240); do
        state=$(get_state 2.0)
        echo $state | grep backfill_unfound
        if [ "$?" = "0" ]; then
            break
        fi
        echo $state
        sleep 1
    done

    ceph pg dump pgs
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    sleep 5

    ceph pg dump pgs
    ceph pg 2.0 list_unfound
    ceph pg 2.0 query

    ceph pg 2.0 list_unfound | grep -q $testobj || return 1

    check=$(ceph pg 2.0 list_unfound | jq ".available_might_have_unfound")
    test "$check" == "true" || return 1

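    # might_have_unfound points at the OSD we killed; "2(4)" is pg_shard_t
    # notation for shard 4 of the PG stored on osd.2.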
    eval check=$(ceph pg 2.0 list_unfound | jq .might_have_unfound[0].status)
    test "$check" == "osd is down" || return 1

    eval check=$(ceph pg 2.0 list_unfound | jq .might_have_unfound[0].osd)
    test "$check" == "2(4)" || return 1

    activate_osd $dir ${last_osd} || return 1

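    # timeout(1) exits with status 124 when it has to kill the command,
    # which is what we expect here for a read of an unfound object.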
    # Command should hang because object is unfound
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}

# Test recovery with unfound object
function TEST_ec_recovery_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=100
    # Must be between 1 and $lastobj
    local testobj=obj75

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    activate_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        echo $state | grep recovery_unfound
        if [ "$?" = "0" ]; then
            break
        fi
        echo "$state "
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_unfound
    ceph pg 2.0 query

    ceph pg 2.0 list_unfound | grep -q $testobj || return 1

    check=$(ceph pg 2.0 list_unfound | jq ".available_might_have_unfound")
    test "$check" == "true" || return 1

    check=$(ceph pg 2.0 list_unfound | jq ".might_have_unfound | length")
    test $check == 0 || return 1

    # Command should hang because object is unfound
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}

main test-erasure-eio "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-eio.sh"
# End: