#!/usr/bin/env bash
#
# Copyright (C) 2015 Red Hat <contact@redhat.com>
#
#
# Author: Kefu Chai <kchai@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7112" # git grep '\<7112\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        run_mon $dir a || return 1
        run_mgr $dir x || return 1
        create_pool rbd 4 || return 1

        # check that erasure code plugins are preloaded
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
        grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}

function setup_osds() {
    local count=$1
    shift

    for id in $(seq 0 $(expr $count - 1)) ; do
        run_osd $dir $id || return 1
    done

    # check that erasure code plugins are preloaded
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
    grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1
}

function get_state() {
    local pgid=$1
    local sname=state
    ceph --format json pg dump pgs 2>/dev/null | \
        jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname"
}
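
# Example (illustrative): `get_state 2.0` prints the state string of PG 2.0,
# e.g. "active+clean", or "active+recovery_unfound+degraded" while an
# unfound object is blocking recovery.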

function create_erasure_coded_pool() {
    local poolname=$1
    shift
    local k=$1
    shift
    local m=$1
    shift

    ceph osd erasure-code-profile set myprofile \
        plugin=jerasure \
        k=$k m=$m \
        crush-failure-domain=osd || return 1
    create_pool $poolname 1 1 erasure myprofile \
        || return 1
    wait_for_clean || return 1
}
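
# For k=2 m=1, the profile reported by `ceph osd erasure-code-profile get
# myprofile` would look roughly like this (the technique is the plugin's
# default, reed_sol_van for jerasure):
#   crush-failure-domain=osd
#   k=2
#   m=1
#   plugin=jerasure
#   technique=reed_sol_van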

function delete_erasure_coded_pool() {
    local poolname=$1
    ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it
    ceph osd erasure-code-profile rm myprofile
}

function rados_put() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}

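    # Build a 4 KiB test object: printf right-aligns each marker in a
    # 1024-byte space-padded field, so ORIGINAL is exactly 4096 bytes.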
    for marker in AAA BBB CCCC DDDD ; do
        printf "%*s" 1024 $marker
    done > $dir/ORIGINAL
    #
    # put the object; a later rados_get fetches it and compares the
    # copy with $dir/ORIGINAL
    #
    rados --pool $poolname put $objname $dir/ORIGINAL || return 1
}

function rados_get() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local expect=${4:-ok}

    #
    # Expect a failure to get object
    #
    if [ $expect = "fail" ];
    then
        ! rados --pool $poolname get $objname $dir/COPY
        return
    fi
    #
    # get an object, compare with $dir/ORIGINAL
    #
    rados --pool $poolname get $objname $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1
    rm $dir/COPY
}
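
# Example usage: `rados_get $dir $poolname myobj` expects the read to
# succeed and match ORIGINAL; `rados_get $dir $poolname myobj fail`
# expects the read to fail.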


function inject_remove() {
    local pooltype=$1
    shift
    local which=$1
    shift
    local poolname=$1
    shift
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift

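    # Map the shard id to the OSD that currently holds that shard, then
    # delete the shard's on-disk object with ceph-objectstore-tool.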
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    objectstore_tool $dir $osd_id $objname remove || return 1
}

# Test with an injected error
function rados_put_get_data() {
    local inject=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local arg=$1

    # inject eio to the specified shard
    #
    local poolname=pool-jerasure
    local objname=obj-$inject-$$-$shard_id
    rados_put $dir $poolname $objname || return 1
    inject_$inject ec data $poolname $objname $dir $shard_id || return 1
    rados_get $dir $poolname $objname || return 1

    if [ "$arg" = "recovery" ];
    then
        #
        # take out the last OSD used to store the object,
        # bring it back, and check for clean PGs which means
        # recovery didn't crash the primary.
        #
        local -a initial_osds=($(get_osds $poolname $objname))
        local last_osd=${initial_osds[-1]}
        # Kill OSD
        kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
        ceph osd out ${last_osd} || return 1
        ! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1
        ceph osd in ${last_osd} || return 1
        activate_osd $dir ${last_osd} || return 1
        wait_for_clean || return 1
        # Won't check for eio on get here -- recovery above might have fixed it
    else
        shard_id=$(expr $shard_id + 1)
        inject_$inject ec data $poolname $objname $dir $shard_id || return 1
        rados_get $dir $poolname $objname fail || return 1
        rm $dir/ORIGINAL
    fi
}

# Change the size of the specified shard
#
function set_size() {
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1}

    local poolname=pool-jerasure
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    ceph osd set noout
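    # Three corruption modes: "add" appends $bytes of random data to the
    # existing shard, bytes=0 truncates the shard to zero length, and
    # otherwise the shard is replaced with $bytes of random data.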
    if [ "$mode" = "add" ];
    then
        objectstore_tool $dir $osd_id $objname get-bytes $dir/CORRUPT || return 1
        dd if=/dev/urandom bs=$bytes count=1 >> $dir/CORRUPT
    elif [ "$bytes" = "0" ];
    then
        touch $dir/CORRUPT
    else
        dd if=/dev/urandom bs=$bytes count=1 of=$dir/CORRUPT
    fi
    objectstore_tool $dir $osd_id $objname set-bytes $dir/CORRUPT || return 1
    rm -f $dir/CORRUPT
    ceph osd unset noout
}

function rados_get_data_bad_size() {
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1:-set}

    local poolname=pool-jerasure
    local objname=obj-size-$$-$shard_id-$bytes
    rados_put $dir $poolname $objname || return 1

    # Change the size of the specified shard
    #
    set_size $objname $dir $shard_id $bytes $mode || return 1

    rados_get $dir $poolname $objname || return 1

    # Leave objname alone and modify another shard
    shard_id=$(expr $shard_id + 1)
    set_size $objname $dir $shard_id $bytes $mode || return 1
    rados_get $dir $poolname $objname fail || return 1
    rm $dir/ORIGINAL
}

#
# These two test cases try to validate the following behavior:
# For an object on an EC pool, if one shard has a read error
# (either primary or replica), the client can still read the object.
#
# If 2 shards have read errors the client will get an error.
#
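# For example, with k=2 m=1 each object is cut into 2 data shards plus
# 1 coding shard, one per OSD; any 2 of the 3 shards suffice to
# reconstruct the data, so one bad shard is tolerated but two are not.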
function TEST_rados_get_subread_eio_shard_0() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio on primary OSD (0) and replica OSD (1)
    local shard_id=0
    rados_put_get_data eio $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}

function TEST_rados_get_subread_eio_shard_1() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject eio into replica OSDs (1) and (2)
    local shard_id=1
    rados_put_get_data eio $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}

# We don't remove the object from the primary because
# that just causes it to appear to be missing

function TEST_rados_get_subread_missing() {
    local dir=$1
    setup_osds 4 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 2 1 || return 1
    # inject remove into replica OSDs (1) and (2)
    local shard_id=1
    rados_put_get_data remove $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}

#
# These two test cases try to validate the following behavior:
# For an object on an EC pool, a shard with an incorrect size causes
# an internal read error, but the client can still read the object.
#
# If 2 shards have an incorrect size the client will get an error.
#
299 #
300 function TEST_rados_get_bad_size_shard_0() {
301 local dir=$1
302 setup_osds 4 || return 1
303
304 local poolname=pool-jerasure
305 create_erasure_coded_pool $poolname 2 1 || return 1
306 # Set incorrect size into primary OSD (0) and replica OSD (1)
307 local shard_id=0
308 rados_get_data_bad_size $dir $shard_id 10 || return 1
309 rados_get_data_bad_size $dir $shard_id 0 || return 1
310 rados_get_data_bad_size $dir $shard_id 256 add || return 1
311 delete_erasure_coded_pool $poolname
312 }
313
314 function TEST_rados_get_bad_size_shard_1() {
315 local dir=$1
316 setup_osds 4 || return 1
317
318 local poolname=pool-jerasure
319 create_erasure_coded_pool $poolname 2 1 || return 1
320 # Set incorrect size into replicas OSD (1) and OSD (2)
321 local shard_id=1
322 rados_get_data_bad_size $dir $shard_id 10 || return 1
323 rados_get_data_bad_size $dir $shard_id 0 || return 1
324 rados_get_data_bad_size $dir $shard_id 256 add || return 1
325 delete_erasure_coded_pool $poolname
326 }
327
328 function TEST_rados_get_with_subreadall_eio_shard_0() {
329 local dir=$1
330 local shard_id=0
331
332 setup_osds 4 || return 1
333
334 local poolname=pool-jerasure
335 create_erasure_coded_pool $poolname 2 1 || return 1
336 # inject eio on primary OSD (0)
337 rados_put_get_data eio $dir $shard_id recovery || return 1
338
339 delete_erasure_coded_pool $poolname
340 }
341
342 function TEST_rados_get_with_subreadall_eio_shard_1() {
343 local dir=$1
344 local shard_id=1
345
346 setup_osds 4 || return 1
347
348 local poolname=pool-jerasure
349 create_erasure_coded_pool $poolname 2 1 || return 1
350 # inject eio on replica OSD (1)
351 rados_put_get_data eio $dir $shard_id recovery || return 1
352
353 delete_erasure_coded_pool $poolname
354 }

# Test recovery from an object attr read error
function TEST_ec_object_attr_read_error() {
    local dir=$1
    local objname=myobject

    setup_osds 7 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    local primary_osd=$(get_primary $poolname $objname)
    # Kill primary OSD
    kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1

    # Write data
    rados_put $dir $poolname $objname || return 1

    # Inject eio; shard 1 is the one the attr is read from
    inject_eio ec mdata $poolname $objname $dir 1 || return 1

    # Restart OSD
    activate_osd $dir ${primary_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname myobject || return 1

    delete_erasure_coded_pool $poolname
}

# Test recovery when the first k shards aren't all available
function TEST_ec_single_recovery_error() {
    local dir=$1
    local objname=myobject

    setup_osds 7 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill OSD
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname myobject || return 1

    delete_erasure_coded_pool $poolname
}

# Test recovery when repeated reads are needed due to EIO
function TEST_ec_recovery_multiple_errors() {
    local dir=$1
    local objname=myobject

    setup_osds 9 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 4 4 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1
    # The first read tries shards 0,1,2; when 0 gets EIO, shard 3 gets
    # tried as well. Make that fail too, to test multiple-EIO handling.
    inject_eio ec data $poolname $objname $dir 3 || return 1
    inject_eio ec data $poolname $objname $dir 4 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill OSD
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    rados_get $dir $poolname myobject || return 1

    delete_erasure_coded_pool $poolname
}

# Test recovery when there's only one shard to recover, but multiple
# objects recovering in one RecoveryOp
function TEST_ec_recovery_multiple_objects() {
    local dir=$1
    local objname=myobject

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname test1 || return 1
    rados_put $dir $poolname test2 || return 1
    rados_put $dir $poolname test3 || return 1

    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1 || return 1
    rados_get $dir $poolname test2 || return 1
    rados_get $dir $poolname test3 || return 1

    delete_erasure_coded_pool $poolname
}

# test multi-object recovery when the one missing shard gets EIO
function TEST_ec_recovery_multiple_objects_eio() {
    local dir=$1
    local objname=myobject

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname test1 || return 1
    rados_put $dir $poolname test2 || return 1
    rados_put $dir $poolname test3 || return 1

    # can't read from this shard anymore
    inject_eio ec data $poolname $objname $dir 0 || return 1
    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1 || return 1
    rados_get $dir $poolname test2 || return 1
    rados_get $dir $poolname test3 || return 1

    delete_erasure_coded_pool $poolname
}

# Test backfill with unfound object
function TEST_ec_backfill_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=300
    # Must be between 1 and $lastobj
    local testobj=obj250

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    activate_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        if echo "$state" | grep backfill_unfound ; then
            break
        fi
        echo "$state"
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_unfound | grep -q $testobj || return 1

    # Command should time out because the object is unfound
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}

# Test recovery with unfound object
function TEST_ec_recovery_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=100
    # Must be between 1 and $lastobj
    local testobj=obj75

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    activate_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        if echo "$state" | grep recovery_unfound ; then
            break
        fi
        echo "$state"
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_unfound | grep -q $testobj || return 1

    # Command should time out because the object is unfound
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}

main test-erasure-eio "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && qa/standalone/erasure-code/test-erasure-eio.sh"
# End: