]> git.proxmox.com Git - ceph.git/blob - ceph/qa/standalone/erasure-code/test-erasure-eio.sh
import ceph 14.2.5
[ceph.git] / ceph / qa / standalone / erasure-code / test-erasure-eio.sh
1 #!/usr/bin/env bash
2 #
3 # Copyright (C) 2015 Red Hat <contact@redhat.com>
4 #
5 #
6 # Author: Kefu Chai <kchai@redhat.com>
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU Library Public License as published by
10 # the Free Software Foundation; either version 2, or (at your option)
11 # any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU Library Public License for more details.
17 #
18
19 source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
20
# Test driver: for each TEST_* function (or the ones named on the command
# line), bring up a fresh mon+mgr cluster, verify EC plugin preload, run
# the test, and tear everything down.
# Globals: CEPH_ROOT (read), CEPH_MON / CEPH_ARGS (written/exported)
# Arguments: $1 - test directory; remaining args - optional TEST_* names
function run() {
    local dir=$1
    shift

    # Port 7112 is reserved for this script; "git grep '\<7112\>'" must
    # find only this file so standalone tests never collide on the port.
    export CEPH_MON="127.0.0.1:7112" # git grep '\<7112\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    # Default to every TEST_* function currently defined in the shell.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        run_mon $dir a || return 1
        run_mgr $dir x || return 1
        create_pool rbd 4 || return 1

        # check that erasure code plugins are preloaded
        CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1
        grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}
44
# Start $count OSDs (ids 0 .. count-1) and verify that the erasure code
# plugins were preloaded by checking osd.0's log.
# NOTE(review): reads the caller's $dir variable rather than taking it as
# a parameter — callers must have $dir in scope (all current ones do).
# Arguments: $1 - number of OSDs to start
function setup_osds() {
    local count=$1
    shift
    local id

    # $((...)) replaces the deprecated external `expr` call
    for id in $(seq 0 $((count - 1))) ; do
        run_osd $dir $id || return 1
    done

    # flush the in-memory log so the preload message has hit the file
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1
    grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1
}
57
# Print the state string (e.g. "active+clean") of the given PG.
# Arguments: $1 - pgid (such as "2.0")
function get_state() {
    local pgid=$1
    ceph --format json pg dump pgs 2>/dev/null |
        jq -r --arg pgid "$pgid" '.pg_stats[] | select(.pgid == $pgid) | .state'
}
64
# Create a jerasure EC profile "myprofile" with the given k/m and a
# single-PG erasure pool using it, then wait for the cluster to go clean.
# Arguments: $1 - pool name, $2 - k, $3 - m
function create_erasure_coded_pool() {
    local poolname=$1 k=$2 m=$3

    ceph osd erasure-code-profile set myprofile \
        plugin=jerasure \
        k=$k m=$m \
        crush-failure-domain=osd || return 1
    create_pool "$poolname" 1 1 erasure myprofile || return 1
    wait_for_clean || return 1
}
81
# Remove the pool created by create_erasure_coded_pool along with the
# "myprofile" EC profile. Best-effort: failures are deliberately ignored.
# Arguments: $1 - pool name
function delete_erasure_coded_pool() {
    local poolname=$1
    ceph osd pool delete "$poolname" "$poolname" --yes-i-really-really-mean-it
    ceph osd erasure-code-profile rm myprofile
}
87
# Build a 4 KiB reference payload in $dir/ORIGINAL and store it in the
# pool under $objname. rados_get compares later reads against
# $dir/ORIGINAL, so callers must not remove it until they are done.
# (The old header comment claimed this function also got the object back
# and compared — it never did; that is rados_get's job.)
# Arguments: $1 - dir, $2 - pool name, $3 - object name (default SOMETHING)
function rados_put() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local marker

    # four markers, each right-padded with spaces to 1024 bytes
    for marker in AAA BBB CCCC DDDD ; do
        printf "%*s" 1024 $marker
    done > $dir/ORIGINAL

    rados --pool $poolname put $objname $dir/ORIGINAL || return 1
}
101
# Fetch $objname and diff it against $dir/ORIGINAL (written by rados_put).
# With expect == "fail" the get itself is required to fail.
# Arguments: $1 - dir, $2 - pool name, $3 - object name (default SOMETHING),
#            $4 - "ok" (default) or "fail"
function rados_get() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local expect=${4:-ok}

    #
    # Expect a failure to get object
    #
    # quote $expect so the test is safe if it is ever empty/multi-word
    if [ "$expect" = "fail" ];
    then
        ! rados --pool $poolname get $objname $dir/COPY
        return
    fi
    #
    # get an object, compare with $dir/ORIGINAL
    #
    rados --pool $poolname get $objname $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1
    rm $dir/COPY
}
123
124
# Delete the object's shard on the OSD that holds shard $shard_id, using
# the objectstore tool. The first two arguments (pool type and the
# data/mdata selector) exist only for signature parity with inject_eio;
# they are not used here.
# Arguments: $1 - pooltype (unused), $2 - which (unused), $3 - pool name,
#            $4 - object name, $5 - dir, $6 - shard id
function inject_remove() {
    local poolname=$3
    local objname=$4
    local dir=$5
    local shard_id=$6

    # acting set for the object; index $shard_id is the target OSD
    local -a acting_osds=($(get_osds $poolname $objname))
    objectstore_tool $dir ${acting_osds[$shard_id]} $objname remove || return 1
}
143
# Test with an injected error.
# Write an object, inject an error ("eio" or "remove") into shard
# $shard_id and verify the object is still readable; then inject the
# same error into the next shard and verify the read now fails (the
# callers use k=2 m=1, so two bad shards out of three is unrecoverable).
# With arg == "recovery", additionally cycle the last OSD storing the
# object to check that recovery does not crash the primary.
# Arguments: $1 - inject kind, $2 - dir, $3 - first shard id,
#            $4 - optional "recovery"
function rados_put_get_data() {
    local inject=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local arg=$1

    # inject the error into the specified shard
    #
    local poolname=pool-jerasure
    local objname=obj-$inject-$$-$shard_id
    rados_put $dir $poolname $objname || return 1
    inject_$inject ec data $poolname $objname $dir $shard_id || return 1
    rados_get $dir $poolname $objname || return 1

    if [ "$arg" = "recovery" ];
    then
        #
        # take out the last OSD used to store the object,
        # bring it back, and check for clean PGs which means
        # recovery didn't crash the primary.
        #
        local -a initial_osds=($(get_osds $poolname $objname))
        local last_osd=${initial_osds[-1]}
        # Kill OSD
        kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
        ceph osd out ${last_osd} || return 1
        ! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1
        ceph osd in ${last_osd} || return 1
        run_osd $dir ${last_osd} || return 1
        wait_for_clean || return 1
    fi

    # $((...)) replaces the deprecated external `expr` call
    shard_id=$((shard_id + 1))
    inject_$inject ec data $poolname $objname $dir $shard_id || return 1
    # Now 2 out of 3 shards get an error, so the read should fail
    rados_get $dir $poolname $objname fail || return 1
    rm $dir/ORIGINAL
}
186
# Change the size of the specified shard.
#
# Overwrite the on-disk shard $shard_id of $objname so reads see a size
# mismatch: mode "add" appends $bytes random bytes to the existing shard;
# otherwise the shard is replaced by $bytes random bytes ($bytes == 0
# yields an empty file).
# Arguments: $1 - object name, $2 - dir, $3 - shard id, $4 - byte count,
#            $5 - optional mode ("add")
function set_size() {
    local objname=$1
    shift
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1}

    local poolname=pool-jerasure
    local -a initial_osds=($(get_osds $poolname $objname))
    local osd_id=${initial_osds[$shard_id]}
    # keep the cluster from marking the OSD out while we poke at it
    ceph osd set noout
    if [ "$mode" = "add" ];
    then
        objectstore_tool $dir $osd_id $objname get-bytes $dir/CORRUPT || return 1
        dd if=/dev/urandom bs=$bytes count=1 >> $dir/CORRUPT
    elif [ "$bytes" = "0" ];
    then
        touch $dir/CORRUPT
    else
        dd if=/dev/urandom bs=$bytes count=1 of=$dir/CORRUPT
    fi
    # NOTE(review): an early return above (or here) leaves noout set —
    # presumably tolerable since the test aborts anyway, but confirm.
    objectstore_tool $dir $osd_id $objname set-bytes $dir/CORRUPT || return 1
    rm -f $dir/CORRUPT
    ceph osd unset noout
}
218
# Write an object, corrupt the size of shard $shard_id, and verify reads
# still succeed; then corrupt the next shard too and verify the read fails.
# Arguments: $1 - dir, $2 - first shard id, $3 - byte count,
#            $4 - optional mode (default "set", or "add")
function rados_get_data_bad_size() {
    local dir=$1
    shift
    local shard_id=$1
    shift
    local bytes=$1
    shift
    local mode=${1:-set}

    local poolname=pool-jerasure
    local objname=obj-size-$$-$shard_id-$bytes
    rados_put $dir $poolname $objname || return 1

    # Change the size of the specified shard
    #
    set_size $objname $dir $shard_id $bytes $mode || return 1

    rados_get $dir $poolname $objname || return 1

    # Leave objname and modify another shard
    # ($((...)) replaces the deprecated external `expr` call)
    shard_id=$((shard_id + 1))
    set_size $objname $dir $shard_id $bytes $mode || return 1
    rados_get $dir $poolname $objname fail || return 1
    rm $dir/ORIGINAL
}
244
#
# These two test cases try to validate the following behavior:
# For an object on an EC pool, if one shard (either primary or replica)
# has a read error, the client can still read the object.
#
# If 2 shards have read errors the client will get an error.
#
function TEST_rados_get_subread_eio_shard_0() {
    local dir=$1
    local poolname=pool-jerasure
    local shard_id=0

    setup_osds 4 || return 1
    create_erasure_coded_pool $poolname 2 1 || return 1
    # eio on the primary shard (0) first, then on replica shard (1)
    rados_put_get_data eio $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}
263
function TEST_rados_get_subread_eio_shard_1() {
    local dir=$1
    local poolname=pool-jerasure
    local shard_id=1

    setup_osds 4 || return 1
    create_erasure_coded_pool $poolname 2 1 || return 1
    # eio on replica shard (1) first, then on shard (2)
    rados_put_get_data eio $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}
275
276 # We don't remove the object from the primary because
277 # that just causes it to appear to be missing
278
function TEST_rados_get_subread_missing() {
    local dir=$1
    local poolname=pool-jerasure
    local shard_id=1

    setup_osds 4 || return 1
    create_erasure_coded_pool $poolname 2 1 || return 1
    # remove the shards on replica OSDs (1) and (2); the primary is left
    # alone because removing it only makes the object appear missing
    rados_put_get_data remove $dir $shard_id || return 1
    delete_erasure_coded_pool $poolname
}
290
#
#
# These two test cases try to validate the following behavior:
# For an object on an EC pool, if one shard has an incorrect size
# (which causes an internal read error), the client can still read
# the object.
#
# If 2 shards have incorrect sizes the client will get an error.
#
function TEST_rados_get_bad_size_shard_0() {
    local dir=$1
    local poolname=pool-jerasure
    local shard_id=0

    setup_osds 4 || return 1
    create_erasure_coded_pool $poolname 2 1 || return 1
    # corrupt the size of primary shard (0), then also replica shard (1):
    # truncate, empty out, and append to the shard respectively
    rados_get_data_bad_size $dir $shard_id 10 || return 1
    rados_get_data_bad_size $dir $shard_id 0 || return 1
    rados_get_data_bad_size $dir $shard_id 256 add || return 1
    delete_erasure_coded_pool $poolname
}
312
function TEST_rados_get_bad_size_shard_1() {
    local dir=$1
    local poolname=pool-jerasure
    local shard_id=1

    setup_osds 4 || return 1
    create_erasure_coded_pool $poolname 2 1 || return 1
    # corrupt the size of replica shards (1) and (2):
    # truncate, empty out, and append to the shard respectively
    rados_get_data_bad_size $dir $shard_id 10 || return 1
    rados_get_data_bad_size $dir $shard_id 0 || return 1
    rados_get_data_bad_size $dir $shard_id 256 add || return 1
    delete_erasure_coded_pool $poolname
}
326
function TEST_rados_get_with_subreadall_eio_shard_0() {
    local dir=$1
    local poolname=pool-jerasure
    local shard_id=0

    setup_osds 4 || return 1
    create_erasure_coded_pool $poolname 2 1 || return 1
    # eio on the primary shard (0); "recovery" also cycles the last OSD
    # to verify recovery does not crash the primary
    rados_put_get_data eio $dir $shard_id recovery || return 1

    delete_erasure_coded_pool $poolname
}
340
function TEST_rados_get_with_subreadall_eio_shard_1() {
    local dir=$1
    local poolname=pool-jerasure
    local shard_id=1

    setup_osds 4 || return 1
    create_erasure_coded_pool $poolname 2 1 || return 1
    # eio on replica shard (1); "recovery" also cycles the last OSD
    # to verify recovery does not crash the primary
    rados_put_get_data eio $dir $shard_id recovery || return 1

    delete_erasure_coded_pool $poolname
}
354
# Test recovery when reading an object attribute returns an error.
function TEST_ec_object_attr_read_error() {
    local dir=$1
    local objname=myobject

    setup_osds 7 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    local primary_osd=$(get_primary $poolname $objname)
    # Kill primary OSD
    kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1

    # Write data
    rados_put $dir $poolname $objname || return 1

    # Inject eio, shard 1 is the one read attr
    inject_eio ec mdata $poolname $objname $dir 1 || return 1

    # Restart OSD
    run_osd $dir ${primary_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    # use $objname (was a hard-coded "myobject") for consistency
    rados_get $dir $poolname $objname || return 1

    delete_erasure_coded_pool $poolname
}
385
# Test recovery when the first k copies aren't all available.
function TEST_ec_single_recovery_error() {
    local dir=$1
    local objname=myobject

    setup_osds 7 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill OSD
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    # use $objname (was a hard-coded "myobject") for consistency
    rados_get $dir $poolname $objname || return 1

    delete_erasure_coded_pool $poolname
}
413
# Test recovery when repeated reads are needed due to EIO.
function TEST_ec_recovery_multiple_errors() {
    local dir=$1
    local objname=myobject

    setup_osds 9 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 4 4 || return 1

    rados_put $dir $poolname $objname || return 1
    inject_eio ec data $poolname $objname $dir 0 || return 1
    # first read will try shards 0,1,2 when 0 gets EIO, shard 3 gets
    # tried as well. Make that fail to test multiple-EIO handling.
    inject_eio ec data $poolname $objname $dir 3 || return 1
    inject_eio ec data $poolname $objname $dir 4 || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # Kill OSD
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    # Cluster should recover this object
    wait_for_clean || return 1

    # use $objname (was a hard-coded "myobject") for consistency
    rados_get $dir $poolname $objname || return 1

    delete_erasure_coded_pool $poolname
}
445
# Test recovery when there's only one shard to recover, but multiple
# objects recovering in one RecoveryOp
function TEST_ec_recovery_multiple_objects() {
    local dir=$1
    local objname=myobject

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    # check each put/get explicitly — failures were silently ignored
    # before, unlike every other test in this file
    rados_put $dir $poolname test1 || return 1
    rados_put $dir $poolname test2 || return 1
    rados_put $dir $poolname test3 || return 1

    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1 || return 1
    rados_get $dir $poolname test2 || return 1
    rados_get $dir $poolname test3 || return 1

    delete_erasure_coded_pool $poolname
}
475
# test multi-object recovery when the one missing shard gets EIO
function TEST_ec_recovery_multiple_objects_eio() {
    local dir=$1
    local objname=myobject

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    setup_osds 7 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    # check each put/get explicitly — failures were silently ignored
    # before, unlike every other test in this file
    rados_put $dir $poolname test1 || return 1
    rados_put $dir $poolname test2 || return 1
    rados_put $dir $poolname test3 || return 1

    # can't read from this shard anymore
    inject_eio ec data $poolname $objname $dir 0 || return 1
    ceph osd out 0 || return 1

    # Cluster should recover these objects all at once
    wait_for_clean || return 1

    rados_get $dir $poolname test1 || return 1
    rados_get $dir $poolname test2 || return 1
    rados_get $dir $poolname test3 || return 1

    delete_erasure_coded_pool $poolname
}
506
# Test backfill with an unfound object
function TEST_ec_backfill_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=300
    # Must be between 1 and $lastobj
    local testobj=obj250

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # was "2>&2" (a no-op typo); redirect stdout to stderr like the
    # other tests in this file
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    # write enough objects to push the PG past its (tiny) log so the
    # returning OSD must backfill instead of log-based recovery
    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    # make both remaining copies of $testobj unreadable
    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    run_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # wait (up to ~100s) for the PG to report backfill_unfound
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        echo $state | grep backfill_unfound
        if [ "$?" = "0" ]; then
            break
        fi
        echo $state
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_unfound | grep -q $testobj || return 1

    # Command should hang because object is unfound; timeout(1) exits 124
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    # every object except the deleted unfound one must still be intact
    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}
585
# Test recovery with an unfound object
function TEST_ec_recovery_unfound() {
    local dir=$1
    local objname=myobject
    local lastobj=100
    # Must be between 1 and $lastobj
    local testobj=obj75

    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 '
    CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10'
    setup_osds 5 || return 1
    CEPH_ARGS=$ORIG_ARGS

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    # was "2>&2" (a no-op typo); redirect stdout to stderr like the
    # other tests in this file
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    # make both remaining copies of $testobj unreadable
    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    run_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # wait (up to ~100s) for the PG to report recovery_unfound
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        echo $state | grep recovery_unfound
        if [ "$?" = "0" ]; then
            break
        fi
        echo "$state "
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_unfound | grep -q $testobj || return 1

    # Command should hang because object is unfound; timeout(1) exits 124
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    # every object except the deleted unfound one must still be intact
    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_erasure_coded_pool $poolname
}
665
666 main test-erasure-eio "$@"
667
668 # Local Variables:
669 # compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-eio.sh"
670 # End: