3 # Copyright (C) 2018 Red Hat <contact@redhat.com>
5 # Author: David Zafman <dzafman@redhat.com>
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
# Pull in the standalone test helpers (setup/teardown, run_osd, wait_for_clean, ...)
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
# Entry point called by main(): set up the cluster environment shared by all
# tests, then run every TEST_* function (or only those named on the command line).
# Globals (exported for the tests): CEPH_MON, CEPH_ARGS, objects, poolprefix
# Arguments: $1 - test directory; remaining args - optional test function names
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7180" # git grep '\<7180\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    # Keep the pg log short so removed/old objects must be backfilled, not log-recovered
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
    # Every OSD reports a fake total space of 3600K (see block comment below)
    CEPH_ARGS+="--fake_statfs_for_testing=3686400 "
    CEPH_ARGS+="--osd_max_backfills=10 "
    CEPH_ARGS+="--osd_mclock_profile=high_recovery_ops "
    # 600 objects fills the fake 3600K with 6K replicated objects
    export objects=600
    export poolprefix=test

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}
# Count PGs whose state string contains the given substring (e.g. "backfilling",
# "backfill_toofull", "active+clean").  Prints the count on stdout.
# Arguments: $1 - state substring to match
function get_num_in_state() {
    local state=$1
    local expression
    expression+="select(contains(\"${state}\"))"
    # stderr suppressed intentionally: transient mon/mgr hiccups shouldn't pollute output
    ceph --format json pg dump pgs 2>/dev/null | \
        jq ".pg_stats | [.[] | .state | $expression] | length"
}
# Wait until no PG is in the given state, with backoff delays derived from the
# timeout.  The delay sequence is restarted whenever the count changes, so a
# slow-but-progressing backfill is not mistaken for a stuck one.
# Arguments: $1 - state substring; $2 - timeout used to build the delay list
# Returns:   0 when the count reaches zero, 1 on timeout (after dumping pgs)
function wait_for_not_state() {
    local state=$1
    local num_in_state=-1
    local cur_in_state
    local -a delays=($(get_timeout_delays $2 5))
    local -i loop=0

    flush_pg_stats || return 1
    # Wait for at least one PG to exist before polling states
    while test $(get_num_pgs) == 0 ; do
        sleep 1
    done

    while true ; do
        cur_in_state=$(get_num_in_state ${state})
        test $cur_in_state = "0" && break
        if test $cur_in_state != $num_in_state ; then
            # Progress observed: restart the delay sequence
            loop=0
            num_in_state=$cur_in_state
        elif (( $loop >= ${#delays[*]} )) ; then
            ceph pg dump pgs
            return 1
        fi
        sleep ${delays[$loop]}
        loop+=1
    done
    return 0
}
# Wait until no PG is backfilling.  Arguments: $1 - timeout in seconds.
function wait_for_not_backfilling() {
    local timeout=$1
    wait_for_not_state backfilling $timeout
}
# Wait until no PG is activating.  Arguments: $1 - timeout in seconds.
function wait_for_not_activating() {
    local timeout=$1
    wait_for_not_state activating $timeout
}
93 # All tests are created in an environment which has fake total space
94 # of 3600K (3686400) which can hold 600 6K replicated objects or
95 # 200 18K shards of erasure coded objects. For a k=3, m=2 EC pool
96 # we have a theoretical 54K object but with the chunk size of 4K
97 # and a rounding of 4K to account for the chunks is 36K max object
98 # which is ((36K / 3) + 4K) * 200 = 3200K which is 88% of
101 # Create 2 pools with size 1
102 # Write enough data that only 1 pool pg can fit per osd
103 # Increase the pool size to 2
104 # On 3 OSDs this should result in 1 OSD with overlapping replicas,
105 # so both pools can't fit. We assume pgid 1.0 and 2.0 won't
106 # map to the same 2 OSDs.
107 # At least 1 pool shouldn't have room to backfill
108 # All other pools should go active+clean
function TEST_backfill_test_simple() {
    local dir=$1
    # NOTE(review): local constants below were missing from the damaged source;
    # values match the documented scenario (2 pools on 3 OSDs) — confirm upstream.
    local pools=2
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it
    done

    wait_for_clean || return 1

    # This won't work if the 2 pools' primary and only osds are the same.

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    ceph pg dump pgs

    # Growing the pools to size 2 forces backfill; one OSD can't hold both replicas
    for p in $(seq 1 $pools)
    do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    expected="$(expr $pools - 1)"
    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
    then
        echo "$expected didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
    # The fake statfs must not cause accounting errors in the OSD logs
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
186 # Create 8 pools of size 1 on 20 OSDs
187 # Write 4K * 600 objects (only 1 pool pg can fit on any given osd)
188 # Increase pool size to 2
189 # At least 1 pool shouldn't have room to backfill
190 # All other pools should go active+clean
function TEST_backfill_test_multi() {
    local dir=$1
    # 8 pools on 20 OSDs per the scenario description above
    local pools=8
    local OSDS=20

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it
    done

    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    ceph pg dump pgs

    for p in $(seq 1 $pools)
    do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    full="$(ceph pg dump pgs | grep +backfill_toofull | wc -l)"
    if [ "$full" -lt "1" ];
    then
        echo "At least one pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    expected="$(expr $pools - $full)"
    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
    then
        echo "$expected didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs
    ceph status --format=json-pretty > $dir/stat.json

    # eval strips the JSON quoting from the jq output
    eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json)
    if [ "$SEV" != "HEALTH_WARN" ]; then
        echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json)
    if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then
        echo "PG_BACKFILL_FULL message '$MSG' mismatched"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    rm -f $dir/stat.json

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    # Work around for http://tracker.ceph.com/issues/38195
    kill_daemons $dir #|| return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
282 # To make sure that when 2 pg try to backfill at the same time to
283 # the same target. This might be covered by the simple test above
284 # but this makes sure we get it.
286 # Create 10 pools of size 2 and identify 2 that have the same
288 # Delete all other pools
289 # Set size to 1 and write 4K * 600 to each pool
291 # The 2 pools should race to backfill.
292 # One pool goes active+clean
293 # The other goes active+...+backfill_toofull
function TEST_backfill_test_sametarget() {
    local dir=$1
    # NOTE(review): OSDS count was missing from the damaged source; 10 pools per
    # the scenario description, 5 OSDs matches upstream — confirm.
    local pools=10
    local OSDS=5

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with a pg that have distinct primaries but second
    # replica on the same osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1
    local chk_osd2

    local PG2
    local POOLNUM2
    local pool2
    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ $p = "1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1=$test_osd1
            chk_osd2=$test_osd2
        elif [ $chk_osd1 != $test_osd1 -a $chk_osd2 = $test_osd2 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            break
        fi
    done
    rm -f $dir/acting

    if [ "$pool2" = "" ];
    then
        echo "Failure to find appropriate PGs"
        return 1
    fi

    # Keep only the two matching pools
    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    ceph osd pool set $pool1 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it

    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for i in $(seq 1 $objects)
    do
        rados -p $pool1 put obj$i $dir/datafile
        rados -p $pool2 put obj$i $dir/datafile
    done

    # Both pools now race to backfill onto the shared second replica
    ceph osd pool set $pool1 size 2
    ceph osd pool set $pool2 size 2
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool $pool1
    delete_pool $pool2
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
413 # 2 pools can't both backfill to a target which has other data
414 # 1 of the pools has objects that increase from 1024 to 2611 bytes
416 # Write to fill pool which is size 1
417 # Take fill pool osd down (other 2 pools must go to the remaining OSDs
418 # Save an export of data on fill OSD and restart it
419 # Write an initial 1K to pool1 which has pg 2.0
420 # Export 2.0 from non-fillpool OSD don't wait for it to start-up
421 # Take down fillpool OSD
422 # Put 1K object version of 2.0 on fillpool OSD
423 # Put back fillpool data on fillpool OSD
424 # With fillpool down write 2611 byte objects
425 # Take down $osd and bring back $fillosd simultaneously
426 # Wait for backfilling
427 # One PG will be able to backfill its remaining data
428 # One PG must get backfill_toofull
function TEST_backfill_multi_partial() {
    local dir=$1
    # NOTE(review): local constants were missing from the damaged source;
    # 2 data pools + fillpool on 3 OSDs fits the scenario description — confirm.
    local EC=$2
    local pools=2
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1 --yes-i-really-mean-it
    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done

    wait_for_clean || return 1

    # Partially fill an osd
    # We have room for 600 6K replicated objects, if we create 2611 byte objects
    # there is 3600K - (2611 * 600) = 2070K, so the fill pool and one
    # replica from the other 2 is 85% of 3600K

    dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
    for o in $(seq 1 $objects)
    do
        rados -p fillpool put obj-fill-${o} $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj-fill-1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    kill_daemon $dir/osd.$fillosd.pid TERM
    ceph osd out osd.$fillosd

    _objectstore_tool_nodown $dir $fillosd --op export-remove --pgid 1.0 --file $dir/fillexport.out || return 1
    activate_osd $dir $fillosd || return 1

    ceph pg dump pgs

    # Write an initial 1K version of every object in pool 1 (pg 2.0)
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
    for o in $(seq 1 $objects)
    do
        rados -p "${poolprefix}1" put obj-1-${o} $dir/datafile
    done

    ceph pg dump pgs
    # The $osd OSD is started, but we don't wait so we can kill $fillosd at the same time
    _objectstore_tool_nowait $dir $osd --op export --pgid 2.0 --file $dir/export.out
    kill_daemon $dir/osd.$fillosd.pid TERM
    _objectstore_tool_nodown $dir $fillosd --force --op remove --pgid 2.0
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out || return 1
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 1.0 --file $dir/fillexport.out || return 1
    ceph pg dump pgs

    # re-write everything
    dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj-${p}-${o} $dir/datafile
        done
    done

    # Swap the running OSDs: take $osd down and bring $fillosd back simultaneously
    kill_daemon $dir/osd.$osd.pid TERM
    ceph osd out osd.$osd

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    flush_pg_stats || return 1
    ceph pg dump pgs

    ERRORS=0
    if [ "$(get_num_in_state backfill_toofull)" != "1" ];
    then
        echo "One PG should be in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(get_num_in_state active+clean)" != "2" ];
    then
        echo "Two PGs should be active+clean after one PG completed backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
549 # Make sure that the amount of bytes already on the replica doesn't
550 # cause an out of space condition
552 # Create 1 pool and write 4K * 600 objects
553 # Remove 25% (150) of the objects with one OSD down (noout set)
554 # Increase the size of the remaining 75% (450) of the objects to 6K
555 # Bring back down OSD
556 # The pool should go active+clean
function TEST_backfill_grow() {
    local dir=$1
    local poolname="test"
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3

    wait_for_clean || return 1

    dd if=/dev/urandom of=${dir}/4kdata bs=1k count=4
    for i in $(seq 1 $objects)
    do
        rados -p $poolname put obj$i $dir/4kdata
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    # noout keeps the down OSD in the map so it must backfill on return
    ceph osd set noout
    kill_daemons $dir TERM $otherosd || return 1

    # Remove 25% of the objects while the OSD is down
    rmobjects=$(expr $objects / 4)
    for i in $(seq 1 $rmobjects)
    do
        rados -p $poolname rm obj$i
    done

    # Grow the remaining 75% from 4K to 6K
    dd if=/dev/urandom of=${dir}/6kdata bs=6k count=1
    for i in $(seq $(expr $rmobjects + 1) $objects)
    do
        rados -p $poolname put obj$i $dir/6kdata
    done

    activate_osd $dir $otherosd || return 1

    ceph tell osd.$primary debug kick_recovery_wq 0

    sleep 2

    # Backfill must account for bytes already on the replica and go clean
    wait_for_clean || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
617 # Create a 5 shard EC pool on 6 OSD cluster
618 # Fill 1 OSD with 2600K of data take that osd down.
619 # Write the EC pool on 5 OSDs
620 # Take down 1 (must contain an EC shard)
621 # Bring up OSD with fill data
622 # Not enough room to backfill to partially full OSD
function TEST_ec_backfill_simple() {
    local dir=$1
    local EC=$2
    # 5-shard (k=3, m=2) EC pool on a 6 OSD cluster per the description above
    local pools=1
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1 --yes-i-really-mean-it

    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 13K objects
    # there is only 3600K - (13K * 200) = 1000K which won't hold
    # a k=3 shard below ((18K / 3) + 4K) * 200 = 2000K
    # Actual usage per shard is 8K * 200 = 1600K because 18K/3 is 6K which
    # rounds to 8K.  The 2000K is the ceiling on the 18K * 200 = 3600K logical
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=13
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    kill_daemon $dir/osd.$fillosd.pid TERM
    ceph osd out osd.$fillosd

    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=18
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    kill_daemon $dir/osd.$osd.pid TERM
    ceph osd out osd.$osd

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    # Exclude the fillpool pg (1.0) from the check
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
# Print the list of OSD ids 0..OSDS-1, space separated, excluding one id.
# Arguments: $1 - total number of OSDs; $2 - OSD id to exclude
# Outputs:   the remaining ids on stdout (e.g. "0 1 3")
function osdlist() {
    local OSDS=$1
    local excludeosd=$2
    local osds=""   # local: don't leak a scratch variable into callers

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        if [ $osd = $excludeosd ];
        then
            continue
        fi
        if [ -n "$osds" ]; then
            osds="${osds} "
        fi
        osds="${osds}${osd}"
    done
    echo $osds
}
742 # Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
743 # Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
744 # Remap the last OSD to partially full OSD on both pools
745 # The 2 pools should race to backfill.
746 # One pool goes active+clean
747 # The other goes acitve+...+backfill_toofull
function TEST_ec_backfill_multi() {
    local dir=$1
    local EC=$2
    # 2 EC pools (k=3, m=2) on 6 OSDs per the description above
    local pools=2
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # This test requires that shards from 2 different pools
    # fit on a given OSD, but both will not fit.  I'm
    # making the fillosd plus 1 shard use 75% of the space,
    # leaving not enough to be under the 85% set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1 --yes-i-really-mean-it

    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K which will only hold
    # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    nonfillosds="$(osdlist $OSDS $fillosd)"

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        # Keep the EC pgs off the filled OSD to start
        ceph osd pg-upmap "$(expr $p + 1).0" $nonfillosds
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
        done
    done

    ceph pg dump pgs

    # Remap the last OSD of each pg onto the filled OSD; the pools race to backfill
    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap $(expr $p + 1).0 ${nonfillosds% *} $fillosd
    done
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
854 # Similar to TEST_ec_backfill_multi but one of the ec pools
855 # already had some data on the target OSD
857 # Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
858 # Write a small amount of data to 1 EC pool that still includes the filled one
859 # Take down fillosd with noout set
860 # Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
861 # Remap the last OSD to partially full OSD on both pools
862 # The 2 pools should race to backfill.
863 # One pool goes active+clean
864 # The other goes active+...+backfill_toofull
# SKIP_ prefix: excluded from the TEST_* discovery in run()
function SKIP_TEST_ec_backfill_multi_partial() {
    local dir=$1
    local EC=$2
    # NOTE(review): constants were missing from the damaged source; k+m=5 shards
    # require the upmap list $(seq 0 $lastosd) to name 5 OSDs, hence OSDS=5 — confirm.
    local pools=2
    local OSDS=5
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)
    local lastosd=$(expr $OSDS - 1)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # This test requires that shards from 2 different pools
    # fit on a given OSD, but both will not fit.  I'm
    # making the fillosd plus 1 shard use 75% of the space,
    # leaving not enough to be under the 85% set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1 --yes-i-really-mean-it
    # Pin fillpool's pg to the last osd
    ceph osd pg-upmap 1.0 $lastosd

    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K which will only hold
    # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    nonfillosds="$(osdlist $OSDS $fillosd)"

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    # Seed a small amount of data into pool 1 while it still includes lastosd
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
    for o in $(seq 1 $ecobjects)
    do
        rados -p "${poolprefix}1" put obj$o-1 $dir/datafile
    done

    # Drop lastosd from both pools' upmaps
    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $(expr $lastosd - 1))
    done
    ceph pg dump pgs

    #kill_daemons $dir TERM osd.$lastosd || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
        done
    done

    ceph pg dump pgs

    # Now backfill lastosd by adding back into the upmap
    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
    done
    #activate_osd $dir $lastosd || return 1
    #ceph tell osd.0 debug kick_recovery_wq 0
    sleep 30
    ceph pg dump pgs

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
# SKIP_ prefix: excluded from the TEST_* discovery in run().  Second variant of
# the multi-partial EC test, using objectstore export/import to pre-place data.
function SKIP_TEST_ec_backfill_multi_partial() {
    local dir=$1
    local EC=$2
    # NOTE(review): constants were missing from the damaged source — confirm upstream.
    local pools=2
    local OSDS=6

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # Below we need to fit 3200K in 3600K which is 88%
    # so set the backfillfull ratio above that
    ceph osd set-backfillfull-ratio .90

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1 --yes-i-really-mean-it

    # Partially fill an osd
    # We have room for 200 48K ec objects, if we create 4k replicated objects
    # there is 3600K - (4K * 200) = 2800K which won't hold 2 k=3 shard
    # of 200 12K objects which takes ((12K / 3) + 4K) * 200 = 1600K each.
    # On the other OSDs 2 * 1600K = 3200K which is 88% of 3600K.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    kill_daemon $dir/osd.$fillosd.pid TERM
    ceph osd out osd.$fillosd

    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    #ceph pg map 2.0 --format=json | jq '.'
    kill_daemon $dir/osd.$osd.pid TERM
    ceph osd out osd.$osd

    # Move pg 2.0's data onto the filled OSD so backfill finds existing bytes
    _objectstore_tool_nodown $dir $osd --op export --pgid 2.0 --file $dir/export.out
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
1102 # Write 200 12K objects ((12K / 3) + 4K) *200) = 1600K
1103 # Take 1 shard's OSD down (with noout set)
1104 # Remove 50 objects ((12K / 3) + 4k) * 50) = 400K
1105 # Write 150 36K objects (grow 150 objects) 2400K
1106 # But there is already 1600K usage so backfill
1107 # would be too full if it didn't account for existing data
1108 # Bring back down OSD so it must backfill
1109 # It should go active+clean taking into account data already there
function TEST_ec_backfill_grow() {
    local dir=$1
    local poolname="test"
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1
    ceph osd pool create $poolname 1 1 erasure ec-profile

    wait_for_clean || return 1

    dd if=/dev/urandom of=${dir}/12kdata bs=1k count=12
    for i in $(seq 1 $ecobjects)
    do
        rados -p $poolname put obj$i $dir/12kdata
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    # noout keeps the down OSD in the map so it must backfill on return
    ceph osd set noout
    kill_daemons $dir TERM $otherosd || return 1

    # Remove 25% of the objects while the OSD is down
    rmobjects=$(expr $ecobjects / 4)
    for i in $(seq 1 $rmobjects)
    do
        rados -p $poolname rm obj$i
    done

    # Grow the remaining 75% from 12K to 36K
    dd if=/dev/urandom of=${dir}/36kdata bs=1k count=36
    for i in $(seq $(expr $rmobjects + 1) $ecobjects)
    do
        rados -p $poolname put obj$i $dir/36kdata
    done

    activate_osd $dir $otherosd || return 1

    ceph tell osd.$primary debug kick_recovery_wq 0

    sleep 2

    # Backfill must account for bytes already on the shard and go clean
    wait_for_clean || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
main osd-backfill-space "$@"
1175 # compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-space.sh"