#!/usr/bin/env bash
#
# Copyright (C) 2018 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7180" # git grep '\<7180\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
    CEPH_ARGS+="--fake_statfs_for_testing=3686400 "
    CEPH_ARGS+="--osd_max_backfills=10 "
    export objects=600
    export poolprefix=test

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}
function get_num_in_state() {
    local state=$1
    local expression
    expression+="select(contains(\"${state}\"))"
    ceph --format json pg dump pgs 2>/dev/null | \
        jq ".pg_stats | [.[] | .state | $expression] | length"
}
function wait_for_not_state() {
    local state=$1
    local num_in_state=-1
    local cur_in_state
    local -a delays=($(get_timeout_delays $2 5))
    local -i loop=0

    flush_pg_stats || return 1
    while test $(get_num_pgs) == 0 ; do
        sleep 1
    done

    while true ; do
        cur_in_state=$(get_num_in_state ${state})
        test $cur_in_state = "0" && break
        if test $cur_in_state != $num_in_state ; then
            loop=0
            num_in_state=$cur_in_state
        elif (( $loop >= ${#delays[*]} )) ; then
            ceph pg dump pgs
            return 1
        fi
        sleep ${delays[$loop]}
        loop+=1
    done
    return 0
}
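# Note on the polling logic above: the delay series from get_timeout_delays
# (a ceph-helpers.sh utility) is only consumed while the count of PGs in
# $state stays unchanged; any change in the count resets the series.  So the
# timeout bounds how long the cluster may stall in a state, not the total
# wait time.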
function wait_for_not_backfilling() {
    local timeout=$1
    wait_for_not_state backfilling $timeout
}

function wait_for_not_activating() {
    local timeout=$1
    wait_for_not_state activating $timeout
}
# All tests are created in an environment which has fake total space
# of 3600K (3686400) which can hold 600 6K replicated objects or
# 200 18K shards of erasure coded objects.  For a k=3, m=2 EC pool
# we have a theoretical 54K object but with the chunk size of 4K
# and a rounding of 4K to account for the chunks is 36K max object
# which is ((36K / 3) + 4K) * 200 = 3200K which is 88% of
# 3600K for a shard.
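# Checking that arithmetic (explanatory, not executed):
#   replicated: 600 objects * 6K = 3600K, exactly the fake capacity
#   EC shard:   a 36K object split k=3 ways is 12K per shard, plus the 4K
#               rounding gives 16K; 16K * 200 objects = 3200K
#   headroom:   3200K / 3600K = 88.9%, just above the 85% backfillfull
#               ratio most of the tests below set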
# Create 2 pools with size 1
# Write enough data that only 1 pool pg can fit per osd
# Increase the pool size to 2
# On 3 OSDs this should result in 1 OSD with overlapping replicas,
# so both pools can't fit.  We assume pgid 1.0 and 2.0 won't
# map to the same 2 OSDs.
# At least 1 pool shouldn't have room to backfill
# All other pools should go active+clean
function TEST_backfill_test_simple() {
    local dir=$1
    local pools=2
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1
    done

    wait_for_clean || return 1
    # This won't work if the 2 pools' primary and only osds
    # are the same.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done
    for p in $(seq 1 $pools)
    do
        ceph osd pool set "${poolprefix}$p" size 2
    done

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1
    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    expected="$(expr $pools - 1)"
    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
    then
        echo "$expected didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    if [ $ERRORS != "0" ];
    then
        return 1
    fi
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
# Create 8 pools of size 1 on 20 OSDs
# Write 4K * 600 objects (only 1 pool pg can fit on any given osd)
# Increase pool size to 2
# At least 1 pool shouldn't have room to backfill
# All other pools should go active+clean
function TEST_backfill_test_multi() {
    local dir=$1
    local pools=8
    local OSDS=20

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1
    done

    wait_for_clean || return 1
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done
    for p in $(seq 1 $pools)
    do
        ceph osd pool set "${poolprefix}$p" size 2
    done

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1
    ERRORS=0
    full="$(ceph pg dump pgs | grep +backfill_toofull | wc -l)"
    if [ "$full" -lt "1" ];
    then
        echo "At least one pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    expected="$(expr $pools - $full)"
    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
    then
        echo "$expected didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    ceph status --format=json-pretty > $dir/stat.json
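    # Note: the eval-over-jq pattern below strips the double quotes that jq
    # prints around JSON strings, so the comparisons see the bare values
    # (jq -r would be an equivalent way to get raw output).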
    eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json)
    if [ "$SEV" != "HEALTH_WARN" ]; then
        echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json)
    if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then
        echo "PG_BACKFILL_FULL message '$MSG' mismatched"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    # Work around for http://tracker.ceph.com/issues/38195
    kill_daemons $dir #|| return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
# Make sure we handle the case where 2 pgs try to backfill at the same
# time to the same target.  This might be covered by the simple test
# above, but this makes sure we get it.
#
# Create 10 pools of size 2 and identify 2 that have the same
# osd for their second replica.
# Delete all other pools
# Set size to 1 and write 4K * 600 to each pool
# Set size back to 2
# The 2 pools should race to backfill.
# One pool goes active+clean
# The other goes active+...+backfill_toofull
function TEST_backfill_test_sametarget() {
    local dir=$1
    local pools=10
    local OSDS=5    # assumed: few enough OSDs that 2 pools share a second replica

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done

    wait_for_clean || return 1
    # Find 2 pools with pgs that have distinct primaries but their
    # second replica on the same osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1
    local chk_osd2

    local PG2
    local POOLNUM2
    local pool2
    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ $p = "1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1=$test_osd1
            chk_osd2=$test_osd2
        elif [ $chk_osd1 != $test_osd1 -a $chk_osd2 = $test_osd2 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            break
        fi
    done
353 if [ "$pool2" = "" ];
355 echo "Failure to find appropirate PGs"
    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done
    ceph osd pool set $pool1 size 1
    ceph osd pool set $pool2 size 1

    wait_for_clean || return 1
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for i in $(seq 1 $objects)
    do
        rados -p $pool1 put obj$i $dir/datafile
        rados -p $pool2 put obj$i $dir/datafile
    done
    ceph osd pool set $pool1 size 2
    ceph osd pool set $pool2 size 2

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1
    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool $pool1
    delete_pool $pool2
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
# 2 pools can't both backfill to a target which has other data
# 1 of the pools has objects that increase from 1024 to 2611 bytes
#
# Write to fill pool which is size 1
# Take fill pool osd down (the other 2 pools must go to the remaining OSDs)
# Save an export of data on fill OSD and restart it
# Write an initial 1K to pool1 which has pg 2.0
# Export 2.0 from a non-fillpool OSD, without waiting for that OSD to start up
# Take down fillpool OSD
# Put 1K object version of 2.0 on fillpool OSD
# Put back fillpool data on fillpool OSD
# With fillpool down write 2611 byte objects
# Take down $osd and bring back $fillosd simultaneously
# Wait for backfilling
# One PG will be able to backfill its remaining data
# One PG must get backfill_toofull
function TEST_backfill_multi_partial() {
    local dir=$1
    local pools=2
    local OSDS=3    # fillpool on one OSD, the 2 size-2 pools fit on the other two

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1
    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done

    wait_for_clean || return 1
    # Partially fill an osd
    # We have room for 600 6K replicated objects, if we create 2611 byte objects
    # there is 3600K - (2611 * 600) = 2070K, so the fill pool and one
    # replica from the other 2 is 85% of 3600K
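    # Checking that arithmetic (explanatory, not executed):
    #   2611 bytes * 600 objects = 1566600 bytes, about 1530K, leaving
    #   3600K - 1530K = 2070K free; the fill pool (~1530K) plus one pool's
    #   replica of the same size (~1530K) is ~3060K = 85% of 3600K, right
    #   at the backfillfull ratio set above.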
    dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
    for o in $(seq 1 $objects)
    do
        rados -p fillpool put obj-fill-${o} $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj-fill-1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    kill $(cat $dir/osd.$fillosd.pid)
    ceph osd out osd.$fillosd

    _objectstore_tool_nodown $dir $fillosd --op export-remove --pgid 1.0 --file $dir/fillexport.out || return 1
    activate_osd $dir $fillosd || return 1
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
    for o in $(seq 1 $objects)
    do
        rados -p "${poolprefix}1" put obj-1-${o} $dir/datafile
    done
    # The $osd OSD is started, but we don't wait so we can kill $fillosd at the same time
    _objectstore_tool_nowait $dir $osd --op export --pgid 2.0 --file $dir/export.out
    kill $(cat $dir/osd.$fillosd.pid)

    _objectstore_tool_nodown $dir $fillosd --force --op remove --pgid 2.0
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out || return 1
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 1.0 --file $dir/fillexport.out || return 1
    # re-write everything
    dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj-${p}-${o} $dir/datafile
        done
    done
    kill $(cat $dir/osd.$osd.pid)
    ceph osd out osd.$osd

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    flush_pg_stats || return 1
525 if [ "$
(get_num_in_state backfill_toofull
)" != "1" ];
527 echo "One PG should be
in backfill_toofull
"
528 ERRORS="$
(expr $ERRORS + 1)"
531 if [ "$
(get_num_in_state active
+clean
)" != "2" ];
533 echo "Two PGs should be active
+clean after one PG completed backfill
"
534 ERRORS="$
(expr $ERRORS + 1)"
    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
# Make sure that the amount of bytes already on the replica doesn't
# cause an out of space condition
#
# Create 1 pool and write 4K * 600 objects
# Remove 25% (150) of the objects with one OSD down (noout set)
# Increase the size of the remaining 75% (450) of the objects to 6K
# Bring the down OSD back up
# The pool should go active+clean
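# Checking that arithmetic (explanatory, not executed): after the rewrite the
# pool holds 450 objects of 6K = 2700K per replica, which fits in 3600K below
# the 85% ratio (3060K).  A backfill reservation that naively added the new
# bytes on top of the copies already on the replica would overshoot, which is
# the condition this test guards against.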
function TEST_backfill_grow() {
    local dir=$1
    local poolname="test"
    local OSDS=3    # the size-3 pool below needs 3 OSDs

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3

    wait_for_clean || return 1
    dd if=/dev/urandom of=${dir}/4kdata bs=1k count=4
    for i in $(seq 1 $objects)
    do
        rados -p $poolname put obj$i $dir/4kdata
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)
    kill_daemons $dir TERM $otherosd || return 1

    rmobjects=$(expr $objects / 4)
    for i in $(seq 1 $rmobjects)
    do
        rados -p $poolname rm obj$i
    done

    dd if=/dev/urandom of=${dir}/6kdata bs=6k count=1
    for i in $(seq $(expr $rmobjects + 1) $objects)
    do
        rados -p $poolname put obj$i $dir/6kdata
    done
    activate_osd $dir $otherosd || return 1

    ceph tell osd.$primary debug kick_recovery_wq 0

    wait_for_clean || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
# Create a 5 shard EC pool on 6 OSD cluster
# Fill 1 OSD with 2600K of data, then take that osd down.
# Write the EC pool on 5 OSDs
# Take down 1 (must contain an EC shard)
# Bring up OSD with fill data
# Not enough room to backfill to partially full OSD
function TEST_ec_backfill_simple() {
    local dir=$1
    local pools=1
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1
    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 13K objects
    # there is only 3600K - (13K * 200) = 1000K which won't hold
    # a k=3 shard below ((18K / 3) + 4K) * 200 = 2000K
    # Actual usage per shard is 8K * 200 = 1600K because 18K/3 is 6K which
    # rounds to 8K.  The 2000K is the ceiling on the 18K * 200 = 3600K logical
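    # Checking that arithmetic (explanatory, not executed):
    #   fill: 13K * 200 = 2600K used, 3600K - 2600K = 1000K free
    #   reservation per shard: ((18K / 3) + 4K) * 200 = 10K * 200 = 2000K
    #   2000K > 1000K free, so backfill to the filled OSD must refuse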
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=13
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    kill $(cat $dir/osd.$fillosd.pid)
    ceph osd out osd.$fillosd

    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done
    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=18
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done
    kill $(cat $dir/osd.$osd.pid)
    ceph osd out osd.$osd

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1
    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
# Emit the list of OSD ids [0, OSDS) excluding one OSD, used to build
# pg-upmap mappings that avoid the filled OSD.
function osdlist() {
    local OSDS=$1
    local excludeosd=$2

    local osds=""
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        if [ $osd = $excludeosd ];
        then
            continue
        fi
        if [ -n "$osds" ]; then
            osds="${osds} "
        fi
        osds="${osds}${osd}"
    done
    echo $osds
}
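# Illustrative example (not part of the test):
#   osdlist 6 3   -> "0 1 2 4 5"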
# Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
# Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
# Remap the last OSD to partially full OSD on both pools
# The 2 pools should race to backfill.
# One pool goes active+clean
# The other goes active+...+backfill_toofull
function TEST_ec_backfill_multi() {
    local dir=$1
    local pools=2
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done
    # This test requires that shards from 2 different pools
    # fit on a given OSD, but both will not fit.  The fillosd
    # plus 1 shard uses roughly 75% of the space, leaving not
    # enough to stay under the 85% set here.
    ceph osd set-backfillfull-ratio .85
    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1
    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K which will only hold
    # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
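    # Checking that arithmetic (explanatory, not executed):
    #   fill: 9K * 200 = 1800K used, 3600K - 1800K = 1800K free
    #   one pool's shard reservation: ((12K / 3) + 4K) * 200 = 1600K, fits
    #   two pools would reserve 3200K > 1800K, so the loser goes toofull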
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    nonfillosds="$(osdlist $OSDS $fillosd)"
    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        ceph osd pg-upmap "$(expr $p + 1).0" $nonfillosds
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
        done
    done

    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap $(expr $p + 1).0 ${nonfillosds% *} $fillosd
    done

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1
    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
# Similar to TEST_ec_backfill_multi but one of the ec pools
# already had some data on the target OSD
#
# Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
# Write a small amount of data to 1 EC pool that still includes the filled one
# Take down fillosd with noout set
# Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
# Remap the last OSD to partially full OSD on both pools
# The 2 pools should race to backfill.
# One pool goes active+clean
# The other goes active+...+backfill_toofull
function SKIP_TEST_ec_backfill_multi_partial() {
    local dir=$1
    local pools=2
    local OSDS=5    # assumed: the k+m=5 upmaps below need 5 OSDs
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)
    local lastosd=$(expr $OSDS - 1)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # This test requires that shards from 2 different pools
    # fit on a given OSD, but both will not fit.  The fillosd
    # plus 1 shard uses roughly 75% of the space, leaving not
    # enough to stay under the 85% set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1
    ceph osd pg-upmap 1.0 $lastosd
    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K which will only hold
    # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    nonfillosds="$(osdlist $OSDS $fillosd)"

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
    for o in $(seq 1 $ecobjects)
    do
        rados -p "${poolprefix}1" put obj$o-1 $dir/datafile
    done

    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $(expr $lastosd - 1))
    done
    #kill_daemons $dir TERM osd.$lastosd || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
        done
    done
    # Now backfill lastosd by adding back into the upmap
    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
    done
    #activate_osd $dir $lastosd || return 1
    #ceph tell osd.0 debug kick_recovery_wq 0

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1
    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
# A second variant of the partial multi test: seed the target OSD with
# one pool's pg via export/import instead of upmap remapping.  Named
# with a "2" suffix so it does not redefine the skipped test above.
function SKIP_TEST_ec_backfill_multi_partial2() {
    local dir=$1
    local pools=2       # assumed
    local OSDS=6        # assumed
    local objects=200   # assumed from the sizing comments below

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done
    # Below we need to fit 3200K in 3600K which is 88%
    # of the fake OSD capacity, so a 90% ratio is used.
    ceph osd set-backfillfull-ratio .90

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 48K ec objects, if we create 4k replicated objects
    # there is 3600K - (4K * 200) = 2800K which won't hold 2 k=3 shards
    # of 200 12K objects which take ((12K / 3) + 4K) * 200 = 1600K each.
    # On the other OSDs 2 * 1600K = 3200K which is 88% of 3600K.
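    # Checking that arithmetic (explanatory, not executed):
    #   the OSDs holding both pools' shards carry 2 * 1600K = 3200K,
    #   which is 88.9% of 3600K -- over an 85% ratio but under the 90%
    #   set above, so those OSDs are not the limiting factor.
    #   The fillosd starts with 4K * 200 = 800K, so adding both 1600K
    #   shard reservations (800K + 3200K > 3600K) must fail for one pool.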
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    kill $(cat $dir/osd.$fillosd.pid)
    ceph osd out osd.$fillosd
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done
    #ceph pg map 2.0 --format=json | jq '.'
    kill $(cat $dir/osd.$osd.pid)
    ceph osd out osd.$osd

    _objectstore_tool_nodown $dir $osd --op export --pgid 2.0 --file $dir/export.out
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1
    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
# Write 200 12K objects: ((12K / 3) + 4K) * 200 = 1600K
# Take 1 shard's OSD down (with noout set)
# Remove 50 objects: ((12K / 3) + 4K) * 50 = 400K
# Write 150 36K objects (grow 150 objects): 2400K
# But there is already 1600K usage so backfill
# would be too full if it didn't account for existing data
# Bring the down OSD back so it must backfill
# It should go active+clean taking into account data already there
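# Checking that arithmetic (explanatory, not executed), per shard:
#   initial usage: ((12K / 3) + 4K) * 200 = 8K * 200 = 1600K
#   the down OSD still holds all of that 1600K, including removed objects
#   rewritten data to backfill: ((36K / 3) + 4K) * 150 = 16K * 150 = 2400K
#   reserving 1600K + 2400K = 4000K would exceed the 3600K capacity, but
#   the true final usage of 2400K (67%) fits, so backfill must succeed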
function TEST_ec_backfill_grow() {
    local dir=$1
    local poolname="test"
    local OSDS=6    # assumed
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1
    ceph osd pool create $poolname 1 1 erasure ec-profile

    wait_for_clean || return 1
    dd if=/dev/urandom of=${dir}/12kdata bs=1k count=12
    for i in $(seq 1 $ecobjects)
    do
        rados -p $poolname put obj$i $dir/12kdata
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)
    kill_daemons $dir TERM $otherosd || return 1

    rmobjects=$(expr $ecobjects / 4)
    for i in $(seq 1 $rmobjects)
    do
        rados -p $poolname rm obj$i
    done
    dd if=/dev/urandom of=${dir}/36kdata bs=1k count=36
    for i in $(seq $(expr $rmobjects + 1) $ecobjects)
    do
        rados -p $poolname put obj$i $dir/36kdata
    done
    activate_osd $dir $otherosd || return 1

    ceph tell osd.$primary debug kick_recovery_wq 0

    wait_for_clean || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
main osd-backfill-space "$@"
# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-space.sh"
# End: