#!/usr/bin/env bash
#
# Copyright (C) 2018 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7180" # git grep '\<7180\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
    CEPH_ARGS+="--fake_statfs_for_testing=3686400 "
    CEPH_ARGS+="--osd_max_backfills=10 "
    export objects=600
    export poolprefix=test

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}
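# run() accepts an optional list of test function names; with no arguments
# it discovers every function matching ^TEST_ in the shell environment.  A
# sketch of driving a single test directly (hypothetical invocation; the
# normal entry point is the main call at the bottom of this file):
#
#     run /tmp/osd-backfill-space TEST_backfill_test_simple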


function get_num_in_state() {
    local state=$1
    local expression
    expression+="select(contains(\"${state}\"))"
    ceph --format json pg dump pgs 2>/dev/null | \
        jq ".pg_stats | [.[] | .state | $expression] | length"
}
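# A minimal sketch of the jq filter above on canned input (the assumed
# shape of `ceph pg dump pgs --format json` output); not used by any test:
#
#     echo '{"pg_stats":[{"state":"active+remapped+backfilling"},{"state":"active+clean"}]}' |
#         jq '.pg_stats | [.[] | .state | select(contains("backfilling"))] | length'
#
# prints 1, i.e. one PG whose state string contains "backfilling".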


# Wait until no PGs are in the given state.  The delay ladder restarts
# whenever the count changes, so steady progress never times out.
function wait_for_not_state() {
    local state=$1
    local num_in_state=-1
    local cur_in_state
    local -a delays=($(get_timeout_delays $2 5))
    local -i loop=0

    flush_pg_stats || return 1
    # Make sure the PGs have been created before polling their states
    while test $(get_num_pgs) == 0 ; do
        sleep 1
    done

    while true ; do
        cur_in_state=$(get_num_in_state ${state})
        test $cur_in_state = "0" && break
        if test $cur_in_state != $num_in_state ; then
            # The count changed, so progress is being made: reset the delays
            loop=0
            num_in_state=$cur_in_state
        elif (( $loop >= ${#delays[*]} )) ; then
            # No progress through the entire delay ladder: give up
            ceph pg dump pgs
            return 1
        fi
        sleep ${delays[$loop]}
        loop+=1
    done
    return 0
}


function wait_for_not_backfilling() {
    local timeout=$1
    wait_for_not_state backfilling $timeout
}


function wait_for_not_activating() {
    local timeout=$1
    wait_for_not_state activating $timeout
}
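# Typical usage in the tests below: give backfill four minutes to drain,
# then give any PGs still stuck in activating one more minute:
#
#     wait_for_not_backfilling 240 || return 1
#     wait_for_not_activating 60 || return 1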

# All tests are created in an environment which has a fake total space
# of 3600K (3686400 bytes), which can hold 600 6K replicated objects or
# 200 18K shards of erasure coded objects.  For a k=3, m=2 EC pool the
# theoretical maximum is a 54K object, but with the 4K chunk size and
# 4K of rounding per chunk the largest usable object is 36K, since
# ((36K / 3) + 4K) * 200 = 3200K, which is 88% of the 3600K available
# to a shard.
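# A minimal sketch of the shard arithmetic above, for reference only (this
# hypothetical helper is not called by any test): space consumed on one
# shard's OSD, in K, for a given object size, k value, per-chunk rounding
# and object count.
function ec_shard_usage_kb() {
    local object_kb=$1
    local k=$2
    local pad_kb=$3
    local count=$4
    echo $(( ((object_kb / k) + pad_kb) * count ))
}
# Example: ec_shard_usage_kb 36 3 4 200 prints 3200, i.e. 88% of 3600K.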

# Create 2 pools with size 1
# Write enough data that only 1 pool pg can fit per osd
# Increase the pool size to 2
# On 3 OSDs this should result in 1 OSD with overlapping replicas,
# so both pools can't fit.  We assume pgid 1.0 and 2.0 won't
# map to the same 2 OSDs.
# At least 1 pool shouldn't have room to backfill
# All other pools should go active+clean
function TEST_backfill_test_simple() {
    local dir=$1
    local pools=2
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1
    done

    wait_for_clean || return 1

    # This won't work if the 2 pools' primary and only OSDs
    # are the same.

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    ceph pg dump pgs

    for p in $(seq 1 $pools)
    do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    expected="$(expr $pools - 1)"
    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
    then
        echo "$expected didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}


# Create 8 pools of size 1 on 20 OSDs
# Write 4K * 600 objects (only 1 pool pg can fit on any given osd)
# Increase pool size to 2
# At least 1 pool shouldn't have room to backfill
# All other pools should go active+clean
function TEST_backfill_test_multi() {
    local dir=$1
    local pools=8
    local OSDS=20

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1
    done

    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    ceph pg dump pgs

    for p in $(seq 1 $pools)
    do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    full="$(ceph pg dump pgs | grep +backfill_toofull | wc -l)"
    if [ "$full" -lt "1" ];
    then
        echo "At least one pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    expected="$(expr $pools - $full)"
    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
    then
        echo "$expected didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs
    ceph status

    ceph status --format=json-pretty > $dir/stat.json
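    # jq emits these values JSON-quoted; the evals below strip the quotes
    # (equivalent to jq -r) so the string comparisons see the bare values.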
    eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json)
    if [ "$SEV" != "HEALTH_WARN" ]; then
        echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json)
    if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then
        echo "PG_BACKFILL_FULL message '$MSG' mismatched"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    rm -f $dir/stat.json

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    # Workaround for http://tracker.ceph.com/issues/38195
    kill_daemons $dir #|| return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}


# Make sure that when 2 pgs try to backfill at the same time to the
# same target they are handled.  This might be covered by the simple
# test above, but this makes sure we exercise it.
#
# Create 10 pools of size 2 and identify 2 that have the same
# non-primary osd.
# Delete all other pools
# Set size to 1 and write 4K * 600 to each pool
# Set size back to 2
# The 2 pools should race to backfill.
# One pool goes active+clean
# The other goes active+...+backfill_toofull
function TEST_backfill_test_sametarget() {
    local dir=$1
    local pools=10
    local OSDS=5

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with pgs that have distinct primaries but their
    # second replica on the same osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1
    local chk_osd2

    local PG2
    local POOLNUM2
    local pool2
    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ $p = "1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1=$test_osd1
            chk_osd2=$test_osd2
        elif [ $chk_osd1 != $test_osd1 -a $chk_osd2 = $test_osd2 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            break
        fi
    done
    rm -f $dir/acting

    if [ "$pool2" = "" ];
    then
        echo "Failure to find appropriate PGs"
        return 1
    fi

    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    ceph osd pool set $pool1 size 1
    ceph osd pool set $pool2 size 1

    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for i in $(seq 1 $objects)
    do
        rados -p $pool1 put obj$i $dir/datafile
        rados -p $pool2 put obj$i $dir/datafile
    done

    ceph osd pool set $pool1 size 2
    ceph osd pool set $pool2 size 2
    sleep 5

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool $pool1
    delete_pool $pool2
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}

# 2 pools can't both backfill to a target which has other data
# 1 of the pools has objects that increase from 1024 to 2611 bytes
#
# Write to fill pool which is size 1
# Take fill pool osd down (other 2 pools must go to the remaining OSDs)
# Save an export of data on fill OSD and restart it
# Write an initial 1K to pool1 which has pg 2.0
# Export 2.0 from a non-fillpool OSD; don't wait for it to start up
# Take down fillpool OSD
# Put 1K object version of 2.0 on fillpool OSD
# Put back fillpool data on fillpool OSD
# With fillpool down write 2611 byte objects
# Take down $osd and bring back $fillosd simultaneously
# Wait for backfilling
# One PG will be able to backfill its remaining data
# One PG must get backfill_toofull
function TEST_backfill_multi_partial() {
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1
    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done

    wait_for_clean || return 1

    # Partially fill an osd
    # We have room for 600 6K replicated objects; if we create 2611 byte
    # objects there is 3600K - (2611 * 600) = 2070K left, so the fill pool
    # plus one replica from the other 2 pools is 85% of 3600K
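    # Worked out: 2611 B * 600 is about 1530K in the fill pool; the fill
    # pool plus one 1530K replica of another pool is about 3060K, i.e.
    # 85% of 3600K, right at the backfillfull ratio set above.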

    dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
    for o in $(seq 1 $objects)
    do
        rados -p fillpool put obj-fill-${o} $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj-fill-1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    sleep 5
    kill $(cat $dir/osd.$fillosd.pid)
    ceph osd out osd.$fillosd
    sleep 2

    _objectstore_tool_nodown $dir $fillosd --op export-remove --pgid 1.0 --file $dir/fillexport.out || return 1
    activate_osd $dir $fillosd || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
    for o in $(seq 1 $objects)
    do
        rados -p "${poolprefix}1" put obj-1-${o} $dir/datafile
    done

    ceph pg dump pgs
    # The $osd OSD is started, but we don't wait so we can kill $fillosd at the same time
    _objectstore_tool_nowait $dir $osd --op export --pgid 2.0 --file $dir/export.out
    kill $(cat $dir/osd.$fillosd.pid)
    sleep 5
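    # Replace fillosd's copy of pg 2.0 with the older 1K-object export and
    # restore its fillpool data, so the grown objects still need to be
    # backfilled to it later.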
    _objectstore_tool_nodown $dir $fillosd --force --op remove --pgid 2.0
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out || return 1
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 1.0 --file $dir/fillexport.out || return 1
    ceph pg dump pgs
    sleep 20
    ceph pg dump pgs

    # re-write everything
    dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj-${p}-${o} $dir/datafile
        done
    done

    kill $(cat $dir/osd.$osd.pid)
    ceph osd out osd.$osd

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 15

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    flush_pg_stats || return 1
    ceph pg dump pgs

    ERRORS=0
    if [ "$(get_num_in_state backfill_toofull)" != "1" ];
    then
        echo "One PG should be in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(get_num_in_state active+clean)" != "2" ];
    then
        echo "Two PGs should be active+clean after one PG completed backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}

# Make sure that the amount of bytes already on the replica doesn't
# cause an out of space condition
#
# Create 1 pool and write 4K * 600 objects
# Remove 25% (150) of the objects with one OSD down (noout set)
# Increase the size of the remaining 75% (450) of the objects to 6K
# Bring back the down OSD
# The pool should go active+clean
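# (Worked out: the 450 remaining objects at 6K each are 2700K, i.e. 75% of
# the 3600K fake total, so the backfill stays under the 85% ratio as long
# as the bytes already on the replica are credited rather than added on.)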
function TEST_backfill_grow() {
    local dir=$1
    local poolname="test"
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3
    sleep 5

    wait_for_clean || return 1

    dd if=/dev/urandom of=${dir}/4kdata bs=1k count=4
    for i in $(seq 1 $objects)
    do
        rados -p $poolname put obj$i $dir/4kdata
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    ceph osd set noout
    kill_daemons $dir TERM $otherosd || return 1

    rmobjects=$(expr $objects / 4)
    for i in $(seq 1 $rmobjects)
    do
        rados -p $poolname rm obj$i
    done

    dd if=/dev/urandom of=${dir}/6kdata bs=6k count=1
    for i in $(seq $(expr $rmobjects + 1) $objects)
    do
        rados -p $poolname put obj$i $dir/6kdata
    done

    activate_osd $dir $otherosd || return 1

    ceph tell osd.$primary debug kick_recovery_wq 0

    sleep 2

    wait_for_clean || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}

# Create a 5 shard EC pool on a 6 OSD cluster
# Fill 1 OSD with 2600K of data and take that osd down.
# Write the EC pool on 5 OSDs
# Take down 1 (must contain an EC shard)
# Bring up OSD with fill data
# Not enough room to backfill to the partially full OSD
function TEST_ec_backfill_simple() {
    local dir=$1
    local EC=$2
    local pools=1
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 18K replicated objects; if we create 13K objects
    # there is only 3600K - (13K * 200) = 1000K left, which won't hold
    # a k=3 shard reserved at ((18K / 3) + 4K) * 200 = 2000K.
    # Actual usage per shard is 8K * 200 = 1600K because 18K/3 is 6K which
    # rounds to 8K.  The 2000K is the ceiling on the 18K * 200 = 3600K logical
    # bytes in the pool.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=13
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    sleep 5
    kill $(cat $dir/osd.$fillosd.pid)
    ceph osd out osd.$fillosd
    sleep 2
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 5

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=18
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    kill $(cat $dir/osd.$osd.pid)
    ceph osd out osd.$osd

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 30

    ceph pg dump pgs

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}

function osdlist() {
    local OSDS=$1
    local excludeosd=$2

    osds=""
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        if [ $osd = $excludeosd ];
        then
            continue
        fi
        if [ -n "$osds" ]; then
            osds="${osds} "
        fi
        osds="${osds}${osd}"
    done
    echo $osds
}
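# Example: on a 5 OSD cluster, excluding osd.2,
#
#     osdlist 5 2
#
# prints "0 1 3 4".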

# Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
# Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
# Remap the last OSD to the partially full OSD on both pools
# The 2 pools should race to backfill.
# One pool goes active+clean
# The other goes active+...+backfill_toofull
function TEST_ec_backfill_multi() {
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # This test requires that a shard from each of the 2 pools fits on a
    # given OSD, but both together will not fit.  Filling the OSD so that
    # the fill data plus 1 shard uses 75% of the space leaves too little
    # room to stay under the 85% ratio set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 18K replicated objects; if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K left, which will only hold
    # one k=3 shard reserved at ((12K / 3) + 4K) * 200 = 1600K.
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
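    # (Two such shards would reserve 2 * 1600K = 3200K, more than the
    # 1800K free, so only one of the racing backfills can succeed.)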
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    nonfillosds="$(osdlist $OSDS $fillosd)"

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        ceph osd pg-upmap "$(expr $p + 1).0" $nonfillosds
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
        done
    done

    ceph pg dump pgs

    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap $(expr $p + 1).0 ${nonfillosds% *} $fillosd
    done

    sleep 10

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}

# Similar to TEST_ec_backfill_multi but one of the ec pools
# already had some data on the target OSD

# Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
# Write a small amount of data to 1 EC pool that still includes the filled one
# Take down fillosd with noout set
# Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
# Remap the last OSD to the partially full OSD on both pools
# The 2 pools should race to backfill.
# One pool goes active+clean
# The other goes active+...+backfill_toofull
function SKIP_TEST_ec_backfill_multi_partial() {
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=5
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)
    local lastosd=$(expr $OSDS - 1)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # This test requires that a shard from each of the 2 pools fits on a
    # given OSD, but both together will not fit.  Filling the OSD so that
    # the fill data plus 1 shard uses 75% of the space leaves too little
    # room to stay under the 85% ratio set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1
    # last osd
    ceph osd pg-upmap 1.0 $lastosd

    # Partially fill an osd
    # We have room for 200 18K replicated objects; if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K left, which will only hold
    # one k=3 shard reserved at ((12K / 3) + 4K) * 200 = 1600K.
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    nonfillosds="$(osdlist $OSDS $fillosd)"

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
    for o in $(seq 1 $ecobjects)
    do
        rados -p "${poolprefix}1" put obj$o-1 $dir/datafile
    done

    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $(expr $lastosd - 1))
    done
    ceph pg dump pgs

    #ceph osd set noout
    #kill_daemons $dir TERM osd.$lastosd || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
        done
    done

    ceph pg dump pgs

    # Now backfill lastosd by adding back into the upmap
    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
    done
    #activate_osd $dir $lastosd || return 1
    #ceph tell osd.0 debug kick_recovery_wq 0

    sleep 10
    ceph pg dump pgs

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}

# A second, also skipped, variant: instead of remapping with pg-upmap it
# seeds the fill OSD with an export of pg 2.0 before bringing it back.
# (Renamed so it no longer shadows the previous function of the same name.)
function SKIP_TEST_ec_backfill_multi_partial_2() {
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=6

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # Below we need to fit 3200K in 3600K which is 88%
    # so set to 90%
    ceph osd set-backfillfull-ratio .90

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 48K ec objects; if we create 4K replicated objects
    # there is 3600K - (4K * 200) = 2800K, which won't hold 2 k=3 shards
    # of 200 12K objects, which take ((12K / 3) + 4K) * 200 = 1600K each.
    # On the other OSDs 2 * 1600K = 3200K which is 88% of 3600K.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    sleep 5
    kill $(cat $dir/osd.$fillosd.pid)
    ceph osd out osd.$fillosd
    sleep 2
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 5

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    #ceph pg map 2.0 --format=json | jq '.'
    kill $(cat $dir/osd.$osd.pid)
    ceph osd out osd.$osd

    _objectstore_tool_nodown $dir $osd --op export --pgid 2.0 --file $dir/export.out
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 15

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}

# Create 1 EC pool
# Write 200 12K objects: ((12K / 3) + 4K) * 200 = 1600K
# Take 1 shard's OSD down (with noout set)
# Remove 50 objects: ((12K / 3) + 4K) * 50 = 400K
# Write 150 36K objects (grow 150 objects): 2400K
# But there is already 1600K of usage, so backfill
# would be too full if it didn't account for existing data
# Bring back the down OSD so it must backfill
# It should go active+clean taking into account data already there
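# (Worked out: the grown objects reserve ((36K / 3) + 4K) * 150 = 2400K on
# the returning shard.  Added on top of the 1600K already there that would
# be 4000K, over the 3600K total; credited against it, the final 2400K is
# only 67%, comfortably under the 85% ratio.)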
function TEST_ec_backfill_grow() {
    local dir=$1
    local poolname="test"
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1
    ceph osd pool create $poolname 1 1 erasure ec-profile

    wait_for_clean || return 1

    dd if=/dev/urandom of=${dir}/12kdata bs=1k count=12
    for i in $(seq 1 $ecobjects)
    do
        rados -p $poolname put obj$i $dir/12kdata
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    ceph osd set noout
    kill_daemons $dir TERM $otherosd || return 1

    rmobjects=$(expr $ecobjects / 4)
    for i in $(seq 1 $rmobjects)
    do
        rados -p $poolname rm obj$i
    done

    dd if=/dev/urandom of=${dir}/36kdata bs=1k count=36
    for i in $(seq $(expr $rmobjects + 1) $ecobjects)
    do
        rados -p $poolname put obj$i $dir/36kdata
    done

    activate_osd $dir $otherosd || return 1

    ceph tell osd.$primary debug kick_recovery_wq 0

    sleep 2

    wait_for_clean || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}

main osd-backfill-space "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-space.sh"
# End: