# ceph/qa/standalone/osd/osd-backfill-space.sh
# (imported from ceph.git, 14.2.4 nautilus point release)
1 #!/usr/bin/env bash
2 #
3 # Copyright (C) 2018 Red Hat <contact@redhat.com>
4 #
5 # Author: David Zafman <dzafman@redhat.com>
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
10 # any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
16 #
17
18 source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
19
# Test harness entry point: configure a single-mon test cluster with a
# faked 3600K statfs, then run each requested TEST_* function (or all
# of them) inside its own setup/teardown cycle.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7180" # git grep '\<7180\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
    CEPH_ARGS+="--fake_statfs_for_testing=3686400 "
    CEPH_ARGS+="--osd_max_backfills=10 "
    export objects=600
    export poolprefix=test

    # Default to every TEST_* function currently defined in this file.
    local tests
    if [ $# -gt 0 ]; then
        tests="$@"
    else
        tests=$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')
    fi

    local tst
    for tst in $tests ; do
        setup $dir || return 1
        $tst $dir || return 1
        teardown $dir || return 1
    done
}
41
42
# Count the PGs whose state string contains $1 (e.g. "backfilling",
# "backfill_toofull", "active+clean").  Prints the count on stdout.
function get_num_in_state() {
    local want=$1
    local filter="select(contains(\"${want}\"))"
    ceph --format json pg dump pgs 2>/dev/null | \
        jq ".pg_stats | [.[] | .state | $filter] | length"
}
50
51
# Poll until no PG is in the given state.
# $1 - substring to look for in the PG state (e.g. "backfilling")
# $2 - base timeout handed to get_timeout_delays to build the sleep schedule
# Returns 0 once the count of matching PGs reaches 0.  Returns 1 (after
# dumping pgs for diagnostics) if the count stops changing for the whole
# delay schedule, i.e. no progress is being made.
function wait_for_state() {
    local state=$1
    local num_in_state=-1
    local cur_in_state
    local -a delays=($(get_timeout_delays $2 5))
    local -i loop=0

    flush_pg_stats || return 1
    # Wait until at least one PG exists before checking states.
    while test $(get_num_pgs) == 0 ; do
        sleep 1
    done

    while true ; do
        cur_in_state=$(get_num_in_state ${state})
        test $cur_in_state = "0" && break
        if test $cur_in_state != $num_in_state ; then
            # Progress was made: restart the delay schedule.
            loop=0
            num_in_state=$cur_in_state
        elif (( $loop >= ${#delays[*]} )) ; then
            # No change across the entire delay schedule: give up.
            ceph pg dump pgs
            return 1
        fi
        sleep ${delays[$loop]}
        loop+=1
    done
    return 0
}
79
80
# Block until no PG reports "backfilling"; fail after roughly $1 seconds
# without progress.
function wait_for_backfill() {
    local timeout=$1
    wait_for_state backfilling "$timeout"
}
85
86
# Block until no PG reports "activating"; fail after roughly $1 seconds
# without progress.
function wait_for_active() {
    local timeout=$1
    wait_for_state activating "$timeout"
}
91
92 # All tests are created in an environment which has fake total space
93 # of 3600K (3686400) which can hold 600 6K replicated objects or
94 # 200 18K shards of erasure coded objects. For a k=3, m=2 EC pool
95 # we have a theoretical 54K object but with the chunk size of 4K
96 # and a rounding of 4K to account for the chunks is 36K max object
97 # which is ((36K / 3) + 4K) * 200 = 3200K which is 88% of
98 # 3600K for a shard.
99
100 # Create 2 pools with size 1
101 # Write enough data that only 1 pool pg can fit per osd
# Increase the pool size to 2
103 # On 3 OSDs this should result in 1 OSD with overlapping replicas,
104 # so both pools can't fit. We assume pgid 1.0 and 2.0 won't
105 # map to the same 2 OSDs.
106 # At least 1 pool shouldn't have room to backfill
107 # All other pools should go active+clean
# Create 2 size-1 pools, fill them so only one pool's PG fits per OSD,
# then raise size to 2: exactly one PG should end in backfill_toofull
# while the rest go active+clean.
function TEST_backfill_test_simple() {
    local dir=$1
    local pools=2
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1
    done

    wait_for_clean || return 1

    # This won't work if the 2 pools' primary and only OSDs
    # are the same.

    # 4K objects * 600 per pool: a single pool's PG fits per OSD, two don't.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    ceph pg dump pgs

    # Grow both pools to size 2, forcing backfill onto a shared OSD.
    for p in $(seq 1 $pools)
    do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    expected="$(expr $pools - 1)"
    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
    then
        echo "$expected didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
    # Fail the test if any OSD logged a statistics mismatch.
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
183
184
185 # Create 8 pools of size 1 on 20 OSDs
186 # Write 4K * 600 objects (only 1 pool pg can fit on any given osd)
187 # Increase pool size to 2
188 # At least 1 pool shouldn't have room to backfill
189 # All other pools should go active+clean
# Create 8 size-1 pools on 20 OSDs, fill them, then raise size to 2.
# At least one PG must hit backfill_toofull; every PG that is not
# toofull must finish backfill and go active+clean.
function TEST_backfill_test_multi() {
    local dir=$1
    local pools=8
    local OSDS=20

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1
    done

    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    ceph pg dump pgs

    for p in $(seq 1 $pools)
    do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ERRORS=0
    full="$(ceph pg dump pgs | grep +backfill_toofull | wc -l)"
    if [ "$full" -lt "1" ];
    then
        echo "At least one pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    # All PGs that are not toofull should have completed backfill.
    expected="$(expr $pools - $full)"
    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
    then
        echo "$expected didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    # Work around for http://tracker.ceph.com/issues/38195
    kill_daemons $dir #|| return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
264
265
266 # To make sure that when 2 pg try to backfill at the same time to
267 # the same target. This might be covered by the simple test above
268 # but this makes sure we get it.
269 #
270 # Create 10 pools of size 2 and identify 2 that have the same
271 # non-primary osd.
272 # Delete all other pools
273 # Set size to 1 and write 4K * 600 to each pool
274 # Set size back to 2
275 # The 2 pools should race to backfill.
276 # One pool goes active+clean
# The other goes active+...+backfill_toofull
# Make two PGs race to backfill to the same target OSD: one must win
# (active+clean) and the other must end up backfill_toofull.
function TEST_backfill_test_sametarget() {
    local dir=$1
    local pools=10
    local OSDS=5

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with a pg that have distinct primaries but their
    # second replica on the same osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1
    local chk_osd2

    local PG2
    local POOLNUM2
    local pool2
    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ $p = "1" ];
        then
            # Pool 1 is the reference; later pools are compared to it.
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1=$test_osd1
            chk_osd2=$test_osd2
        elif [ $chk_osd1 != $test_osd1 -a $chk_osd2 = $test_osd2 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            break
        fi
    done
    rm -f $dir/acting

    if [ "$pool2" = "" ];
    then
        echo "Failure to find appropirate PGs"
        return 1
    fi

    # Drop every pool except the racing pair.
    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    ceph osd pool set $pool1 size 1
    ceph osd pool set $pool2 size 1

    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for i in $(seq 1 $objects)
    do
        rados -p $pool1 put obj$i $dir/datafile
        rados -p $pool2 put obj$i $dir/datafile
    done

    # Growing both pools back to size 2 starts the backfill race.
    ceph osd pool set $pool1 size 2
    ceph osd pool set $pool2 size 2
    sleep 5

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool $pool1
    delete_pool $pool2
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
396
397 # 2 pools can't both backfill to a target which has other data
398 # 1 of the pools has objects that increase from 1024 to 2611 bytes
399 #
400 # Write to fill pool which is size 1
# Take fill pool osd down (other 2 pools must go to the remaining OSDs)
402 # Save an export of data on fill OSD and restart it
# Write an initial 1K to pool1 which has pg 2.0
404 # Export 2.0 from non-fillpool OSD don't wait for it to start-up
405 # Take down fillpool OSD
406 # Put 1K object version of 2.0 on fillpool OSD
407 # Put back fillpool data on fillpool OSD
408 # With fillpool down write 2611 byte objects
409 # Take down $osd and bring back $fillosd simultaneously
410 # Wait for backfilling
411 # One PG will be able to backfill its remaining data
412 # One PG must get backfill_toofull
# Two PGs race to backfill onto a target OSD that already holds other
# data; one must succeed, the other must go backfill_toofull.  The
# comment block above this function lists the choreography step by step.
function TEST_backfill_multi_partial() {
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1
    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done

    wait_for_clean || return 1

    # Partially fill an osd
    # We have room for 600 6K replicated objects, if we create 2611 byte objects
    # there is 3600K - (2611 * 600) = 2070K, so the fill pool and one
    # replica from the other 2 is 85% of 3600K

    dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
    for o in $(seq 1 $objects)
    do
        rados -p fillpool put obj-fill-${o} $dir/datafile
    done

    # Pick a second osd ($osd) distinct from the one with the fill data.
    local fillosd=$(get_primary fillpool obj-fill-1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    sleep 5
    kill $(cat $dir/osd.$fillosd.pid)
    ceph osd out osd.$fillosd
    sleep 2

    # Save the fill data so it can be restored later.
    _objectstore_tool_nodown $dir $fillosd --op export-remove --pgid 1.0 --file $dir/fillexport.out || return 1
    activate_osd $dir $fillosd || return 1

    ceph pg dump pgs

    # Write the initial small (1K) objects to pool 1.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
    for o in $(seq 1 $objects)
    do
        rados -p "${poolprefix}1" put obj-1-${o} $dir/datafile
    done

    ceph pg dump pgs
    # The $osd OSD is started, but we don't wait so we can kill $fillosd at the same time
    _objectstore_tool_nowait $dir $osd --op export --pgid 2.0 --file $dir/export.out
    kill $(cat $dir/osd.$fillosd.pid)
    sleep 5
    # Seed $fillosd with the 1K-object version of pg 2.0 plus the original
    # fill data, so backfill finds partial data already in place there.
    _objectstore_tool_nodown $dir $fillosd --force --op remove --pgid 2.0
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out || return 1
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 1.0 --file $dir/fillexport.out || return 1
    ceph pg dump pgs
    sleep 20
    ceph pg dump pgs

    # re-write everything
    dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj-${p}-${o} $dir/datafile
        done
    done

    # Swap the live $osd for the seeded $fillosd to force backfill to it.
    kill $(cat $dir/osd.$osd.pid)
    ceph osd out osd.$osd

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 15

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    flush_pg_stats || return 1
    ceph pg dump pgs

    ERRORS=0
    if [ "$(get_num_in_state backfill_toofull)" != "1" ];
    then
        echo "One PG should be in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(get_num_in_state active+clean)" != "2" ];
    then
        echo "Two PGs should be active+clean after one PG completed backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
535
536 # Make sure that the amount of bytes already on the replica doesn't
537 # cause an out of space condition
538 #
539 # Create 1 pool and write 4K * 600 objects
540 # Remove 25% (150) of the objects with one OSD down (noout set)
541 # Increase the size of the remaining 75% (450) of the objects to 6K
542 # Bring back down OSD
543 # The pool should go active+clean
# Verify that bytes already present on a backfill target are credited,
# so growing the remaining objects does not trigger a false toofull:
# the pool must end active+clean.
function TEST_backfill_grow() {
    local dir=$1
    local poolname="test"
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3
    sleep 5

    wait_for_clean || return 1

    dd if=/dev/urandom of=${dir}/4kdata bs=1k count=4
    for i in $(seq 1 $objects)
    do
        rados -p $poolname put obj$i $dir/4kdata
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    # Take one replica down; noout keeps it from being marked out.
    ceph osd set noout
    kill_daemons $dir TERM $otherosd || return 1

    # Remove 25% of the objects while the replica is down ...
    rmobjects=$(expr $objects / 4)
    for i in $(seq 1 $rmobjects)
    do
        rados -p $poolname rm obj$i
    done

    # ... and grow the surviving 75% from 4K to 6K.
    dd if=/dev/urandom of=${dir}/6kdata bs=6k count=1
    for i in $(seq $(expr $rmobjects + 1) $objects)
    do
        rados -p $poolname put obj$i $dir/6kdata
    done

    activate_osd $dir $otherosd || return 1

    ceph tell osd.$primary debug kick_recovery_wq 0

    sleep 2

    wait_for_clean || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
603
604 # Create a 5 shard EC pool on 6 OSD cluster
605 # Fill 1 OSD with 2600K of data take that osd down.
606 # Write the EC pool on 5 OSDs
607 # Take down 1 (must contain an EC shard)
608 # Bring up OSD with fill data
# Not enough room to backfill to partially full OSD
# EC variant of the simple test: a partially filled OSD must not have
# room to accept a k=3/m=2 shard, producing backfill_toofull.
function TEST_ec_backfill_simple() {
    local dir=$1
    local EC=$2
    local pools=1
    local OSDS=6
    local k=3
    local m=2
    # EC objects are larger per logical object, so fewer are written.
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 13K objects
    # there is only 3600K - (13K * 200) = 1000K which won't hold
    # a k=3 shard below ((18K / 3) + 4K) * 200 = 2000K
    # Actual usage per shard is 8K * 200 = 1600K because 18K/3 is 6K which
    # rounds to 8K. The 2000K is the ceiling on the 18K * 200 = 3600K logical
    # bytes in the pool.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=13
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    # Pick a second osd ($osd) distinct from the filled one.
    local fillosd=$(get_primary fillpool obj1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    sleep 5
    kill $(cat $dir/osd.$fillosd.pid)
    ceph osd out osd.$fillosd
    sleep 2
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 5

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=18
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    # Swap a shard-holding osd for the filled one to force backfill to it.
    kill $(cat $dir/osd.$osd.pid)
    ceph osd out osd.$osd

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 30

    ceph pg dump pgs

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    # Exclude pg 1.0 (the fillpool) from the check.
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
709
# Print the OSD ids 0..OSDS-1, space-separated, skipping one id.
# $1 - total number of OSDs
# $2 - osd id to exclude from the list
# Outputs the ids on stdout (e.g. "0 1 3 4" for osdlist 5 2).
function osdlist() {
    local OSDS=$1
    local excludeosd=$2
    # Locals (the original leaked $osds/$osd into the global scope).
    local -a osds=()
    local osd

    for ((osd = 0; osd < OSDS; osd++))
    do
        # Quoted string compare: safe even if excludeosd is empty.
        if [ "$osd" != "$excludeosd" ]; then
            osds+=("$osd")
        fi
    done
    echo "${osds[@]}"
}
728
729 # Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
730 # Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
731 # Remap the last OSD to partially full OSD on both pools
732 # The 2 pools should race to backfill.
733 # One pool goes active+clean
# The other goes active+...+backfill_toofull
# Two EC pools race to backfill shards onto a partially filled OSD;
# one wins (active+clean), the other must go backfill_toofull.
function TEST_ec_backfill_multi() {
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # This test requires that shards from 2 different pools
    # fit on a given OSD, but both will not fit.  The fillosd
    # data plus 1 shard uses 75% of the space, leaving not
    # enough to stay under the 85% ratio set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K which will only hold
    # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    # Initially map both EC pools away from the filled osd.
    nonfillosds="$(osdlist $OSDS $fillosd)"

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        ceph osd pg-upmap "$(expr $p + 1).0" $nonfillosds
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
        done
    done

    ceph pg dump pgs

    # Remap the last OSD of each pool to the filled one; both PGs now
    # race to backfill a shard there.
    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap $(expr $p + 1).0 ${nonfillosds% *} $fillosd
    done

    sleep 10

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    # Exclude pg 1.0 (the fillpool) from both checks.
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
840
841 # Similar to TEST_ec_backfill_multi but one of the ec pools
842 # already had some data on the target OSD
843
844 # Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
845 # Write a small amount of data to 1 EC pool that still includes the filled one
846 # Take down fillosd with noout set
847 # Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
848 # Remap the last OSD to partially full OSD on both pools
849 # The 2 pools should race to backfill.
850 # One pool goes active+clean
# The other goes active+...+backfill_toofull
# NOTE(review): disabled via the SKIP_ prefix, and its name collides
# with a second SKIP_TEST_ec_backfill_multi_partial defined later in
# this file; the later definition shadows this one.  Rename one of them
# before re-enabling either.
function SKIP_TEST_ec_backfill_multi_partial() {
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=5
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)
    local lastosd=$(expr $OSDS - 1)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # This test requires that shards from 2 different pools
    # fit on a given OSD, but both will not fit.  The fillosd
    # data plus 1 shard uses 75% of the space, leaving not
    # enough to stay under the 85% ratio set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1
    # last osd
    ceph osd pg-upmap 1.0 $lastosd

    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K which will only hold
    # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    nonfillosds="$(osdlist $OSDS $fillosd)"

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    # Pre-seed pool 1 with a little data while $lastosd is still mapped.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
    for o in $(seq 1 $ecobjects)
    do
        rados -p "${poolprefix}1" put obj$o-1 $dir/datafile
    done

    # Drop $lastosd from both pools' mappings.
    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $(expr $lastosd - 1))
    done
    ceph pg dump pgs

    #ceph osd set noout
    #kill_daemons $dir TERM osd.$lastosd || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
        done
    done

    ceph pg dump pgs

    # Now backfill lastosd by adding back into the upmap
    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
    done
    #activate_osd $dir $lastosd || return 1
    #ceph tell osd.0 debug kick_recovery_wq 0

    sleep 10
    ceph pg dump pgs

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
979
# Second (duplicate) definition of SKIP_TEST_ec_backfill_multi_partial.
# NOTE(review): this shadows the earlier function of the same name; both
# are disabled via the SKIP_ prefix.  Rename one before re-enabling.
function SKIP_TEST_ec_backfill_multi_partial() {
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=6

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # Below we need to fit 3200K in 3600K which is 88%
    # so set to 90%
    ceph osd set-backfillfull-ratio .90

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 48K ec objects, if we create 4k replicated objects
    # there is 3600K - (4K * 200) = 2800K which won't hold 2 k=3 shard
    # of 200 12K objects which takes ((12K / 3) + 4K) * 200 = 1600K each.
    # On the other OSDs 2 * 1600K = 3200K which is 88% of 3600K.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    # Pick a second osd ($osd) distinct from the filled one.
    local fillosd=$(get_primary fillpool obj1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    sleep 5
    kill $(cat $dir/osd.$fillosd.pid)
    ceph osd out osd.$fillosd
    sleep 2
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 5

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    #ceph pg map 2.0 --format=json | jq '.'
    # Move pg 2.0's data onto the filled osd before bringing it back in.
    kill $(cat $dir/osd.$osd.pid)
    ceph osd out osd.$osd

    _objectstore_tool_nodown $dir $osd --op export --pgid 2.0 --file $dir/export.out
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 15

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
1087
1088 # Create 1 EC pool
1089 # Write 200 12K objects ((12K / 3) + 4K) *200) = 1600K
1090 # Take 1 shard's OSD down (with noout set)
1091 # Remove 50 objects ((12K / 3) + 4k) * 50) = 400K
1092 # Write 150 36K objects (grow 150 objects) 2400K
1093 # But there is already 1600K usage so backfill
1094 # would be too full if it didn't account for existing data
1095 # Bring back down OSD so it must backfill
1096 # It should go active+clean taking into account data already there
# EC variant of TEST_backfill_grow: existing shard bytes on the target
# must be credited so the grown objects still fit; the pool must end
# active+clean.
function TEST_ec_backfill_grow() {
    local dir=$1
    local poolname="test"
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1
    ceph osd pool create $poolname 1 1 erasure ec-profile

    wait_for_clean || return 1

    dd if=/dev/urandom of=${dir}/12kdata bs=1k count=12
    for i in $(seq 1 $ecobjects)
    do
        rados -p $poolname put obj$i $dir/12kdata
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    # Take one shard's OSD down; noout keeps it from being marked out.
    ceph osd set noout
    kill_daemons $dir TERM $otherosd || return 1

    # Remove 25% of the objects while the shard is down ...
    rmobjects=$(expr $ecobjects / 4)
    for i in $(seq 1 $rmobjects)
    do
        rados -p $poolname rm obj$i
    done

    # ... and grow the surviving 75% from 12K to 36K.
    dd if=/dev/urandom of=${dir}/36kdata bs=1k count=36
    for i in $(seq $(expr $rmobjects + 1) $ecobjects)
    do
        rados -p $poolname put obj$i $dir/36kdata
    done

    activate_osd $dir $otherosd || return 1

    ceph tell osd.$primary debug kick_recovery_wq 0

    sleep 2

    wait_for_clean || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
1158
1159 main osd-backfill-space "$@"
1160
1161 # Local Variables:
1162 # compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-space.sh"
1163 # End: