]> git.proxmox.com Git - ceph.git/blob - ceph/qa/standalone/osd-backfill/osd-backfill-space.sh
8bc452d6cbbfc2b401bd5cfd24027d0d66016bdc
[ceph.git] / ceph / qa / standalone / osd-backfill / osd-backfill-space.sh
1 #!/usr/bin/env bash
2 #
3 # Copyright (C) 2018 Red Hat <contact@redhat.com>
4 #
5 # Author: David Zafman <dzafman@redhat.com>
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
10 # any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
16 #
17
18 source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
19
function run() {
    # Entry point called by main: run each requested TEST_* function in a
    # freshly set-up test cluster rooted at $dir.
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7180" # git grep '\<7180\>' : there must be only one
    export CEPH_ARGS
    # --fake_statfs_for_testing makes every OSD report 3600K (3686400 bytes)
    # total space, so small writes can trigger backfill_toofull.
    local opt
    for opt in \
        "--fsid=$(uuidgen) --auth-supported=none" \
        "--mon-host=$CEPH_MON" \
        "--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10" \
        "--fake_statfs_for_testing=3686400" \
        "--osd_max_backfills=10" \
        "--osd_mclock_profile=high_recovery_ops"
    do
        CEPH_ARGS+="$opt "
    done
    export objects=600
    export poolprefix=test

    # Default to every TEST_* function defined in this file.
    local selected=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local testfn
    for testfn in $selected ; do
        setup $dir || return 1
        $testfn $dir || return 1
        teardown $dir || return 1
    done
}
42
43
function get_num_in_state() {
    # Print the number of PGs whose state string contains $1
    # (e.g. "backfilling", "backfill_toofull", "active+clean").
    local target=$1
    local filter="select(contains(\"${target}\"))"
    ceph --format json pg dump pgs 2>/dev/null | \
      jq ".pg_stats | [.[] | .state | $filter] | length"
}
51
52
function wait_for_not_state() {
    # Poll until no PG's state string contains $1, or give up after the
    # timeout schedule derived from $2 expires.
    # $1 - state substring to wait to disappear (e.g. "backfilling")
    # $2 - base timeout in seconds; get_timeout_delays expands it into a
    #      sequence of sleep intervals
    # Returns 0 once no PG is in the state, 1 on timeout (after dumping pgs).
    local state=$1
    local num_in_state=-1
    local cur_in_state
    local -a delays=($(get_timeout_delays $2 5))
    local -i loop=0

    flush_pg_stats || return 1
    # Don't start polling states until PG stats actually exist.
    while test $(get_num_pgs) == 0 ; do
        sleep 1
    done

    while true ; do
        cur_in_state=$(get_num_in_state ${state})
        test $cur_in_state = "0" && break
        if test $cur_in_state != $num_in_state ; then
            # The count changed, i.e. progress was made: restart the
            # delay schedule from the beginning.
            loop=0
            num_in_state=$cur_in_state
        elif (( $loop >= ${#delays[*]} )) ; then
            # No progress for the entire delay schedule: time out.
            ceph pg dump pgs
            return 1
        fi
        sleep ${delays[$loop]}
        loop+=1
    done
    return 0
}
80
81
function wait_for_not_backfilling() {
    # Block until no PG reports "backfilling", failing after $1 seconds.
    wait_for_not_state backfilling "$1"
}
86
87
function wait_for_not_activating() {
    # Block until no PG reports "activating", failing after $1 seconds.
    wait_for_not_state activating "$1"
}
92
93 # All tests are created in an environment which has fake total space
94 # of 3600K (3686400) which can hold 600 6K replicated objects or
95 # 200 18K shards of erasure coded objects. For a k=3, m=2 EC pool
96 # we have a theoretical 54K object but with the chunk size of 4K
97 # and a rounding of 4K to account for the chunks is 36K max object
98 # which is ((36K / 3) + 4K) * 200 = 3200K which is 88% of
99 # 3600K for a shard.
100
101 # Create 2 pools with size 1
102 # Write enough data that only 1 pool pg can fit per osd
# Increase the pool size to 2
104 # On 3 OSDs this should result in 1 OSD with overlapping replicas,
105 # so both pools can't fit. We assume pgid 1.0 and 2.0 won't
106 # map to the same 2 OSDs.
107 # At least 1 pool shouldn't have room to backfill
108 # All other pools should go active+clean
function TEST_backfill_test_simple() {
    # Two size-1 pools filled so each OSD can hold only one pool's pg;
    # growing both pools to size 2 should leave exactly one pg toofull.
    local dir=$1
    local pools=2
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it
    done

    wait_for_clean || return 1

    # This won't work if the 2 pools' primary (and only) osds
    # are the same.

    # 600 objects of 4K each ~= the full fake 3600K OSD capacity per pool.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    ceph pg dump pgs

    # Grow both pools to size 2, forcing each pg to backfill a replica.
    for p in $(seq 1 $pools)
    do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    expected="$(expr $pools - 1)"
    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
    then
        echo "$expected didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
    # Fail if any OSD logged a stats accounting mismatch.
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
184
185
186 # Create 8 pools of size 1 on 20 OSDs
187 # Write 4K * 600 objects (only 1 pool pg can fit on any given osd)
188 # Increase pool size to 2
189 # At least 1 pool shouldn't have room to backfill
190 # All other pools should go active+clean
function TEST_backfill_test_multi() {
    # Same idea as TEST_backfill_test_simple but with 8 pools over 20 OSDs;
    # also validates the PG_BACKFILL_FULL health check output.
    local dir=$1
    local pools=8
    local OSDS=20

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it
    done

    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    ceph pg dump pgs

    for p in $(seq 1 $pools)
    do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    full="$(ceph pg dump pgs | grep +backfill_toofull | wc -l)"
    if [ "$full" -lt "1" ];
    then
        echo "At least one pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    expected="$(expr $pools - $full)"
    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
    then
        echo "$expected didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs
    ceph status

    ceph status --format=json-pretty > $dir/stat.json

    # eval strips the surrounding JSON double quotes from jq's output.
    eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json)
    if [ "$SEV" != "HEALTH_WARN" ]; then
        echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json)
    # NOTE(review): this expects exactly 4 toofull pgs although the check
    # above only requires >= 1 — confirm 4 is deterministic for this layout.
    if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then
        echo "PG_BACKFILL_FULL message '$MSG' mismatched"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    rm -f $dir/stat.json

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    # Work around for http://tracker.ceph.com/issues/38195
    kill_daemons $dir #|| return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
280
281
282 # To make sure that when 2 pg try to backfill at the same time to
283 # the same target. This might be covered by the simple test above
284 # but this makes sure we get it.
285 #
286 # Create 10 pools of size 2 and identify 2 that have the same
287 # non-primary osd.
288 # Delete all other pools
289 # Set size to 1 and write 4K * 600 to each pool
290 # Set size back to 2
291 # The 2 pools should race to backfill.
292 # One pool goes active+clean
# The other goes active+...+backfill_toofull
function TEST_backfill_test_sametarget() {
    # Force two pgs from different pools to backfill to the same target OSD
    # at once; only one can fit, the other must go backfill_toofull.
    local dir=$1
    local pools=10
    local OSDS=5

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with a pg that has distinct primaries but the second
    # replica on the same osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1
    local chk_osd2

    local PG2
    local POOLNUM2
    local pool2
    for p in $(seq 1 $pools)
    do
        # acting set: first line is the primary, last line the 2nd replica.
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ $p = "1" ];
        then
            # Pool 1 is the reference; later pools are compared against it.
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1=$test_osd1
            chk_osd2=$test_osd2
        elif [ $chk_osd1 != $test_osd1 -a $chk_osd2 = $test_osd2 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            break
        fi
    done
    rm -f $dir/acting

    if [ "$pool2" = "" ];
    then
        echo "Failure to find appropirate PGs"
        return 1
    fi

    # Only keep the two pools that share a 2nd-replica OSD.
    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    ceph osd pool set $pool1 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it

    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for i in $(seq 1 $objects)
    do
        rados -p $pool1 put obj$i $dir/datafile
        rados -p $pool2 put obj$i $dir/datafile
    done

    # Grow both pools back to size 2 so they race to backfill the shared OSD.
    ceph osd pool set $pool1 size 2
    ceph osd pool set $pool2 size 2
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool $pool1
    delete_pool $pool2
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
412
413 # 2 pools can't both backfill to a target which has other data
414 # 1 of the pools has objects that increase from 1024 to 2611 bytes
415 #
416 # Write to fill pool which is size 1
# Take fill pool osd down (other 2 pools must go to the remaining OSDs)
# Save an export of data on fill OSD and restart it
# Write an initial 1K to pool1 which has pg 2.0
420 # Export 2.0 from non-fillpool OSD don't wait for it to start-up
421 # Take down fillpool OSD
422 # Put 1K object version of 2.0 on fillpool OSD
423 # Put back fillpool data on fillpool OSD
424 # With fillpool down write 2611 byte objects
425 # Take down $osd and bring back $fillosd simultaneously
426 # Wait for backfilling
427 # One PG will be able to backfill its remaining data
428 # One PG must get backfill_toofull
function TEST_backfill_multi_partial() {
    # Two pools race to backfill a target that already holds fillpool data
    # plus a stale 1K copy of pg 2.0; only one pool can fit.
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1 --yes-i-really-mean-it
    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done

    wait_for_clean || return 1

    # Partially fill an osd
    # We have room for 600 6K replicated objects, if we create 2611 byte objects
    # there is 3600K - (2611 * 600) = 2070K, so the fill pool and one
    # replica from the other 2 is 85% of 3600K

    dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
    for o in $(seq 1 $objects)
    do
        rados -p fillpool put obj-fill-${o} $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj-fill-1)
    # Pick any OSD other than fillosd (wrap around past the last id).
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    kill_daemon $dir/osd.$fillosd.pid TERM
    ceph osd out osd.$fillosd

    # Save fillpool's pg (1.0) off the down OSD, then bring it back empty.
    _objectstore_tool_nodown $dir $fillosd --op export-remove --pgid 1.0 --file $dir/fillexport.out || return 1
    activate_osd $dir $fillosd || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
    for o in $(seq 1 $objects)
    do
        rados -p "${poolprefix}1" put obj-1-${o} $dir/datafile
    done

    ceph pg dump pgs
    # The $osd OSD is started, but we don't wait so we can kill $fillosd at the same time
    _objectstore_tool_nowait $dir $osd --op export --pgid 2.0 --file $dir/export.out
    kill_daemon $dir/osd.$fillosd.pid TERM
    # Seed fillosd with the stale 1K version of 2.0 plus the fillpool data.
    _objectstore_tool_nodown $dir $fillosd --force --op remove --pgid 2.0
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out || return 1
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 1.0 --file $dir/fillexport.out || return 1
    ceph pg dump pgs
    sleep 20
    ceph pg dump pgs

    # re-write everything
    dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj-${p}-${o} $dir/datafile
        done
    done

    # Swap which OSD is down so both pools must backfill to fillosd.
    kill_daemon $dir/osd.$osd.pid TERM
    ceph osd out osd.$osd

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    flush_pg_stats || return 1
    ceph pg dump pgs

    ERRORS=0
    if [ "$(get_num_in_state backfill_toofull)" != "1" ];
    then
        echo "One PG should be in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(get_num_in_state active+clean)" != "2" ];
    then
        echo "Two PGs should be active+clean after one PG completed backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
548
549 # Make sure that the amount of bytes already on the replica doesn't
550 # cause an out of space condition
551 #
552 # Create 1 pool and write 4K * 600 objects
553 # Remove 25% (150) of the objects with one OSD down (noout set)
554 # Increase the size of the remaining 75% (450) of the objects to 6K
555 # Bring back down OSD
556 # The pool should go active+clean
function TEST_backfill_grow() {
    # Verify that backfill accounts for bytes already present on the target:
    # objects grow from 4K to 6K while one OSD is down, then it must
    # backfill successfully into the space freed by deleted objects.
    local dir=$1
    local poolname="test"
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3
    sleep 5

    wait_for_clean || return 1

    dd if=/dev/urandom of=${dir}/4kdata bs=1k count=4
    for i in $(seq 1 $objects)
    do
        rados -p $poolname put obj$i $dir/4kdata
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    # noout keeps the down OSD in the map so it backfills on return.
    ceph osd set noout
    kill_daemons $dir TERM $otherosd || return 1

    # Remove 25% of the objects, then grow the remaining 75% to 6K.
    rmobjects=$(expr $objects / 4)
    for i in $(seq 1 $rmobjects)
    do
        rados -p $poolname rm obj$i
    done

    dd if=/dev/urandom of=${dir}/6kdata bs=6k count=1
    for i in $(seq $(expr $rmobjects + 1) $objects)
    do
        rados -p $poolname put obj$i $dir/6kdata
    done

    activate_osd $dir $otherosd || return 1

    ceph tell osd.$primary debug kick_recovery_wq 0

    sleep 2

    # Must reach active+clean: the target's existing bytes are replaced,
    # not added on top of, so it is not too full.
    wait_for_clean || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}
616
617 # Create a 5 shard EC pool on 6 OSD cluster
618 # Fill 1 OSD with 2600K of data take that osd down.
619 # Write the EC pool on 5 OSDs
620 # Take down 1 (must contain an EC shard)
621 # Bring up OSD with fill data
# Not enough room to backfill to partially full OSD
function TEST_ec_backfill_simple() {
    # An EC (k=3,m=2) pg cannot backfill onto an OSD partially filled with
    # 13K fillpool objects; expect exactly one pg in backfill_toofull.
    local dir=$1
    local EC=$2
    local pools=1
    local OSDS=6
    local k=3
    local m=2
    # Same logical data volume as the replicated tests, spread over k shards.
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1 --yes-i-really-mean-it

    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 13K objects
    # there is only 3600K - (13K * 200) = 1000K which won't hold
    # a k=3 shard below ((18K / 3) + 4K) * 200 = 2000K
    # Actual usage per shard is 8K * 200 = 1600K because 18K/3 is 6K which
    # rounds to 8K. The 2000K is the ceiling on the 18K * 200 = 3600K logical
    # bytes in the pool.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=13
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    # Pick any OSD other than fillosd (wrap around past the last id).
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    sleep 5
    kill_daemon $dir/osd.$fillosd.pid TERM
    ceph osd out osd.$fillosd
    sleep 2
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 5

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=18
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    # Take down an EC shard holder and bring the partially-full OSD back,
    # forcing the EC pg to try backfilling onto it.
    kill_daemon $dir/osd.$osd.pid TERM
    ceph osd out osd.$osd

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 30

    ceph pg dump pgs

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    # grep -v "^1.0" excludes the fillpool pg from the count.
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
722
function osdlist() {
    # Print the OSD ids 0 .. OSDS-1 as a single space-separated line,
    # skipping the excluded id.
    # $1 - total number of OSDs
    # $2 - OSD id to leave out of the list
    local OSDS=$1
    local excludeosd=$2
    # 'local' prevents clobbering the caller's $osds/$osd if this is ever
    # called outside a $(...) subshell (the original leaked both).
    local osds=""
    local osd

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        if [ "$osd" = "$excludeosd" ];
        then
            continue
        fi
        # Separate entries with a single space, no trailing space.
        if [ -n "$osds" ]; then
            osds="${osds} "
        fi
        osds="${osds}${osd}"
    done
    echo "$osds"
}
741
742 # Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
743 # Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
744 # Remap the last OSD to partially full OSD on both pools
745 # The 2 pools should race to backfill.
746 # One pool goes active+clean
# The other goes active+...+backfill_toofull
function TEST_ec_backfill_multi() {
    # Two EC pools race to backfill shards onto the partially-filled OSD;
    # one wins (active+clean) and the other goes backfill_toofull.
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # This test requires that shards from 2 different pools
    # fit on a given OSD, but both will not fit. I'm making
    # the fillosd plus 1 shard use 75% of the space,
    # leaving not enough to be under the 85% set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1 --yes-i-really-mean-it

    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K which will only hold
    # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    # NOTE(review): k=3 m=2 are hard-coded here; should probably use
    # k=$k m=$m for consistency with the locals declared above.
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    nonfillosds="$(osdlist $OSDS $fillosd)"

    # Map both EC pgs (pool ids 2 and 3, so pgids 2.0 and 3.0) away from
    # fillosd initially.
    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        ceph osd pg-upmap "$(expr $p + 1).0" $nonfillosds
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
        done
    done

    ceph pg dump pgs

    # Replace the last OSD in each upmap (${nonfillosds% *} drops it)
    # with fillosd, so both pgs must backfill a shard onto fillosd.
    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap $(expr $p + 1).0 ${nonfillosds% *} $fillosd
    done

    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    # grep -v "^1.0" excludes the fillpool pg from the counts.
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
853
854 # Similar to TEST_ec_backfill_multi but one of the ec pools
855 # already had some data on the target OSD
856
857 # Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
858 # Write a small amount of data to 1 EC pool that still includes the filled one
859 # Take down fillosd with noout set
860 # Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
861 # Remap the last OSD to partially full OSD on both pools
862 # The 2 pools should race to backfill.
863 # One pool goes active+clean
# The other goes active+...+backfill_toofull
# NOTE(review): this function has the same name as the later
# SKIP_TEST_ec_backfill_multi_partial below; bash keeps the later
# definition, so this one is dead code even ignoring the SKIP_ prefix.
# One of the two should be renamed or removed.
function SKIP_TEST_ec_backfill_multi_partial() {
    # Like TEST_ec_backfill_multi, but one EC pool already has some data
    # on the backfill target before the race starts.
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=5
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)
    local lastosd=$(expr $OSDS - 1)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # This test requires that shards from 2 different pools
    # fit on a given OSD, but both will not fit. I'm making
    # the fillosd plus 1 shard use 75% of the space,
    # leaving not enough to be under the 85% set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1 --yes-i-really-mean-it
    # Pin the fillpool pg to the last osd.
    ceph osd pg-upmap 1.0 $lastosd

    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K which will only hold
    # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    # NOTE(review): k=3 m=2 hard-coded instead of k=$k m=$m.
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    nonfillosds="$(osdlist $OSDS $fillosd)"

    # Initially map both EC pgs across all OSDs including lastosd.
    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    # Seed pool 1 with a small amount of data while lastosd is still in
    # its acting set.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
    for o in $(seq 1 $ecobjects)
    do
        rados -p "${poolprefix}1" put obj$o-1 $dir/datafile
    done

    # Remap both pgs away from lastosd before the bulk write.
    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $(expr $lastosd - 1))
    done
    ceph pg dump pgs

    #ceph osd set noout
    #kill_daemons $dir TERM osd.$lastosd || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
        done
    done

    ceph pg dump pgs

    # Now backfill lastosd by adding back into the upmap
    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
    done
    #activate_osd $dir $lastosd || return 1
    #ceph tell osd.0 debug kick_recovery_wq 0

    sleep 30
    ceph pg dump pgs

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    # grep -v "^1.0" excludes the fillpool pg from the counts.
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
992
# NOTE(review): duplicate definition — this redefines (and therefore
# overrides) the SKIP_TEST_ec_backfill_multi_partial above. One of the
# two should be renamed or removed.
function SKIP_TEST_ec_backfill_multi_partial() {
    # Variant that seeds the backfill target via objectstore-tool export/
    # import of pg 2.0 instead of upmap juggling.
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=6

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # Below we need to fit 3200K in 3600K which is 88%
    # so set to 90%
    ceph osd set-backfillfull-ratio .90

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1 --yes-i-really-mean-it

    # Partially fill an osd
    # We have room for 200 48K ec objects, if we create 4k replicated objects
    # there is 3600K - (4K * 200) = 2800K which won't hold 2 k=3 shard
    # of 200 12K objects which takes ((12K / 3) + 4K) * 200 = 1600K each.
    # On the other OSDs 2 * 1600K = 3200K which is 88% of 3600K.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    # Pick any OSD other than fillosd (wrap around past the last id).
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    sleep 5
    kill_daemon $dir/osd.$fillosd.pid TERM
    ceph osd out osd.$fillosd
    sleep 2
    # NOTE(review): k=3 m=2 hard-coded, unlike the TEST_* functions that
    # parameterize via $k/$m.
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 5

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    #ceph pg map 2.0 --format=json | jq '.'
    kill_daemon $dir/osd.$osd.pid TERM
    ceph osd out osd.$osd

    # Copy pg 2.0 onto fillosd so it already holds partial data before
    # the backfill race starts.
    _objectstore_tool_nodown $dir $osd --op export --pgid 2.0 --file $dir/export.out
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 30

    wait_for_not_backfilling 1200 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    # grep -v "^1.0" excludes the fillpool pg from the counts.
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}
1100
1101 # Create 1 EC pool
1102 # Write 200 12K objects ((12K / 3) + 4K) *200) = 1600K
1103 # Take 1 shard's OSD down (with noout set)
1104 # Remove 50 objects ((12K / 3) + 4k) * 50) = 400K
1105 # Write 150 36K objects (grow 150 objects) 2400K
1106 # But there is already 1600K usage so backfill
1107 # would be too full if it didn't account for existing data
1108 # Bring back down OSD so it must backfill
1109 # It should go active+clean taking into account data already there
function TEST_ec_backfill_grow() {
    # EC analogue of TEST_backfill_grow: objects grow from 12K to 36K while
    # one shard's OSD is down; backfill must account for the 1600K of data
    # already on that OSD and still reach active+clean.
    local dir=$1
    local poolname="test"
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1
    ceph osd pool create $poolname 1 1 erasure ec-profile

    wait_for_clean || return 1

    dd if=/dev/urandom of=${dir}/12kdata bs=1k count=12
    for i in $(seq 1 $ecobjects)
    do
        rados -p $poolname put obj$i $dir/12kdata
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    # noout keeps the down OSD in the map so it backfills on return.
    ceph osd set noout
    kill_daemons $dir TERM $otherosd || return 1

    # Remove 25% of the objects, then grow the rest from 12K to 36K.
    rmobjects=$(expr $ecobjects / 4)
    for i in $(seq 1 $rmobjects)
    do
        rados -p $poolname rm obj$i
    done

    dd if=/dev/urandom of=${dir}/36kdata bs=1k count=36
    for i in $(seq $(expr $rmobjects + 1) $ecobjects)
    do
        rados -p $poolname put obj$i $dir/36kdata
    done

    activate_osd $dir $otherosd || return 1

    ceph tell osd.$primary debug kick_recovery_wq 0

    sleep 2

    wait_for_clean || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
1171
1172 main osd-backfill-space "$@"
1173
1174 # Local Variables:
1175 # compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-space.sh"
1176 # End: