#!/usr/bin/env bash
#
# Copyright (C) 2018 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7180" # git grep '\<7180\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
    CEPH_ARGS+="--fake_statfs_for_testing=3686400 "
    CEPH_ARGS+="--osd_max_backfills=10 "
    export objects=600
    export poolprefix=test

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}
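# run() accepts an optional list of test function names; with no arguments
# it discovers every function matching ^TEST_ in the shell environment.  A
# sketch of driving a single test directly (hypothetical invocation; the
# normal entry point is the main call at the bottom of this file):
#
#     run /tmp/osd-backfill-space TEST_backfill_test_simple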


function get_num_in_state() {
    local state=$1
    local expression
    expression+="select(contains(\"${state}\"))"
    ceph --format json pg dump pgs 2>/dev/null | \
        jq ".pg_stats | [.[] | .state | $expression] | length"
}
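# A minimal sketch of the jq filter above on canned input (the assumed
# shape of `ceph pg dump pgs --format json` output); not used by any test:
#
#     echo '{"pg_stats":[{"state":"active+remapped+backfilling"},{"state":"active+clean"}]}' |
#         jq '.pg_stats | [.[] | .state | select(contains("backfilling"))] | length'
#
# prints 1, i.e. one PG whose state string contains "backfilling".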


# Wait until no PGs are in the given state.  The delay ladder restarts
# whenever the count changes, so steady progress never times out.
function wait_for_not_state() {
    local state=$1
    local num_in_state=-1
    local cur_in_state
    local -a delays=($(get_timeout_delays $2 5))
    local -i loop=0

    flush_pg_stats || return 1
    # Make sure the PGs have been created before polling their states
    while test $(get_num_pgs) == 0 ; do
        sleep 1
    done

    while true ; do
        cur_in_state=$(get_num_in_state ${state})
        test $cur_in_state = "0" && break
        if test $cur_in_state != $num_in_state ; then
            # The count changed, so progress is being made: reset the delays
            loop=0
            num_in_state=$cur_in_state
        elif (( $loop >= ${#delays[*]} )) ; then
            # No progress through the entire delay ladder: give up
            ceph pg dump pgs
            return 1
        fi
        sleep ${delays[$loop]}
        loop+=1
    done
    return 0
}


function wait_for_not_backfilling() {
    local timeout=$1
    wait_for_not_state backfilling $timeout
}


function wait_for_not_activating() {
    local timeout=$1
    wait_for_not_state activating $timeout
}
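# Typical usage in the tests below: give backfill four minutes to drain,
# then give any PGs still stuck in activating one more minute:
#
#     wait_for_not_backfilling 240 || return 1
#     wait_for_not_activating 60 || return 1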

# All tests are created in an environment which has a fake total space
# of 3600K (3686400 bytes), which can hold 600 6K replicated objects or
# 200 18K shards of erasure coded objects.  For a k=3, m=2 EC pool the
# theoretical maximum is a 54K object, but with the 4K chunk size and
# 4K of rounding per chunk the largest usable object is 36K, since
# ((36K / 3) + 4K) * 200 = 3200K, which is 88% of the 3600K available
# to a shard.
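# A minimal sketch of the shard arithmetic above, for reference only (this
# hypothetical helper is not called by any test): space consumed on one
# shard's OSD, in K, for a given object size, k value, per-chunk rounding
# and object count.
function ec_shard_usage_kb() {
    local object_kb=$1
    local k=$2
    local pad_kb=$3
    local count=$4
    echo $(( ((object_kb / k) + pad_kb) * count ))
}
# Example: ec_shard_usage_kb 36 3 4 200 prints 3200, i.e. 88% of 3600K.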

# Create 2 pools with size 1
# Write enough data that only 1 pool pg can fit per osd
# Increase the pool size to 2
# On 3 OSDs this should result in 1 OSD with overlapping replicas,
# so both pools can't fit.  We assume pgid 1.0 and 2.0 won't
# map to the same 2 OSDs.
# At least 1 pool shouldn't have room to backfill
# All other pools should go active+clean
function TEST_backfill_test_simple() {
    local dir=$1
    local pools=2
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1
    done

    wait_for_clean || return 1

    # This won't work if the 2 pools' primary and only OSDs
    # are the same.

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    ceph pg dump pgs

    for p in $(seq 1 $pools)
    do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    expected="$(expr $pools - 1)"
    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
    then
        echo "$expected didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}


# Create 8 pools of size 1 on 20 OSDs
# Write 4K * 600 objects (only 1 pool pg can fit on any given osd)
# Increase pool size to 2
# At least 1 pool shouldn't have room to backfill
# All other pools should go active+clean
function TEST_backfill_test_multi() {
    local dir=$1
    local pools=8
    local OSDS=20

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1
    done

    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    ceph pg dump pgs

    for p in $(seq 1 $pools)
    do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    full="$(ceph pg dump pgs | grep +backfill_toofull | wc -l)"
    if [ "$full" -lt "1" ];
    then
        echo "At least one pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    expected="$(expr $pools - $full)"
    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ];
    then
        echo "$expected didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs
    ceph status

    ceph status --format=json-pretty > $dir/stat.json
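    # jq emits these values JSON-quoted; the evals below strip the quotes
    # (equivalent to jq -r) so the string comparisons see the bare values.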
    eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json)
    if [ "$SEV" != "HEALTH_WARN" ]; then
        echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json)
    if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then
        echo "PG_BACKFILL_FULL message '$MSG' mismatched"
        ERRORS="$(expr $ERRORS + 1)"
    fi
    rm -f $dir/stat.json

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    # Workaround for http://tracker.ceph.com/issues/38195
    kill_daemons $dir #|| return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}


# Make sure that when 2 pgs try to backfill at the same time to the
# same target they are handled.  This might be covered by the simple
# test above, but this makes sure we exercise it.
#
# Create 10 pools of size 2 and identify 2 that have the same
# non-primary osd.
# Delete all other pools
# Set size to 1 and write 4K * 600 to each pool
# Set size back to 2
# The 2 pools should race to backfill.
# One pool goes active+clean
# The other goes active+...+backfill_toofull
function TEST_backfill_test_sametarget() {
    local dir=$1
    local pools=10
    local OSDS=5

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with pgs that have distinct primaries but their
    # second replica on the same osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1
    local chk_osd2

    local PG2
    local POOLNUM2
    local pool2
    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ $p = "1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1=$test_osd1
            chk_osd2=$test_osd2
        elif [ $chk_osd1 != $test_osd1 -a $chk_osd2 = $test_osd2 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            break
        fi
    done
    rm -f $dir/acting

    if [ "$pool2" = "" ];
    then
        echo "Failure to find appropriate PGs"
        return 1
    fi

    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    ceph osd pool set $pool1 size 1
    ceph osd pool set $pool2 size 1

    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for i in $(seq 1 $objects)
    do
        rados -p $pool1 put obj$i $dir/datafile
        rados -p $pool2 put obj$i $dir/datafile
    done

    ceph osd pool set $pool1 size 2
    ceph osd pool set $pool2 size 2
    sleep 5

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool $pool1
    delete_pool $pool2
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}

# 2 pools can't both backfill to a target which has other data
# 1 of the pools has objects that increase from 1024 to 2611 bytes
#
# Write to fill pool which is size 1
# Take fill pool osd down (other 2 pools must go to the remaining OSDs)
# Save an export of data on fill OSD and restart it
# Write an initial 1K to pool1 which has pg 2.0
# Export 2.0 from a non-fillpool OSD; don't wait for it to start up
# Take down fillpool OSD
# Put 1K object version of 2.0 on fillpool OSD
# Put back fillpool data on fillpool OSD
# With fillpool down write 2611 byte objects
# Take down $osd and bring back $fillosd simultaneously
# Wait for backfilling
# One PG will be able to backfill its remaining data
# One PG must get backfill_toofull
function TEST_backfill_multi_partial() {
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1
    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done

    wait_for_clean || return 1

    # Partially fill an osd
    # We have room for 600 6K replicated objects; if we create 2611 byte
    # objects there is 3600K - (2611 * 600) = 2070K left, so the fill pool
    # plus one replica from the other 2 pools is 85% of 3600K
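    # Worked out: 2611 B * 600 is about 1530K in the fill pool; the fill
    # pool plus one 1530K replica of another pool is about 3060K, i.e.
    # 85% of 3600K, right at the backfillfull ratio set above.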

    dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
    for o in $(seq 1 $objects)
    do
        rados -p fillpool put obj-fill-${o} $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj-fill-1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    sleep 5
    kill $(cat $dir/osd.$fillosd.pid)
    ceph osd out osd.$fillosd
    sleep 2

    _objectstore_tool_nodown $dir $fillosd --op export-remove --pgid 1.0 --file $dir/fillexport.out || return 1
    activate_osd $dir $fillosd || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
    for o in $(seq 1 $objects)
    do
        rados -p "${poolprefix}1" put obj-1-${o} $dir/datafile
    done

    ceph pg dump pgs
    # The $osd OSD is started, but we don't wait so we can kill $fillosd at the same time
    _objectstore_tool_nowait $dir $osd --op export --pgid 2.0 --file $dir/export.out
    kill $(cat $dir/osd.$fillosd.pid)
    sleep 5
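    # Replace fillosd's copy of pg 2.0 with the older 1K-object export and
    # restore its fillpool data, so the grown objects still need to be
    # backfilled to it later.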
    _objectstore_tool_nodown $dir $fillosd --force --op remove --pgid 2.0
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out || return 1
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 1.0 --file $dir/fillexport.out || return 1
    ceph pg dump pgs
    sleep 20
    ceph pg dump pgs

    # re-write everything
    dd if=/dev/urandom of=$dir/datafile bs=2611 count=1
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj-${p}-${o} $dir/datafile
        done
    done

    kill $(cat $dir/osd.$osd.pid)
    ceph osd out osd.$osd

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 15

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    flush_pg_stats || return 1
    ceph pg dump pgs

    ERRORS=0
    if [ "$(get_num_in_state backfill_toofull)" != "1" ];
    then
        echo "One PG should be in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(get_num_in_state active+clean)" != "2" ];
    then
        echo "Two PGs should be active+clean after one PG completed backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}

# Make sure that the amount of bytes already on the replica doesn't
# cause an out of space condition
#
# Create 1 pool and write 4K * 600 objects
# Remove 25% (150) of the objects with one OSD down (noout set)
# Increase the size of the remaining 75% (450) of the objects to 6K
# Bring back the down OSD
# The pool should go active+clean
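# (Worked out: the 450 remaining objects at 6K each are 2700K, i.e. 75% of
# the 3600K fake total, so the backfill stays under the 85% ratio as long
# as the bytes already on the replica are credited rather than added on.)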
function TEST_backfill_grow() {
    local dir=$1
    local poolname="test"
    local OSDS=3

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3
    sleep 5

    wait_for_clean || return 1

    dd if=/dev/urandom of=${dir}/4kdata bs=1k count=4
    for i in $(seq 1 $objects)
    do
        rados -p $poolname put obj$i $dir/4kdata
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    ceph osd set noout
    kill_daemons $dir TERM $otherosd || return 1

    rmobjects=$(expr $objects / 4)
    for i in $(seq 1 $rmobjects)
    do
        rados -p $poolname rm obj$i
    done

    dd if=/dev/urandom of=${dir}/6kdata bs=6k count=1
    for i in $(seq $(expr $rmobjects + 1) $objects)
    do
        rados -p $poolname put obj$i $dir/6kdata
    done

    activate_osd $dir $otherosd || return 1

    ceph tell osd.$primary debug kick_recovery_wq 0

    sleep 2

    wait_for_clean || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
    ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
}

# Create a 5 shard EC pool on a 6 OSD cluster
# Fill 1 OSD with 2600K of data and take that osd down.
# Write the EC pool on 5 OSDs
# Take down 1 (must contain an EC shard)
# Bring up OSD with fill data
# Not enough room to backfill to the partially full OSD
function TEST_ec_backfill_simple() {
    local dir=$1
    local EC=$2
    local pools=1
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 18K replicated objects; if we create 13K objects
    # there is only 3600K - (13K * 200) = 1000K left, which won't hold
    # a k=3 shard reserved at ((18K / 3) + 4K) * 200 = 2000K.
    # Actual usage per shard is 8K * 200 = 1600K because 18K/3 is 6K which
    # rounds to 8K.  The 2000K is the ceiling on the 18K * 200 = 3600K logical
    # bytes in the pool.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=13
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    sleep 5
    kill $(cat $dir/osd.$fillosd.pid)
    ceph osd out osd.$fillosd
    sleep 2
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 5

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=18
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    kill $(cat $dir/osd.$osd.pid)
    ceph osd out osd.$osd

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 30

    ceph pg dump pgs

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}

function osdlist() {
    local OSDS=$1
    local excludeosd=$2

    osds=""
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        if [ $osd = $excludeosd ];
        then
            continue
        fi
        if [ -n "$osds" ]; then
            osds="${osds} "
        fi
        osds="${osds}${osd}"
    done
    echo $osds
}
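# Example: on a 5 OSD cluster, excluding osd.2,
#
#     osdlist 5 2
#
# prints "0 1 3 4".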

# Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
# Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
# Remap the last OSD to the partially full OSD on both pools
# The 2 pools should race to backfill.
# One pool goes active+clean
# The other goes active+...+backfill_toofull
function TEST_ec_backfill_multi() {
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # This test requires that a shard from each of the 2 pools fits on a
    # given OSD, but both together will not fit.  Filling the OSD so that
    # the fill data plus 1 shard uses 75% of the space leaves too little
    # room to stay under the 85% ratio set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 18K replicated objects; if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K left, which will only hold
    # one k=3 shard reserved at ((12K / 3) + 4K) * 200 = 1600K.
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
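    # (Two such shards would reserve 2 * 1600K = 3200K, more than the
    # 1800K free, so only one of the racing backfills can succeed.)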
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    nonfillosds="$(osdlist $OSDS $fillosd)"

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        ceph osd pg-upmap "$(expr $p + 1).0" $nonfillosds
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
        done
    done

    ceph pg dump pgs

    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap $(expr $p + 1).0 ${nonfillosds% *} $fillosd
    done

    sleep 10

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}

# Similar to TEST_ec_backfill_multi but one of the ec pools
# already had some data on the target OSD

# Create a pool with size 1 and fill with data so that only 1 EC shard can fit.
# Write a small amount of data to 1 EC pool that still includes the filled one
# Take down fillosd with noout set
# Write data to 2 EC pools mapped to the same OSDs (excluding filled one)
# Remap the last OSD to the partially full OSD on both pools
# The 2 pools should race to backfill.
# One pool goes active+clean
# The other goes active+...+backfill_toofull
function SKIP_TEST_ec_backfill_multi_partial() {
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=5
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)
    local lastosd=$(expr $OSDS - 1)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # This test requires that a shard from each of the 2 pools fits on a
    # given OSD, but both together will not fit.  Filling the OSD so that
    # the fill data plus 1 shard uses 75% of the space leaves too little
    # room to stay under the 85% ratio set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1
    # last osd
    ceph osd pg-upmap 1.0 $lastosd

    # Partially fill an osd
    # We have room for 200 18K replicated objects; if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K left, which will only hold
    # one k=3 shard reserved at ((12K / 3) + 4K) * 200 = 1600K.
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=9
    for o in $(seq 1 $ecobjects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    nonfillosds="$(osdlist $OSDS $fillosd)"

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=1
    for o in $(seq 1 $ecobjects)
    do
        rados -p "${poolprefix}1" put obj$o-1 $dir/datafile
    done

    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $(expr $lastosd - 1))
    done
    ceph pg dump pgs

    #ceph osd set noout
    #kill_daemons $dir TERM osd.$lastosd || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $ecobjects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile
        done
    done

    ceph pg dump pgs

    # Now backfill lastosd by adding back into the upmap
    for p in $(seq 1 $pools)
    do
        ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd)
    done
    #activate_osd $dir $lastosd || return 1
    #ceph tell osd.0 debug kick_recovery_wq 0

    sleep 10
    ceph pg dump pgs

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}

# A second, also skipped, variant: instead of remapping with pg-upmap it
# seeds the fill OSD with an export of pg 2.0 before bringing it back.
# (Renamed so it no longer shadows the previous function of the same name.)
function SKIP_TEST_ec_backfill_multi_partial_2() {
    local dir=$1
    local EC=$2
    local pools=2
    local OSDS=6

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    # Below we need to fit 3200K in 3600K which is 88%
    # so set to 90%
    ceph osd set-backfillfull-ratio .90

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 48K ec objects; if we create 4K replicated objects
    # there is 3600K - (4K * 200) = 2800K, which won't hold 2 k=3 shards
    # of 200 12K objects, which take ((12K / 3) + 4K) * 200 = 1600K each.
    # On the other OSDs 2 * 1600K = 3200K which is 88% of 3600K.
    dd if=/dev/urandom of=$dir/datafile bs=1024 count=4
    for o in $(seq 1 $objects)
    do
        rados -p fillpool put obj$o $dir/datafile
    done

    local fillosd=$(get_primary fillpool obj1)
    osd=$(expr $fillosd + 1)
    if [ "$osd" = "$OSDS" ]; then
        osd="0"
    fi

    sleep 5
    kill $(cat $dir/osd.$fillosd.pid)
    ceph osd out osd.$fillosd
    sleep 2
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 $pools)
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 5

    ceph pg dump pgs

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=12
    for o in $(seq 1 $objects)
    do
        for p in $(seq 1 $pools)
        do
            rados -p "${poolprefix}$p" put obj$o $dir/datafile
        done
    done

    #ceph pg map 2.0 --format=json | jq '.'
    kill $(cat $dir/osd.$osd.pid)
    ceph osd out osd.$osd

    _objectstore_tool_nodown $dir $osd --op export --pgid 2.0 --file $dir/export.out
    _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out

    activate_osd $dir $fillosd || return 1
    ceph osd in osd.$fillosd
    sleep 15

    wait_for_not_backfilling 240 || return 1
    wait_for_not_activating 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs

    if [ $ERRORS != "0" ];
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 $pools)
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}

# Create 1 EC pool
# Write 200 12K objects: ((12K / 3) + 4K) * 200 = 1600K
# Take 1 shard's OSD down (with noout set)
# Remove 50 objects: ((12K / 3) + 4K) * 50 = 400K
# Write 150 36K objects (grow 150 objects): 2400K
# But there is already 1600K of usage, so backfill
# would be too full if it didn't account for existing data
# Bring back the down OSD so it must backfill
# It should go active+clean taking into account data already there
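# (Worked out: the grown objects reserve ((36K / 3) + 4K) * 150 = 2400K on
# the returning shard.  Added on top of the 1600K already there that would
# be 4000K, over the 3600K total; credited against it, the final 2400K is
# only 67%, comfortably under the 85% ratio.)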
function TEST_ec_backfill_grow() {
    local dir=$1
    local poolname="test"
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(expr $objects / $k)

    run_mon $dir a || return 1
    run_mgr $dir x || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1
    ceph osd pool create $poolname 1 1 erasure ec-profile

    wait_for_clean || return 1

    dd if=/dev/urandom of=${dir}/12kdata bs=1k count=12
    for i in $(seq 1 $ecobjects)
    do
        rados -p $poolname put obj$i $dir/12kdata
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    ceph osd set noout
    kill_daemons $dir TERM $otherosd || return 1

    rmobjects=$(expr $ecobjects / 4)
    for i in $(seq 1 $rmobjects)
    do
        rados -p $poolname rm obj$i
    done

    dd if=/dev/urandom of=${dir}/36kdata bs=1k count=36
    for i in $(seq $(expr $rmobjects + 1) $ecobjects)
    do
        rados -p $poolname put obj$i $dir/36kdata
    done

    activate_osd $dir $otherosd || return 1

    ceph tell osd.$primary debug kick_recovery_wq 0

    sleep 2

    wait_for_clean || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}

main osd-backfill-space "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-space.sh"
# End: