# Copyright (C) 2019 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#
# Pull in the shared standalone-test helpers. The helper functions called
# throughout this script (setup, teardown, run_mon, run_osd, wait_for_clean,
# ...) are not defined in this file.
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"
#
# run DIR [TEST_FUNCTION...]
#
# Export the cluster settings shared by every test in this file, then run
# each requested test function between a setup/teardown pair.
#
# Arguments:
#   $1    - working directory handed to setup, the test and teardown
#   $2... - names of the test functions to run; when none are given, every
#           shell function whose name matches TEST_[0-9a-z_]* is run
# Returns:
#   0 when every test passed, 1 as soon as setup, a test or teardown fails
#   (the remaining tests are then skipped).
#
# NOTE(review): the function header, the `dir`/`shift` lines and the
# `objects` export were missing from the text under review and have been
# restored; confirm the name `run` is what `main` dispatches to and that
# objects=50 is the intended object count.
#
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 "
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
    # Number of objects written to each pool by the tests below.
    export objects=50
    export poolprefix=test
    export FORCE_PRIO="254"    # See OSD_BACKFILL_PRIORITY_FORCED
    export DEGRADED_PRIO="150" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE + 10
    export NORMAL_PRIO="110"   # See OSD_BACKFILL_PRIORITY_BASE + 10

    # Default to every TEST_* function: `set` prints each function as
    # "name () ..." and sed keeps just the matching names.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    # $funcs is deliberately unquoted: it is a whitespace-separated list.
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}
#
# TEST_backfill_priority DIR
#
# Check how one OSD's backfill reservations react to force-backfill and
# cancel-force-backfill. Three single-PG pools are chosen so that their PGs
# share a primary but keep their second replica on different OSDs. With
# backfill blocked (nobackfill) the pools are pushed into needing backfill
# and dump_recovery_reservations on the shared primary is inspected after
# each step. The numbered comments in the body (3, 1, 4, 2) name the
# reservation scenarios being exercised.
#
# Arguments:
#   $1 - test working directory
# Reads:
#   poolprefix, objects, FORCE_PRIO, DEGRADED_PRIO, NORMAL_PRIO
# Returns:
#   1 on any infrastructure failure, otherwise the number of failed checks
#   (0 means the test passed).
#
# NOTE(review): the text under review had lost its short lines (variable
# initialisers, sleep/break/return statements, diagnostics, loop and if
# terminators). They are restored here; confirm the literal values
# (pools, OSDS, max_tries, sleep durations) against the project's copy.
#
function TEST_backfill_priority() {
    local dir=$1
    local pools=10
    local OSDS=5
    # size 2 -> 1 means degraded by 1, so add 1 to base prio
    local degraded_prio=$((DEGRADED_PRIO + 1))
    local max_tries=10

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for ((osd = 0; osd < OSDS; osd++)); do
        run_osd $dir $osd || return 1
    done

    for ((p = 1; p <= pools; p++)); do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 3 pools with a pg with the same primaries but second
    # replica on another osd.
    local PG1 POOLNUM1 pool1 chk_osd1_1 chk_osd1_2
    local PG2 POOLNUM2 pool2 chk_osd2
    local PG3 POOLNUM3 pool3

    for ((p = 1; p <= pools; p++)); do
        # First line of the acting set is taken as the primary, the last
        # line as the second replica.
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [[ -z "$PG1" ]]; then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [[ -z "$PG2" && "$chk_osd1_1" == "$test_osd1" && "$chk_osd1_2" != "$test_osd2" ]]; then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2=$test_osd2
        elif [[ -n "$PG2" && "$chk_osd1_1" == "$test_osd1" && "$chk_osd1_2" != "$test_osd2" && "$chk_osd2" != "$test_osd2" ]]; then
            PG3="${p}.0"
            POOLNUM3=$p
            pool3="${poolprefix}$p"
            break
        fi
    done
    rm -f $dir/acting

    # Both the second and the third pool are required. (The earlier form of
    # this check compared the literal string "pool3", so a missing third
    # pool went unnoticed.)
    if [[ -z "$pool2" || -z "$pool3" ]]; then
        echo "Failure to find appropirate PGs"
        return 1
    fi

    # Drop every pool that was not selected.
    for ((p = 1; p <= pools; p++)); do
        if [[ "$p" != "$POOLNUM1" && "$p" != "$POOLNUM2" && "$p" != "$POOLNUM3" ]]; then
            delete_pool ${poolprefix}$p
        fi
    done

    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool3 size 1 --yes-i-really-mean-it
    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/data bs=1M count=10
    # p numbers the selected pools 1..3 for the object names.
    p=1
    for pname in $pool1 $pool2 $pool3; do
        for ((i = 1; i <= objects; i++)); do
            rados -p ${pname} put obj${i}-p${p} $dir/data
        done
        p=$((p + 1))
    done

    local otherosd=$(get_not_primary $pool1 obj1-p1)

    ceph pg dump pgs
    ERRORS=0

    ceph osd set nobackfill
    ceph osd set noout

    # Get a pg to want to backfill and quickly force it
    # to be preempted.
    ceph osd pool set $pool3 size 2
    sleep 2

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    # 3. Item is in progress, adjust priority with no higher priority waiting
    for ((i = 1; i <= max_tries; i++)); do
        if ! ceph pg force-backfill $PG3 2>&1 | grep -q "doesn't require backfilling"; then
            break
        fi
        if [ "$i" = "$max_tries" ]; then
            echo "ERROR: Didn't appear to be able to force-backfill"
            ERRORS=$((ERRORS + 1))
        fi
        sleep 2
    done
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    ceph osd out osd.$chk_osd1_2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
    ceph pg dump pgs

    ceph osd pool set $pool2 size 2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    ceph pg dump pgs

    PRIO=$(jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio" $dir/out)
    if [ "$PRIO" != "$NORMAL_PRIO" ]; then
        echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
        ERRORS=$((ERRORS + 1))
    fi

    # jq -r prints the item without its JSON double-quotes
    ITEM=$(jq -r '.local_reservations.in_progress[0].item' $dir/out)
    if [ "$ITEM" != "${PG3}" ]; then
        echo "The force-backfill PG $PG3 didn't become the in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' $dir/out)
        if [ "$PRIO" != "$FORCE_PRIO" ]; then
            echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    # 1. Item is queued, re-queue with new priority
    for ((i = 1; i <= max_tries; i++)); do
        if ! ceph pg force-backfill $PG2 2>&1 | grep -q "doesn't require backfilling"; then
            break
        fi
        if [ "$i" = "$max_tries" ]; then
            echo "ERROR: Didn't appear to be able to force-backfill"
            ERRORS=$((ERRORS + 1))
        fi
        sleep 2
    done
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio" $dir/out)
    if [ "$PRIO" != "$FORCE_PRIO" ]; then
        echo "The second force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO"
        ERRORS=$((ERRORS + 1))
    fi
    flush_pg_stats || return 1

    # 4. Item is in progress, if higher priority items waiting preempt item
    ceph pg cancel-force-backfill $PG3 || return 1
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio" $dir/out)
    if [ "$PRIO" != "$degraded_prio" ]; then
        echo "After cancel-force-backfill PG ${PG3} doesn't have prio $degraded_prio"
        ERRORS=$((ERRORS + 1))
    fi

    ITEM=$(jq -r '.local_reservations.in_progress[0].item' $dir/out)
    if [ "$ITEM" != "${PG2}" ]; then
        echo "The force-recovery PG $PG2 didn't become the in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' $dir/out)
        if [ "$PRIO" != "$FORCE_PRIO" ]; then
            echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    ceph pg cancel-force-backfill $PG2 || return 1
    sleep 5
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    # 2. Item is queued, re-queue and preempt because new priority higher than an in progress item
    flush_pg_stats || return 1
    ceph pg force-backfill $PG3 || return 1
    sleep 2

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio" $dir/out)
    if [ "$PRIO" != "$degraded_prio" ]; then
        echo "After cancel-force-backfill PG ${PG2} doesn't have prio $degraded_prio"
        ERRORS=$((ERRORS + 1))
    fi

    ITEM=$(jq -r '.local_reservations.in_progress[0].item' $dir/out)
    if [ "$ITEM" != "${PG3}" ]; then
        echo "The force-backfill PG $PG3 didn't get promoted to an in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' $dir/out)
        if [ "$PRIO" != "$FORCE_PRIO" ]; then
            # The in-progress item checked here is PG3, so report PG3.
            echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    ceph osd unset noout
    ceph osd unset nobackfill

    # The quoted command is passed to wait_for_clean as an argument;
    # presumably it is run while waiting so the reservations get logged.
    wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1

    ceph pg dump pgs

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history

    if [ "$ERRORS" != "0" ]; then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool $pool1
    delete_pool $pool2
    delete_pool $pool3
    kill_daemons $dir || return 1
    return $ERRORS
}
#
# Show that pool recovery_priority is added to the backfill priority
#
# Create 2 pools with 2 OSDs with different primaries
# pool 1 with recovery_priority 1
# pool 2 with recovery_priority 2
#
# Start backfill by changing the pool sizes from 1 to 2
# Use dump_recovery_reservations to verify priorities
function TEST_backfill_pool_priority() {
    # Arguments:
    #   $1 - test working directory
    # Reads:
    #   poolprefix, objects, DEGRADED_PRIO
    # Returns:
    #   1 on any infrastructure failure, otherwise the number of failed
    #   checks (0 means the test passed).
    #
    # Two single-PG pools with different primaries get recovery_priority 1
    # and 2. Their size is then raised from 1 to 2, and the in-progress
    # local and remote reservations are expected to carry
    # DEGRADED_PRIO + 1 + the pool's recovery_priority.
    #
    # NOTE(review): the text under review had lost its short lines
    # (variable initialisers, sleep/break/return statements, diagnostics,
    # loop and if terminators). They are restored here; confirm the literal
    # values (OSDS, sleep durations) against the project's copy.
    local dir=$1
    local pools=3 # Don't assume the first 2 pools are exactly what we want
    local OSDS=2

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for ((osd = 0; osd < OSDS; osd++)); do
        run_osd $dir $osd || return 1
    done

    for ((p = 1; p <= pools; p++)); do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with different primaries which
    # means the replica must be on another osd.
    local PG1 POOLNUM1 pool1 chk_osd1_1 chk_osd1_2
    local PG2 POOLNUM2 pool2 chk_osd2_1 chk_osd2_2

    for ((p = 1; p <= pools; p++)); do
        # First line of the acting set is taken as the primary, the last
        # line as the replica.
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [[ -z "$PG1" ]]; then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [[ "$chk_osd1_1" != "$test_osd1" ]]; then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2_1=$test_osd1
            chk_osd2_2=$test_osd2
            break
        fi
    done
    rm -f $dir/acting

    if [[ -z "$pool2" ]]; then
        echo "Failure to find appropirate PGs"
        return 1
    fi

    # Drop every pool that was not selected.
    for ((p = 1; p <= pools; p++)); do
        if [[ "$p" != "$POOLNUM1" && "$p" != "$POOLNUM2" ]]; then
            delete_pool ${poolprefix}$p
        fi
    done

    pool1_extra_prio=1
    pool2_extra_prio=2
    # size 2 -> 1 means degraded by 1, so add 1 to base prio
    pool1_prio=$((DEGRADED_PRIO + 1 + pool1_extra_prio))
    pool2_prio=$((DEGRADED_PRIO + 1 + pool2_extra_prio))

    ceph osd pool set $pool1 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool1 recovery_priority $pool1_extra_prio
    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool2 recovery_priority $pool2_extra_prio
    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/data bs=1M count=10
    # p numbers the selected pools 1..2 for the object names.
    p=1
    for pname in $pool1 $pool2; do
        for ((i = 1; i <= objects; i++)); do
            rados -p ${pname} put obj${i}-p${p} $dir/data
        done
        p=$((p + 1))
    done

    local otherosd=$(get_not_primary $pool1 obj1-p1)

    ceph pg dump pgs
    ERRORS=0

    ceph osd pool set $pool1 size 2
    ceph osd pool set $pool2 size 2
    sleep 5

    # Only the two OSDs of pool 1 are dumped. The checks on pool 2 below
    # read dump.${chk_osd2_1}.out and dump.${chk_osd2_2}.out, so this relies
    # on pool 2 using the same two OSDs with the roles swapped --
    # NOTE(review): true when there are exactly 2 OSDs; confirm.
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out
    echo osd.${chk_osd1_1}
    cat $dir/dump.${chk_osd1_1}.out
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out
    echo osd.${chk_osd1_2}
    cat $dir/dump.${chk_osd1_2}.out

    # jq -r prints the item without its JSON double-quotes
    ITEM=$(jq -r '.local_reservations.in_progress[0].item' $dir/dump.${chk_osd1_1}.out)
    if [ "$ITEM" != "${PG1}" ]; then
        echo "The primary PG ${PG1} didn't become the in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' $dir/dump.${chk_osd1_1}.out)
        if [ "$PRIO" != "$pool1_prio" ]; then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    ITEM=$(jq -r '.remote_reservations.in_progress[0].item' $dir/dump.${chk_osd1_2}.out)
    if [ "$ITEM" != "${PG1}" ]; then
        echo "The primary PG ${PG1} didn't become the in progress item on remote"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.remote_reservations.in_progress[0].prio' $dir/dump.${chk_osd1_2}.out)
        if [ "$PRIO" != "$pool1_prio" ]; then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    ITEM=$(jq -r '.local_reservations.in_progress[0].item' $dir/dump.${chk_osd2_1}.out)
    if [ "$ITEM" != "${PG2}" ]; then
        echo "The primary PG ${PG2} didn't become the in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' $dir/dump.${chk_osd2_1}.out)
        if [ "$PRIO" != "$pool2_prio" ]; then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    ITEM=$(jq -r '.remote_reservations.in_progress[0].item' $dir/dump.${chk_osd2_2}.out)
    if [ "$ITEM" != "${PG2}" ]; then
        echo "The primary PG $PG2 didn't become the in progress item on remote"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.remote_reservations.in_progress[0].prio' $dir/dump.${chk_osd2_2}.out)
        if [ "$PRIO" != "$pool2_prio" ]; then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    wait_for_clean || return 1

    if [ "$ERRORS" != "0" ]; then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool $pool1
    delete_pool $pool2
    kill_daemons $dir || return 1
    return $ERRORS
}
# Script entry point: hand the test name and the command-line arguments to
# `main`, which is not defined in this file.
main osd-backfill-prio "$@"
# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-prio.sh"
# End: