3 # Copyright (C) 2019 Red Hat <contact@redhat.com>
5 # Author: David Zafman <dzafman@redhat.com>
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
# Load the shared standalone-test helper library. Presumably this is what
# provides main, setup, teardown, run_mon, wait_for_clean, etc. used below
# (none of them are defined in this file) - verify against the helper file.
# The path is quoted so a CEPH_ROOT containing whitespace is not word-split.
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"
#######################################
# Export the cluster settings shared by every test in this file and run the
# selected TEST_* functions, each wrapped in setup/teardown.
# NOTE(review): the excerpt this block was restored from did not show the
# `function run() {` header, `local dir=$1`/`shift`, `export CEPH_ARGS`,
# `export objects=...` or the closing `done`/`}`; those lines are
# reconstructed (objects=50 in particular is an assumption) - confirm against
# the upstream file.
# Globals:   exports CEPH_MON, CEPH_ARGS, objects, poolprefix, FORCE_PRIO,
#            DEGRADED_PRIO and NORMAL_PRIO; options are appended to whatever
#            CEPH_ARGS already holds
# Arguments: $1   - test working directory
#            $2.. - optional names of the tests to run; defaults to every
#                   shell function whose name matches TEST_[0-9a-z_]*
# Returns:   1 as soon as setup, a test or teardown fails
#######################################
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 "
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
    # Set osd op queue = wpq for the tests. Backfill priority is not
    # considered by mclock_scheduler leading to unexpected results.
    CEPH_ARGS+="--osd-op-queue=wpq "
    # Number of objects written per pool by the tests (they read $objects).
    export objects=50
    export poolprefix=test
    export FORCE_PRIO="254"    # See OSD_BACKFILL_PRIORITY_FORCED
    export DEGRADED_PRIO="150" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE + 10
    export NORMAL_PRIO="110"   # See OSD_BACKFILL_PRIORITY_BASE + 10

    # Explicit test names win; otherwise discover every TEST_* function from
    # the shell's own listing of definitions.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    # $funcs is intentionally unquoted: it is a whitespace-separated list.
    for func in $funcs; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}
#######################################
# Check how the backfill reserver on one primary OSD re-prioritises PGs when
# force-backfill / cancel-force-backfill are issued while backfill itself is
# held back by the `nobackfill` flag. Three single-PG pools sharing a primary
# but with distinct replicas are selected, then four transitions are verified
# through `dump_recovery_reservations`:
#   3. in-progress item, priority adjusted, nothing higher waiting
#   1. queued item re-queued with a new priority
#   4. in-progress item preempted because a higher priority item waits
#   2. queued item re-queued and preempting the in-progress item
#
# NOTE(review): the excerpt this block was restored from omitted a number of
# lines (loop/if keywords, `local` declarations, pool counts, `sleep` settle
# delays, `ceph pg dump pgs` traces, noout handling, the final pool deletes
# and `return`). They are reconstructed here and must be confirmed against
# the upstream file - in particular pools=10, OSDS=5, max_tries=10 and every
# sleep duration.
#
# Fixes relative to the visible original:
#   * the "no suitable PGs" guard compared the literal string "pool3" with
#     "" and therefore never fired when the third pool was missing;
#   * the last priority message named PG2 while checking PG3, and two
#     messages said "force-recovery" in this backfill test;
#   * `eval` on daemon output is replaced by `jq -r`;
#   * counters and loop variables no longer leak into the global scope.
#
# Globals:   reads poolprefix, objects, FORCE_PRIO, DEGRADED_PRIO,
#            NORMAL_PRIO; exports CEPH_ARGS
# Arguments: $1 - test working directory
# Returns:   1 on an infrastructure failure, otherwise the number of failed
#            checks (0 when all passed)
#######################################
function TEST_backfill_priority() {
    local dir=$1
    local pools=10
    local OSDS=5
    # size 2 -> 1 means degraded by 1, so add 1 to base prio
    local degraded_prio=$((DEGRADED_PRIO + 1))
    local max_tries=10
    local ERRORS=0
    local out="$dir/out"
    local osd p i pname ITEM PRIO

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for ((osd = 0; osd < OSDS; osd++)); do
        run_osd "$dir" "$osd" || return 1
    done

    for ((p = 1; p <= pools; p++)); do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 3 pools with a pg with the same primaries but second
    # replica on another osd.
    local PG1="" POOLNUM1="" pool1="" chk_osd1_1="" chk_osd1_2=""
    local PG2="" POOLNUM2="" pool2="" chk_osd2=""
    local PG3="" POOLNUM3="" pool3=""
    local test_osd1 test_osd2

    for ((p = 1; p <= pools; p++)); do
        # First line of the acting set is taken as the primary, last line as
        # the replica (pools are size 2 at this point).
        ceph pg map "${p}.0" --format=json | jq '.acting[]' > "$dir/acting"
        test_osd1=$(head -n 1 "$dir/acting")
        test_osd2=$(tail -n 1 "$dir/acting")
        if [[ -z "$PG1" ]]; then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [[ -z "$PG2" && "$chk_osd1_1" == "$test_osd1" &&
                "$chk_osd1_2" != "$test_osd2" ]]; then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2=$test_osd2
        elif [[ -n "$PG2" && "$chk_osd1_1" == "$test_osd1" &&
                "$chk_osd1_2" != "$test_osd2" &&
                "$chk_osd2" != "$test_osd2" ]]; then
            PG3="${p}.0"
            POOLNUM3=$p
            pool3="${poolprefix}$p"
            break
        fi
    done
    rm -f "$dir/acting"

    # Both the second and the third pool are required for the scenario.
    if [[ -z "$pool2" || -z "$pool3" ]]; then
        echo "Failure to find appropirate PGs"
        return 1
    fi

    # Drop every pool that was not selected.
    for ((p = 1; p <= pools; p++)); do
        if [[ "$p" != "$POOLNUM1" && "$p" != "$POOLNUM2" &&
              "$p" != "$POOLNUM3" ]]; then
            delete_pool "${poolprefix}$p"
        fi
    done

    ceph osd pool set "$pool2" size 1 --yes-i-really-mean-it
    ceph osd pool set "$pool3" size 1 --yes-i-really-mean-it
    wait_for_clean || return 1

    # Populate each selected pool; objects are named obj<i>-p<1..3>.
    dd if=/dev/urandom of="$dir/data" bs=1M count=10
    p=1
    for pname in "$pool1" "$pool2" "$pool3"; do
        for ((i = 1; i <= objects; i++)); do
            rados -p "${pname}" put "obj${i}-p${p}" "$dir/data"
        done
        p=$((p + 1))
    done

    # The result is not used further in this function; the call is kept so
    # the command sequence (and its trace output) is unchanged.
    local otherosd
    otherosd=$(get_not_primary "$pool1" obj1-p1)

    ceph pg dump pgs

    ceph osd set nobackfill
    ceph osd set noout

    # Get a pg to want to backfill and quickly force it
    ceph osd pool set "$pool3" size 2
    sleep 2

    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${chk_osd1_1}")" dump_recovery_reservations || return 1

    # 3. Item is in progress, adjust priority with no higher priority waiting
    for ((i = 1; i <= max_tries; i++)); do
        # Retry while the monitor still answers that the PG needs no backfill.
        if ! ceph pg force-backfill "$PG3" 2>&1 |
            grep -q "doesn't require backfilling"; then
            break
        fi
        if [[ "$i" == "$max_tries" ]]; then
            echo "ERROR: Didn't appear to be able to force-backfill"
            ERRORS=$((ERRORS + 1))
        fi
        sleep 2
    done
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${chk_osd1_1}")" dump_recovery_reservations || return 1

    ceph osd out "osd.$chk_osd1_2"
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${chk_osd1_1}")" dump_recovery_reservations || return 1

    ceph pg dump pgs
    ceph osd pool set "$pool2" size 2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${chk_osd1_1}")" dump_recovery_reservations > "$out" || return 1
    cat "$out"
    ceph pg dump pgs

    PRIO=$(jq --arg pg "$PG1" \
        '(.local_reservations.queues[].items[] | select(.item == $pg)).prio' "$out")
    if [[ "$PRIO" != "$NORMAL_PRIO" ]]; then
        echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
        ERRORS=$((ERRORS + 1))
    fi

    # jq -r emits the item without its JSON double-quotes.
    ITEM=$(jq -r '.local_reservations.in_progress[0].item' "$out")
    if [[ "$ITEM" != "$PG3" ]]; then
        echo "The force-backfill PG $PG3 didn't become the in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' "$out")
        if [[ "$PRIO" != "$FORCE_PRIO" ]]; then
            echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    # 1. Item is queued, re-queue with new priority
    for ((i = 1; i <= max_tries; i++)); do
        if ! ceph pg force-backfill "$PG2" 2>&1 |
            grep -q "doesn't require backfilling"; then
            break
        fi
        if [[ "$i" == "$max_tries" ]]; then
            echo "ERROR: Didn't appear to be able to force-backfill"
            ERRORS=$((ERRORS + 1))
        fi
        sleep 2
    done
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${chk_osd1_1}")" dump_recovery_reservations > "$out" || return 1
    cat "$out"
    PRIO=$(jq --arg pg "$PG2" \
        '(.local_reservations.queues[].items[] | select(.item == $pg)).prio' "$out")
    if [[ "$PRIO" != "$FORCE_PRIO" ]]; then
        echo "The second force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO"
        ERRORS=$((ERRORS + 1))
    fi
    flush_pg_stats || return 1

    # 4. Item is in progress, if higher priority items waiting preempt item
    ceph pg cancel-force-backfill "$PG3" || return 1
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${chk_osd1_1}")" dump_recovery_reservations > "$out" || return 1
    cat "$out"
    PRIO=$(jq --arg pg "$PG3" \
        '(.local_reservations.queues[].items[] | select(.item == $pg)).prio' "$out")
    if [[ "$PRIO" != "$degraded_prio" ]]; then
        echo "After cancel-force-backfill PG ${PG3} doesn't have prio $degraded_prio"
        ERRORS=$((ERRORS + 1))
    fi

    ITEM=$(jq -r '.local_reservations.in_progress[0].item' "$out")
    if [[ "$ITEM" != "$PG2" ]]; then
        echo "The force-backfill PG $PG2 didn't become the in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' "$out")
        if [[ "$PRIO" != "$FORCE_PRIO" ]]; then
            echo "The first force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    ceph pg cancel-force-backfill "$PG2" || return 1
    sleep 5
    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${chk_osd1_1}")" dump_recovery_reservations || return 1

    # 2. Item is queued, re-queue and preempt because new priority higher than an in progress item
    flush_pg_stats || return 1
    ceph pg force-backfill "$PG3" || return 1
    sleep 2

    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${chk_osd1_1}")" dump_recovery_reservations > "$out" || return 1
    cat "$out"
    PRIO=$(jq --arg pg "$PG2" \
        '(.local_reservations.queues[].items[] | select(.item == $pg)).prio' "$out")
    if [[ "$PRIO" != "$degraded_prio" ]]; then
        echo "After cancel-force-backfill PG ${PG2} doesn't have prio $degraded_prio"
        ERRORS=$((ERRORS + 1))
    fi

    ITEM=$(jq -r '.local_reservations.in_progress[0].item' "$out")
    if [[ "$ITEM" != "$PG3" ]]; then
        echo "The force-backfill PG $PG3 didn't get promoted to an in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' "$out")
        if [[ "$PRIO" != "$FORCE_PRIO" ]]; then
            echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    ceph osd unset noout
    ceph osd unset nobackfill

    # The command string is handed to wait_for_clean as its argument.
    wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1

    ceph pg dump pgs

    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${chk_osd1_1}")" dump_pgstate_history

    if [[ "$ERRORS" != "0" ]]; then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool "$pool1"
    delete_pool "$pool2"
    delete_pool "$pool3"
    kill_daemons "$dir" || return 1
    return "$ERRORS"
}
#######################################
# Show that pool recovery_priority is added to the backfill priority
#
# Create 2 pools with 2 OSDs with different primaries
# pool 1 with recovery_priority 1
# pool 2 with recovery_priority 2
#
# Start backfill by changing the pool sizes from 1 to 2
# Use dump_recovery_reservations to verify priorities
#
# NOTE(review): the excerpt this block was restored from omitted a number of
# lines (loop/if keywords, `local` declarations, OSDS, the extra-priority
# assignments, `sleep` settle delays, `ceph pg dump pgs` traces, the final
# pool deletes and `return`). They are reconstructed here and must be
# confirmed against the upstream file - in particular OSDS=2 and every sleep
# duration. The extra priorities 1 and 2 follow the description above.
#
# Changes relative to the visible original: `eval` on daemon output is
# replaced by `jq -r`, `expr` by shell arithmetic, tests are quoted, and
# counters/loop variables no longer leak into the global scope.
#
# Globals:   reads poolprefix, objects, DEGRADED_PRIO; exports CEPH_ARGS
# Arguments: $1 - test working directory
# Returns:   1 on an infrastructure failure, otherwise the number of failed
#            checks (0 when all passed)
#######################################
function TEST_backfill_pool_priority() {
    local dir=$1
    local pools=3 # Don't assume the first 2 pools are exactly what we want
    local OSDS=2
    local ERRORS=0
    local osd p i pname ITEM PRIO

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for ((osd = 0; osd < OSDS; osd++)); do
        run_osd "$dir" "$osd" || return 1
    done

    for ((p = 1; p <= pools; p++)); do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with different primaries which
    # means the replica must be on another osd.
    local PG1="" POOLNUM1="" pool1="" chk_osd1_1="" chk_osd1_2=""
    local PG2="" POOLNUM2="" pool2="" chk_osd2_1="" chk_osd2_2=""
    local test_osd1 test_osd2

    for ((p = 1; p <= pools; p++)); do
        # First line of the acting set is taken as the primary, last line as
        # the replica (pools are size 2 at this point).
        ceph pg map "${p}.0" --format=json | jq '.acting[]' > "$dir/acting"
        test_osd1=$(head -n 1 "$dir/acting")
        test_osd2=$(tail -n 1 "$dir/acting")
        if [[ -z "$PG1" ]]; then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [[ "$chk_osd1_1" != "$test_osd1" ]]; then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2_1=$test_osd1
            chk_osd2_2=$test_osd2
            break
        fi
    done
    rm -f "$dir/acting"

    if [[ -z "$pool2" ]]; then
        echo "Failure to find appropirate PGs"
        return 1
    fi

    # Drop every pool that was not selected.
    for ((p = 1; p <= pools; p++)); do
        if [[ "$p" != "$POOLNUM1" && "$p" != "$POOLNUM2" ]]; then
            delete_pool "${poolprefix}$p"
        fi
    done

    local pool1_extra_prio=1
    local pool2_extra_prio=2
    # size 2 -> 1 means degraded by 1, so add 1 to base prio
    local pool1_prio=$((DEGRADED_PRIO + 1 + pool1_extra_prio))
    local pool2_prio=$((DEGRADED_PRIO + 1 + pool2_extra_prio))

    ceph osd pool set "$pool1" size 1 --yes-i-really-mean-it
    ceph osd pool set "$pool1" recovery_priority "$pool1_extra_prio"
    ceph osd pool set "$pool2" size 1 --yes-i-really-mean-it
    ceph osd pool set "$pool2" recovery_priority "$pool2_extra_prio"
    wait_for_clean || return 1

    # Populate each selected pool; objects are named obj<i>-p<1..2>.
    dd if=/dev/urandom of="$dir/data" bs=1M count=10
    p=1
    for pname in "$pool1" "$pool2"; do
        for ((i = 1; i <= objects; i++)); do
            rados -p "${pname}" put "obj${i}-p${p}" "$dir/data"
        done
        p=$((p + 1))
    done

    # The result is not used further in this function; the call is kept so
    # the command sequence (and its trace output) is unchanged.
    local otherosd
    otherosd=$(get_not_primary "$pool1" obj1-p1)

    ceph pg dump pgs

    # Growing both pools back to size 2 is what starts the backfills.
    ceph osd pool set "$pool1" size 2
    ceph osd pool set "$pool2" size 2
    sleep 30

    # Only pool1's two OSDs are dumped. The pool2 checks below read
    # dump.<chk_osd2_*>.out, which presumably are these same two files because
    # there are just two OSDs and the pools have different primaries -
    # NOTE(review): confirm this still holds if OSDS is ever raised.
    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${chk_osd1_1}")" dump_recovery_reservations > "$dir/dump.${chk_osd1_1}.out"
    echo "osd.${chk_osd1_1}"
    cat "$dir/dump.${chk_osd1_1}.out"
    CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path "osd.${chk_osd1_2}")" dump_recovery_reservations > "$dir/dump.${chk_osd1_2}.out"
    echo "osd.${chk_osd1_2}"
    cat "$dir/dump.${chk_osd1_2}.out"

    # pool1, local reservation on its primary.
    # jq -r emits the item without its JSON double-quotes.
    ITEM=$(jq -r '.local_reservations.in_progress[0].item' "$dir/dump.${chk_osd1_1}.out")
    if [[ "$ITEM" != "$PG1" ]]; then
        echo "The primary PG ${PG1} didn't become the in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' "$dir/dump.${chk_osd1_1}.out")
        if [[ "$PRIO" != "$pool1_prio" ]]; then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    # pool1, remote reservation on its replica.
    ITEM=$(jq -r '.remote_reservations.in_progress[0].item' "$dir/dump.${chk_osd1_2}.out")
    if [[ "$ITEM" != "$PG1" ]]; then
        echo "The primary PG ${PG1} didn't become the in progress item on remote"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.remote_reservations.in_progress[0].prio' "$dir/dump.${chk_osd1_2}.out")
        if [[ "$PRIO" != "$pool1_prio" ]]; then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    # pool2, local reservation on its primary.
    ITEM=$(jq -r '.local_reservations.in_progress[0].item' "$dir/dump.${chk_osd2_1}.out")
    if [[ "$ITEM" != "$PG2" ]]; then
        echo "The primary PG ${PG2} didn't become the in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.local_reservations.in_progress[0].prio' "$dir/dump.${chk_osd2_1}.out")
        if [[ "$PRIO" != "$pool2_prio" ]]; then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    # pool2, remote reservation on its replica.
    ITEM=$(jq -r '.remote_reservations.in_progress[0].item' "$dir/dump.${chk_osd2_2}.out")
    if [[ "$ITEM" != "$PG2" ]]; then
        echo "The primary PG $PG2 didn't become the in progress item on remote"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(jq '.remote_reservations.in_progress[0].prio' "$dir/dump.${chk_osd2_2}.out")
        if [[ "$PRIO" != "$pool2_prio" ]]; then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    wait_for_clean || return 1

    if [[ "$ERRORS" != "0" ]]; then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool "$pool1"
    delete_pool "$pool2"
    kill_daemons "$dir" || return 1
    return "$ERRORS"
}
# Start the suite: hand the suite name and all command-line arguments to
# main, which is not defined in this file.
main osd-backfill-prio "$@"
521 # compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-prio.sh"