3 # Copyright (C) 2019 Red Hat <contact@redhat.com>
5 # Author: David Zafman <dzafman@redhat.com>
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
18 source $CEPH_ROOT/qa
/standalone
/ceph-helpers.sh
25 export CEPH_MON
="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
27 CEPH_ARGS
+="--fsid=$(uuidgen) --auth-supported=none "
28 CEPH_ARGS
+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 "
29 # Set osd op queue = wpq for the tests. Recovery priority is not
30 # considered by mclock_scheduler leading to unexpected results.
31 CEPH_ARGS
+="--osd-op-queue=wpq "
33 export poolprefix
=test
34 export FORCE_PRIO
="255" # See OSD_RECOVERY_PRIORITY_FORCED
35 export NORMAL_PRIO
="190" # See OSD_RECOVERY_PRIORITY_BASE + 10
37 local funcs
=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
38 for func
in $funcs ; do
39 setup
$dir ||
return 1
40 $func $dir ||
return 1
41 teardown
$dir ||
return 1
46 function TEST_recovery_priority
() {
52 run_mon
$dir a ||
return 1
53 run_mgr
$dir x ||
return 1
56 for osd
in $
(seq 0 $
(expr $OSDS - 1))
58 run_osd
$dir $osd ||
return 1
61 for p
in $
(seq 1 $pools)
63 create_pool
"${poolprefix}$p" 1 1
64 ceph osd pool
set "${poolprefix}$p" size
2
68 wait_for_clean ||
return 1
72 # Find 3 pools with a pg with the same primaries but second
73 # replica on another osd.
89 for p
in $
(seq 1 $pools)
91 ceph pg map
${p}.0 --format=json | jq
'.acting[]' > $dir/acting
92 local test_osd1
=$
(head -1 $dir/acting
)
93 local test_osd2
=$
(tail -1 $dir/acting
)
98 pool1
="${poolprefix}$p"
100 chk_osd1_2
=$test_osd2
101 elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ];
105 pool2
="${poolprefix}$p"
107 elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ];
111 pool3
="${poolprefix}$p"
# Fail when either the second or the third pool was not selected.
# The third operand must expand $pool3; the bare word "pool3" is never empty.
117 if [ "$pool2" = "" -o "$pool3" = "" ];
119 echo "Failure to find appropirate PGs"
123 for p
in $
(seq 1 $pools)
125 if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ];
127 delete_pool
${poolprefix}$p
131 ceph osd pool
set $pool2 size
1 --yes-i-really-mean-it
132 ceph osd pool
set $pool3 size
1 --yes-i-really-mean-it
133 wait_for_clean ||
return 1
135 dd if=/dev
/urandom of
=$dir/data bs
=1M count
=10
137 for pname
in $pool1 $pool2 $pool3
139 for i
in $
(seq 1 $objects)
141 rados
-p ${pname} put obj${i}-p${p} $dir/data
146 local otherosd
=$
(get_not_primary
$pool1 obj1-p1
)
151 ceph osd
set norecover
154 # Get a pg to want to recover and quickly force it
156 ceph osd pool
set $pool3 size
2
158 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations ||
return 1
160 # 3. Item is in progress, adjust priority with no higher priority waiting
161 for i
in $
(seq 1 $max_tries)
163 if ! ceph pg force-recovery
$PG3 2>&1 |
grep -q "doesn't require recovery"; then
166 if [ "$i" = "$max_tries" ]; then
167 echo "ERROR: Didn't appear to be able to force-recovery"
168 ERRORS
=$
(expr $ERRORS + 1)
172 flush_pg_stats ||
return 1
173 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations ||
return 1
175 ceph osd out osd.
$chk_osd1_2
177 flush_pg_stats ||
return 1
178 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations ||
return 1
181 ceph osd pool
set $pool2 size
2
183 flush_pg_stats ||
return 1
184 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations
> $dir/out ||
return 1
188 PRIO
=$
(cat $dir/out | jq
"(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio")
189 if [ "$PRIO" != "$NORMAL_PRIO" ];
191 echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
192 ERRORS
=$
(expr $ERRORS + 1)
195 # Using eval will strip double-quotes from item
196 eval ITEM
=$
(cat $dir/out | jq
'.local_reservations.in_progress[0].item')
197 if [ "$ITEM" != ${PG3} ];
199 echo "The first force-recovery PG $PG3 didn't become the in progress item"
200 ERRORS
=$
(expr $ERRORS + 1)
202 PRIO
=$
(cat $dir/out | jq
'.local_reservations.in_progress[0].prio')
203 if [ "$PRIO" != $FORCE_PRIO ];
205 echo "The first force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
206 ERRORS
=$
(expr $ERRORS + 1)
210 # 1. Item is queued, re-queue with new priority
211 for i
in $
(seq 1 $max_tries)
213 if ! ceph pg force-recovery
$PG2 2>&1 |
grep -q "doesn't require recovery"; then
216 if [ "$i" = "$max_tries" ]; then
217 echo "ERROR: Didn't appear to be able to force-recovery"
218 ERRORS
=$
(expr $ERRORS + 1)
223 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations
> $dir/out ||
return 1
225 PRIO
=$
(cat $dir/out | jq
"(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
226 if [ "$PRIO" != "$FORCE_PRIO" ];
228 echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
229 ERRORS
=$
(expr $ERRORS + 1)
231 flush_pg_stats ||
return 1
233 # 4. Item is in progress; if higher-priority items are waiting, preempt the item
234 #ceph osd unset norecover
235 ceph pg cancel-force-recovery
$PG3 ||
return 1
237 #ceph osd set norecover
238 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations
> $dir/out ||
return 1
240 PRIO
=$
(cat $dir/out | jq
"(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio")
241 if [ "$PRIO" != "$NORMAL_PRIO" ];
243 echo "After cancel-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO"
244 ERRORS
=$
(expr $ERRORS + 1)
247 eval ITEM
=$
(cat $dir/out | jq
'.local_reservations.in_progress[0].item')
248 if [ "$ITEM" != ${PG2} ];
250 echo "The force-recovery PG $PG2 didn't become the in progress item"
251 ERRORS
=$
(expr $ERRORS + 1)
253 PRIO
=$
(cat $dir/out | jq
'.local_reservations.in_progress[0].prio')
254 if [ "$PRIO" != $FORCE_PRIO ];
256 echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
257 ERRORS
=$
(expr $ERRORS + 1)
261 ceph pg cancel-force-recovery
$PG2 ||
return 1
263 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations ||
return 1
265 # 2. Item is queued, re-queue and preempt because new priority higher than an in progress item
266 flush_pg_stats ||
return 1
267 ceph pg force-recovery
$PG3 ||
return 1
270 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations
> $dir/out ||
return 1
272 PRIO
=$
(cat $dir/out | jq
"(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
273 if [ "$PRIO" != "$NORMAL_PRIO" ];
# The priority just checked was looked up for ${PG2}, so report ${PG2}.
275 echo "After cancel-force-recovery PG ${PG2} doesn't have prio $NORMAL_PRIO"
276 ERRORS
=$
(expr $ERRORS + 1)
279 eval ITEM
=$
(cat $dir/out | jq
'.local_reservations.in_progress[0].item')
280 if [ "$ITEM" != ${PG3} ];
282 echo "The force-recovery PG $PG3 didn't get promoted to an in progress item"
283 ERRORS
=$
(expr $ERRORS + 1)
285 PRIO
=$
(cat $dir/out | jq
'.local_reservations.in_progress[0].prio')
286 if [ "$PRIO" != $FORCE_PRIO ];
# The in-progress item checked just before this is expected to be ${PG3},
# so the priority failure is reported against ${PG3}.
288 echo "The force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
289 ERRORS
=$
(expr $ERRORS + 1)
294 ceph osd
unset norecover
296 wait_for_clean
"CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" ||
return 1
300 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_pgstate_history
302 if [ $ERRORS != "0" ];
304 echo "$ERRORS error(s) found"
312 kill_daemons
$dir ||
return 1
317 # Show that pool recovery_priority is added to recovery priority
319 # Create 2 pools with 2 OSDs with different primaries
320 # pool 1 with recovery_priority 1
321 # pool 2 with recovery_priority 2
323 # Start recovery by changing the pool sizes from 1 to 2
324 # Use dump_recovery_reservations to verify priorities
325 function TEST_recovery_pool_priority
() {
327 local pools
=3 # Don't assume the first 2 pools are exactly what we want
330 run_mon
$dir a ||
return 1
331 run_mgr
$dir x ||
return 1
334 for osd
in $
(seq 0 $
(expr $OSDS - 1))
336 run_osd
$dir $osd ||
return 1
339 for p
in $
(seq 1 $pools)
341 create_pool
"${poolprefix}$p" 1 1
342 ceph osd pool
set "${poolprefix}$p" size
2
346 wait_for_clean ||
return 1
350 # Find 2 pools with different primaries which
351 # means the replica must be on another osd.
364 for p
in $
(seq 1 $pools)
366 ceph pg map
${p}.0 --format=json | jq
'.acting[]' > $dir/acting
367 local test_osd1
=$
(head -1 $dir/acting
)
368 local test_osd2
=$
(tail -1 $dir/acting
)
373 pool1
="${poolprefix}$p"
374 chk_osd1_1
=$test_osd1
375 chk_osd1_2
=$test_osd2
376 elif [ $chk_osd1_1 != $test_osd1 ];
380 pool2
="${poolprefix}$p"
381 chk_osd2_1
=$test_osd1
382 chk_osd2_2
=$test_osd2
388 if [ "$pool2" = "" ];
390 echo "Failure to find appropirate PGs"
394 for p
in $
(seq 1 $pools)
396 if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
398 delete_pool
${poolprefix}$p
404 pool1_prio
=$
(expr $NORMAL_PRIO + $pool1_extra_prio)
405 pool2_prio
=$
(expr $NORMAL_PRIO + $pool2_extra_prio)
407 ceph osd pool
set $pool1 size
1 --yes-i-really-mean-it
408 ceph osd pool
set $pool1 recovery_priority
$pool1_extra_prio
409 ceph osd pool
set $pool2 size
1 --yes-i-really-mean-it
410 ceph osd pool
set $pool2 recovery_priority
$pool2_extra_prio
411 wait_for_clean ||
return 1
413 dd if=/dev
/urandom of
=$dir/data bs
=1M count
=10
415 for pname
in $pool1 $pool2
417 for i
in $
(seq 1 $objects)
419 rados
-p ${pname} put obj${i}-p${p} $dir/data
424 local otherosd
=$
(get_not_primary
$pool1 obj1-p1
)
429 ceph osd pool
set $pool1 size
2
430 ceph osd pool
set $pool2 size
2
432 # Wait for both PGs to be in recovering state
435 # Wait for recovery to start
440 if test $
(ceph
--format json pg dump pgs |
441 jq
'.pg_stats | .[] | .state | contains("recovering")' |
grep -c true
) == "2"
446 if test "$count" -eq "10"
448 echo "Recovery never started on both PGs"
451 count
=$
(expr $count + 1)
456 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations
> $dir/dump.
${chk_osd1_1}.out
457 echo osd.
${chk_osd1_1}
458 cat $dir/dump.
${chk_osd1_1}.out
459 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_2}) dump_recovery_reservations
> $dir/dump.
${chk_osd1_2}.out
460 echo osd.
${chk_osd1_2}
461 cat $dir/dump.
${chk_osd1_2}.out
463 # Using eval will strip double-quotes from item
464 eval ITEM
=$
(cat $dir/dump.
${chk_osd1_1}.out | jq
'.local_reservations.in_progress[0].item')
465 if [ "$ITEM" != ${PG1} ];
467 echo "The primary PG for $pool1 didn't become the in progress item"
468 ERRORS
=$
(expr $ERRORS + 1)
470 PRIO
=$
(cat $dir/dump.
${chk_osd1_1}.out | jq
'.local_reservations.in_progress[0].prio')
471 if [ "$PRIO" != $pool1_prio ];
473 echo "The primary PG ${PG1} doesn't have prio $pool1_prio"
474 ERRORS
=$
(expr $ERRORS + 1)
478 # Using eval will strip double-quotes from item
479 eval ITEM
=$
(cat $dir/dump.
${chk_osd1_2}.out | jq
'.remote_reservations.in_progress[0].item')
480 if [ "$ITEM" != ${PG1} ];
482 echo "The primary PG for $pool1 didn't become the in progress item on remote"
483 ERRORS
=$
(expr $ERRORS + 1)
485 PRIO
=$
(cat $dir/dump.
${chk_osd1_2}.out | jq
'.remote_reservations.in_progress[0].prio')
486 if [ "$PRIO" != $pool1_prio ];
488 echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote"
489 ERRORS
=$
(expr $ERRORS + 1)
493 # Using eval will strip double-quotes from item
494 eval ITEM
=$
(cat $dir/dump.
${chk_osd2_1}.out | jq
'.local_reservations.in_progress[0].item')
495 if [ "$ITEM" != ${PG2} ];
497 echo "The primary PG for $pool2 didn't become the in progress item"
498 ERRORS
=$
(expr $ERRORS + 1)
500 PRIO
=$
(cat $dir/dump.
${chk_osd2_1}.out | jq
'.local_reservations.in_progress[0].prio')
501 if [ "$PRIO" != $pool2_prio ];
503 echo "The primary PG ${PG2} doesn't have prio $pool2_prio"
504 ERRORS
=$
(expr $ERRORS + 1)
508 # Using eval will strip double-quotes from item
509 eval ITEM
=$
(cat $dir/dump.
${chk_osd2_2}.out | jq
'.remote_reservations.in_progress[0].item')
510 if [ "$ITEM" != ${PG2} ];
512 echo "The primary PG $PG2 didn't become the in progress item on remote"
513 ERRORS
=$
(expr $ERRORS + 1)
515 PRIO
=$
(cat $dir/dump.
${chk_osd2_2}.out | jq
'.remote_reservations.in_progress[0].prio')
516 if [ "$PRIO" != $pool2_prio ];
518 echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote"
519 ERRORS
=$
(expr $ERRORS + 1)
523 wait_for_clean ||
return 1
525 if [ $ERRORS != "0" ];
527 echo "$ERRORS error(s) found"
534 kill_daemons
$dir ||
return 1
538 main osd-recovery-prio
"$@"
541 # compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-prio.sh"