3 # Copyright (C) 2019 Red Hat <contact@redhat.com>
5 # Author: David Zafman <dzafman@redhat.com>
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
# Pull in the shared standalone-test helper library (provides setup/teardown,
# run_mon, run_mgr, run_osd, wait_for_clean, flush_pg_stats, get_asok_path,
# get_not_primary, kill_daemons used below).
# NOTE(review): this single `source` statement is split across three physical
# lines by the extraction; upstream it is one line — confirm against the
# original file before running.
18 source $CEPH_ROOT/qa
/standalone
/ceph-helpers.sh
# --- Test environment configuration -------------------------------------
# Monitor endpoint for this standalone test; the port must be unique across
# the test suite (hence the git-grep note).
25 export CEPH_MON
="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
# Build CEPH_ARGS: fresh fsid, no auth, serialize backfills to one at a time,
# and verbose reserver logging so recovery-reservation decisions are visible.
# NOTE(review): no `export CEPH_ARGS` is visible in this extraction; the
# append (`+=`) suggests it is declared/exported on a line that was dropped.
27 CEPH_ARGS
+="--fsid=$(uuidgen) --auth-supported=none "
28 CEPH_ARGS
+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20"
# Prefix for the pools the tests create (test1, test2, ...).
30 export poolprefix
=test
# Expected reservation priorities, mirroring the OSD-side constants named in
# the trailing comments; the tests compare jq output against these values.
31 export FORCE_PRIO
="255" # See OSD_RECOVERY_PRIORITY_FORCED
32 export NORMAL_PRIO
="190" # See OSD_RECOVERY_PRIORITY_BASE + 10
# --- Test driver loop ----------------------------------------------------
# Default to every function named TEST_* defined in this shell (discovered by
# listing the shell's state with `set` and filtering via sed) unless explicit
# test names were passed as arguments.
# NOTE(review): `local` requires a function context — the enclosing
# `function run() { local dir=$1; ... }` wrapper appears to have been dropped
# by this extraction (original line numbers jump). Confirm against upstream.
34 local funcs
=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
# Run each selected test inside a fresh cluster: setup, test body, teardown;
# abort on the first failure.
35 for func
in $funcs ; do
36 setup
$dir ||
return 1
37 $func $dir ||
return 1
38 teardown
$dir ||
return 1
# Test that `ceph pg force-recovery` / `cancel-force-recovery` adjust a PG's
# recovery-reservation priority on the primary OSD in four situations
# (numbered 1-4 in the inline comments below): queued re-queue, queued
# preemption of an in-progress item, in-progress priority adjustment, and
# in-progress demotion on cancel.
# NOTE(review): this extraction is missing many original lines (the embedded
# line numbers jump, e.g. 43->49, 90->95) — `do`/`done`, `then`/`fi`, local
# declarations (e.g. $pools, $OSDS, $max_tries, $PG1..$PG3) and the closing
# brace are absent. Restore from upstream before executing.
43 function TEST_recovery_priority
() {
# Bring up a minimal cluster: one mon, one mgr, $OSDS OSDs.
49 run_mon
$dir a ||
return 1
50 run_mgr
$dir x ||
return 1
53 for osd
in $
(seq 0 $
(expr $OSDS - 1))
55 run_osd
$dir $osd ||
return 1
# Create $pools single-PG replicated pools (pg_num=1, pgp_num=1, size=2).
58 for p
in $
(seq 1 $pools)
60 create_pool
"${poolprefix}$p" 1 1
61 ceph osd pool
set "${poolprefix}$p" size
2
65 wait_for_clean ||
return 1
69 # Find 3 pools with a pg with the same primaries but second
70 # replica on another osd.
# For each pool's pg <poolnum>.0, read the acting set via `ceph pg map` and
# jq; first/last lines of $dir/acting are the primary and second replica.
86 for p
in $
(seq 1 $pools)
88 ceph pg map
${p}.0 --format=json | jq
'.acting[]' > $dir/acting
89 local test_osd1
=$
(head -1 $dir/acting
)
90 local test_osd2
=$
(tail -1 $dir/acting
)
# First match becomes pool1/PG1 (reference primary chk_osd1_1 + replica).
95 pool1
="${poolprefix}$p"
# pool2/PG2: same primary as PG1 but a different second replica.
98 elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ];
102 pool2
="${poolprefix}$p"
# pool3/PG3: same primary again, replica differing from both PG1's and PG2's.
104 elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ];
108 pool3
="${poolprefix}$p"
# Bail out if suitable pools were not found with this pg layout.
# NOTE(review): `"pool3"` (literal string, no $) can never be "" — looks like
# an upstream bug for `"$pool3"`; also "appropirate" is a typo in the runtime
# message. Left untouched here since this is a comments-only pass.
114 if [ "$pool2" = "" -o "pool3" = "" ];
116 echo "Failure to find appropirate PGs"
# Drop every pool except the three chosen ones.
120 for p
in $
(seq 1 $pools)
122 if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ];
124 delete_pool
${poolprefix}$p
# Shrink pool2/pool3 to size 1 so that raising size back to 2 later forces
# them to need recovery.
128 ceph osd pool
set $pool2 size
1 --yes-i-really-mean-it
129 ceph osd pool
set $pool3 size
1 --yes-i-really-mean-it
130 wait_for_clean ||
return 1
# Write $objects x 10MB objects into each of the three pools so recovery has
# real work to do.
132 dd if=/dev
/urandom of
=$dir/data bs
=1M count
=10
134 for pname
in $pool1 $pool2 $pool3
136 for i
in $
(seq 1 $objects)
138 rados
-p ${pname} put obj${i}-p${p} $dir/data
143 local otherosd
=$
(get_not_primary
$pool1 obj1-p1
)
# Freeze recovery so reservation queue state can be inspected deterministically.
148 ceph osd
set norecover
151 # Get a pg to want to recover and quickly force it
153 ceph osd pool
set $pool3 size
2
# Dump the reservation state from the shared primary's admin socket.
# CEPH_ARGS='' prevents the test-wide args from confusing the local ceph tool.
155 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations ||
return 1
157 # 3. Item is in progress, adjust priority with no higher priority waiting
# Retry force-recovery until the PG actually requires recovery (the command
# is a no-op and prints "doesn't require recovery" until then).
158 for i
in $
(seq 1 $max_tries)
160 if ! ceph pg force-recovery
$PG3 2>&1 |
grep -q "doesn't require recovery"; then
163 if [ "$i" = "$max_tries" ]; then
164 echo "ERROR: Didn't appear to be able to force-recovery"
165 ERRORS
=$
(expr $ERRORS + 1)
169 flush_pg_stats ||
return 1
170 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations ||
return 1
# Mark PG1's replica out so PG1 also needs recovery (at normal priority).
172 ceph osd out osd.
$chk_osd1_2
174 flush_pg_stats ||
return 1
175 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations ||
return 1
# Grow pool2 back to size 2 so PG2 queues for recovery as well.
178 ceph osd pool
set $pool2 size
2
180 flush_pg_stats ||
return 1
# Capture the reservation dump to $dir/out for the jq assertions below.
181 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations
> $dir/out ||
return 1
# PG1 should be waiting in the local queue at NORMAL_PRIO.
185 PRIO
=$
(cat $dir/out | jq
"(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio")
186 if [ "$PRIO" != "$NORMAL_PRIO" ];
188 echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
189 ERRORS
=$
(expr $ERRORS + 1)
192 # Using eval will strip double-quotes from item
# PG3 (forced first) should be the in-progress local reservation, at FORCE_PRIO.
193 eval ITEM
=$
(cat $dir/out | jq
'.local_reservations.in_progress[0].item')
194 if [ "$ITEM" != ${PG3} ];
196 echo "The first force-recovery PG $PG3 didn't become the in progress item"
197 ERRORS
=$
(expr $ERRORS + 1)
199 PRIO
=$
(cat $dir/out | jq
'.local_reservations.in_progress[0].prio')
200 if [ "$PRIO" != $FORCE_PRIO ];
202 echo "The first force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
203 ERRORS
=$
(expr $ERRORS + 1)
207 # 1. Item is queued, re-queue with new priority
# Force PG2 while it sits in the queue; it should be re-queued at FORCE_PRIO.
208 for i
in $
(seq 1 $max_tries)
210 if ! ceph pg force-recovery
$PG2 2>&1 |
grep -q "doesn't require recovery"; then
213 if [ "$i" = "$max_tries" ]; then
214 echo "ERROR: Didn't appear to be able to force-recovery"
215 ERRORS
=$
(expr $ERRORS + 1)
220 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations
> $dir/out ||
return 1
222 PRIO
=$
(cat $dir/out | jq
"(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
223 if [ "$PRIO" != "$FORCE_PRIO" ];
225 echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
226 ERRORS
=$
(expr $ERRORS + 1)
228 flush_pg_stats ||
return 1
# (typo fixed in comment below: "prempt" -> "preempt")
230 # 4. Item is in progress, if higher priority items waiting preempt item
231 #ceph osd unset norecover
# Cancel the force on in-progress PG3; the still-forced PG2 should preempt it.
232 ceph pg cancel-force-recovery
$PG3 ||
return 1
234 #ceph osd set norecover
235 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations
> $dir/out ||
return 1
# PG3 must be back in the queue at NORMAL_PRIO after the cancel.
237 PRIO
=$
(cat $dir/out | jq
"(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio")
238 if [ "$PRIO" != "$NORMAL_PRIO" ];
240 echo "After cancel-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO"
241 ERRORS
=$
(expr $ERRORS + 1)
# PG2 must now be the in-progress item at FORCE_PRIO.
244 eval ITEM
=$
(cat $dir/out | jq
'.local_reservations.in_progress[0].item')
245 if [ "$ITEM" != ${PG2} ];
247 echo "The force-recovery PG $PG2 didn't become the in progress item"
248 ERRORS
=$
(expr $ERRORS + 1)
250 PRIO
=$
(cat $dir/out | jq
'.local_reservations.in_progress[0].prio')
251 if [ "$PRIO" != $FORCE_PRIO ];
253 echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
254 ERRORS
=$
(expr $ERRORS + 1)
# Drop PG2 back to normal priority as well.
258 ceph pg cancel-force-recovery
$PG2 ||
return 1
260 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations ||
return 1
262 # 2. Item is queued, re-queue and preempt because new priority higher than an in progress item
# Force queued PG3: being higher priority than whatever is in progress, it
# should be promoted to the in-progress slot.
263 flush_pg_stats ||
return 1
264 ceph pg force-recovery
$PG3 ||
return 1
267 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations
> $dir/out ||
return 1
# PG2 should now be waiting at NORMAL_PRIO...
# NOTE(review): the error message below names ${PG3} while the jq selects
# ${PG2} — likely a copy/paste slip in the upstream message. Left as-is
# (runtime string) in this comments-only pass.
269 PRIO
=$
(cat $dir/out | jq
"(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
270 if [ "$PRIO" != "$NORMAL_PRIO" ];
272 echo "After cancel-force-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO"
273 ERRORS
=$
(expr $ERRORS + 1)
# ...and PG3 should be the in-progress item at FORCE_PRIO.
276 eval ITEM
=$
(cat $dir/out | jq
'.local_reservations.in_progress[0].item')
277 if [ "$ITEM" != ${PG3} ];
279 echo "The force-recovery PG $PG3 didn't get promoted to an in progress item"
280 ERRORS
=$
(expr $ERRORS + 1)
282 PRIO
=$
(cat $dir/out | jq
'.local_reservations.in_progress[0].prio')
283 if [ "$PRIO" != $FORCE_PRIO ];
285 echo "The force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
286 ERRORS
=$
(expr $ERRORS + 1)
# Let recovery run to completion, dumping reservation state while waiting.
291 ceph osd
unset norecover
293 wait_for_clean
"CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" ||
return 1
# Dump pg state history for post-mortem debugging of failures.
297 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_pgstate_history
# Report the accumulated soft-failure count and tear the cluster down.
299 if [ $ERRORS != "0" ];
301 echo "$ERRORS error(s) found"
309 kill_daemons
$dir ||
return 1
314 # Show that pool recovery_priority is added to recovery priority
316 # Create 2 pools with 2 OSDs with different primaries
317 # pool 1 with recovery_priority 1
318 # pool 2 with recovery_priority 2
320 # Start recovery by changing the pool sizes from 1 to 2
321 # Use dump_recovery_reservations to verify priorities
# Test that a pool's `recovery_priority` setting is added to the base
# recovery priority: two pools with different primaries each get a distinct
# recovery_priority, recovery is triggered by growing size 1 -> 2, and the
# in-progress reservation priority on both the primary (local_reservations)
# and replica (remote_reservations) OSDs must equal
# NORMAL_PRIO + pool{1,2}_extra_prio.
# NOTE(review): as with the previous test, this extraction skips original
# lines (declarations of $dir, $OSDS, $pool?_extra_prio, $PG1/$PG2, loop/if
# terminators, closing brace). Restore from upstream before executing.
322 function TEST_recovery_pool_priority
() {
# Create more pools than needed so we can pick two with distinct primaries.
324 local pools
=3 # Don't assume the first 2 pools are exact what we want
327 run_mon
$dir a ||
return 1
328 run_mgr
$dir x ||
return 1
331 for osd
in $
(seq 0 $
(expr $OSDS - 1))
333 run_osd
$dir $osd ||
return 1
# Single-PG pools at size 2, then wait for a clean cluster.
336 for p
in $
(seq 1 $pools)
338 create_pool
"${poolprefix}$p" 1 1
339 ceph osd pool
set "${poolprefix}$p" size
2
343 wait_for_clean ||
return 1
347 # Find 2 pools with different primaries which
348 # means the replica must be on another osd.
# Same acting-set probing technique as the previous test: head/tail of the
# jq '.acting[]' output give primary and replica OSD ids.
361 for p
in $
(seq 1 $pools)
363 ceph pg map
${p}.0 --format=json | jq
'.acting[]' > $dir/acting
364 local test_osd1
=$
(head -1 $dir/acting
)
365 local test_osd2
=$
(tail -1 $dir/acting
)
# First pool found becomes pool1 (primary chk_osd1_1, replica chk_osd1_2).
370 pool1
="${poolprefix}$p"
371 chk_osd1_1
=$test_osd1
372 chk_osd1_2
=$test_osd2
# pool2 is the first pool whose primary differs from pool1's.
373 elif [ $chk_osd1_1 != $test_osd1 ];
377 pool2
="${poolprefix}$p"
378 chk_osd2_1
=$test_osd1
379 chk_osd2_2
=$test_osd2
# Abort if no second pool with a different primary exists.
# NOTE(review): "appropirate" typo is in a runtime string — untouched here.
385 if [ "$pool2" = "" ];
387 echo "Failure to find appropirate PGs"
# Delete all pools except the two selected ones.
391 for p
in $
(seq 1 $pools)
393 if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
395 delete_pool
${poolprefix}$p
# Expected reservation priorities: base NORMAL_PRIO plus each pool's extra.
401 pool1_prio
=$
(expr $NORMAL_PRIO + $pool1_extra_prio)
402 pool2_prio
=$
(expr $NORMAL_PRIO + $pool2_extra_prio)
# Shrink both pools to size 1 and attach their recovery_priority values;
# growing size back to 2 later will start recovery at the boosted priority.
404 ceph osd pool
set $pool1 size
1 --yes-i-really-mean-it
405 ceph osd pool
set $pool1 recovery_priority
$pool1_extra_prio
406 ceph osd pool
set $pool2 size
1 --yes-i-really-mean-it
407 ceph osd pool
set $pool2 recovery_priority
$pool2_extra_prio
408 wait_for_clean ||
return 1
# Load both pools with $objects x 10MB objects so recovery takes a while.
410 dd if=/dev
/urandom of
=$dir/data bs
=1M count
=10
412 for pname
in $pool1 $pool2
414 for i
in $
(seq 1 $objects)
416 rados
-p ${pname} put obj${i}-p${p} $dir/data
421 local otherosd
=$
(get_not_primary
$pool1 obj1-p1
)
# Trigger recovery on both pools simultaneously.
426 ceph osd pool
set $pool1 size
2
427 ceph osd pool
set $pool2 size
2
429 # Wait for both PGs to be in recovering state
432 # Wait for recovery to start
# Poll `ceph pg dump pgs` until exactly two PGs report a "recovering" state,
# giving up (with an error message) after 10 iterations.
437 if test $
(ceph
--format json pg dump pgs |
438 jq
'.pg_stats | .[] | .state | contains("recovering")' |
grep -c true
) == "2"
443 if test "$count" -eq "10"
445 echo "Recovery never started on both PGs"
448 count
=$
(expr $count + 1)
# Snapshot reservation dumps from pool1's primary and replica admin sockets
# into $dir/dump.<osd>.out, echoing them for the test log.
453 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_1}) dump_recovery_reservations
> $dir/dump.
${chk_osd1_1}.out
454 echo osd.
${chk_osd1_1}
455 cat $dir/dump.
${chk_osd1_1}.out
456 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${chk_osd1_2}) dump_recovery_reservations
> $dir/dump.
${chk_osd1_2}.out
457 echo osd.
${chk_osd1_2}
458 cat $dir/dump.
${chk_osd1_2}.out
460 # Using eval will strip double-quotes from item
# pool1's PG must be in progress on its primary at pool1_prio (local side).
461 eval ITEM
=$
(cat $dir/dump.
${chk_osd1_1}.out | jq
'.local_reservations.in_progress[0].item')
462 if [ "$ITEM" != ${PG1} ];
464 echo "The primary PG for $pool1 didn't become the in progress item"
465 ERRORS
=$
(expr $ERRORS + 1)
467 PRIO
=$
(cat $dir/dump.
${chk_osd1_1}.out | jq
'.local_reservations.in_progress[0].prio')
468 if [ "$PRIO" != $pool1_prio ];
470 echo "The primary PG ${PG1} doesn't have prio $pool1_prio"
471 ERRORS
=$
(expr $ERRORS + 1)
475 # Using eval will strip double-quotes from item
# Same PG must hold a remote reservation on the replica at the same priority.
476 eval ITEM
=$
(cat $dir/dump.
${chk_osd1_2}.out | jq
'.remote_reservations.in_progress[0].item')
477 if [ "$ITEM" != ${PG1} ];
479 echo "The primary PG for $pool1 didn't become the in progress item on remote"
480 ERRORS
=$
(expr $ERRORS + 1)
482 PRIO
=$
(cat $dir/dump.
${chk_osd1_2}.out | jq
'.remote_reservations.in_progress[0].prio')
483 if [ "$PRIO" != $pool1_prio ];
485 echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote"
486 ERRORS
=$
(expr $ERRORS + 1)
490 # Using eval will strip double-quotes from item
# Mirror checks for pool2 on its own primary (local reservations)...
# NOTE(review): the dumps for chk_osd2_1/chk_osd2_2 are read below but the
# commands that write $dir/dump.${chk_osd2_*}.out are not visible in this
# extraction — presumably dropped lines; confirm against upstream.
491 eval ITEM
=$
(cat $dir/dump.
${chk_osd2_1}.out | jq
'.local_reservations.in_progress[0].item')
492 if [ "$ITEM" != ${PG2} ];
494 echo "The primary PG for $pool2 didn't become the in progress item"
495 ERRORS
=$
(expr $ERRORS + 1)
497 PRIO
=$
(cat $dir/dump.
${chk_osd2_1}.out | jq
'.local_reservations.in_progress[0].prio')
498 if [ "$PRIO" != $pool2_prio ];
500 echo "The primary PG ${PG2} doesn't have prio $pool2_prio"
501 ERRORS
=$
(expr $ERRORS + 1)
505 # Using eval will strip double-quotes from item
# ...and on pool2's replica (remote reservations).
506 eval ITEM
=$
(cat $dir/dump.
${chk_osd2_2}.out | jq
'.remote_reservations.in_progress[0].item')
507 if [ "$ITEM" != ${PG2} ];
509 echo "The primary PG $PG2 didn't become the in progress item on remote"
510 ERRORS
=$
(expr $ERRORS + 1)
512 PRIO
=$
(cat $dir/dump.
${chk_osd2_2}.out | jq
'.remote_reservations.in_progress[0].prio')
513 if [ "$PRIO" != $pool2_prio ];
515 echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote"
516 ERRORS
=$
(expr $ERRORS + 1)
# Let recovery finish, report soft failures, and tear down the cluster.
520 wait_for_clean ||
return 1
522 if [ $ERRORS != "0" ];
524 echo "$ERRORS error(s) found"
531 kill_daemons
$dir ||
return 1
# Entry point: ceph-helpers.sh's `main` creates the test directory
# "osd-recovery-prio" and dispatches to the run loop with any user-supplied
# test selections.
535 main osd-recovery-prio
"$@"
538 # compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-prio.sh"