#!/usr/bin/env bash
#
# Copyright (C) 2019 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    # Fix port????
    export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 "
    # Set osd op queue = wpq for the tests. Recovery priority is not
    # considered by mclock_scheduler, leading to unexpected results.
    CEPH_ARGS+="--osd-op-queue=wpq "
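    # (If in doubt, the active queue can be confirmed on a running OSD with,
    # e.g., "ceph daemon osd.0 config get osd_op_queue".)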
    export objects=200
    export poolprefix=test
    export FORCE_PRIO="255"     # See OSD_RECOVERY_PRIORITY_FORCED
    export NORMAL_PRIO="190"    # See OSD_RECOVERY_PRIORITY_BASE + 10

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}


function TEST_recovery_priority() {
    local dir=$1
    local pools=10
    local OSDS=5
    local max_tries=10

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 3 pools that each have a PG with the same primary, but with the
    # second replica on a different OSD in each case.
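    # Sharing one primary means all three PGs contend in that single OSD's
    # local reservation queue; distinct second replicas keep their
    # recoveries from colliding on the replica side.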
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1_1
    local chk_osd1_2

    local PG2
    local POOLNUM2
    local pool2
    local chk_osd2

    local PG3
    local POOLNUM3
    local pool3

    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ -z "$PG1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2=$test_osd2
        elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ];
        then
            PG3="${p}.0"
            POOLNUM3=$p
            pool3="${poolprefix}$p"
            break
        fi
    done
    rm -f $dir/acting

    if [ "$pool2" = "" -o "$pool3" = "" ];
    then
        echo "Failure to find appropriate PGs"
        return 1
    fi

    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool3 size 1 --yes-i-really-mean-it
    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/data bs=1M count=10
    p=1
    for pname in $pool1 $pool2 $pool3
    do
        for i in $(seq 1 $objects)
        do
            rados -p ${pname} put obj${i}-p${p} $dir/data
        done
        p=$(expr $p + 1)
    done

    local otherosd=$(get_not_primary $pool1 obj1-p1)

    ceph pg dump pgs
    ERRORS=0

    ceph osd set norecover
    ceph osd set noout

    # Get a pg to want to recover and quickly force it
    # to be preempted.
    ceph osd pool set $pool3 size 2
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
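    # The dump shows local_reservations and remote_reservations, each with
    # in_progress entries and queued items carrying {item, prio} pairs; the
    # checks below pick those fields apart with jq.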

    # 3. Item is in progress, adjust priority with no higher priority waiting
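    # force-recovery replies "doesn't require recovery" until the OSD has
    # noticed the PG is degraded, so retry for up to $max_tries attempts.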
    for i in $(seq 1 $max_tries)
    do
        if ! ceph pg force-recovery $PG3 2>&1 | grep -q "doesn't require recovery"; then
            break
        fi
        if [ "$i" = "$max_tries" ]; then
            echo "ERROR: Didn't appear to be able to force-recovery"
            ERRORS=$(expr $ERRORS + 1)
        fi
        sleep 2
    done
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    ceph osd out osd.$chk_osd1_2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
    ceph pg dump pgs

    ceph osd pool set $pool2 size 2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    ceph pg dump pgs

    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio")
    if [ "$PRIO" != "$NORMAL_PRIO" ];
    then
        echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
        ERRORS=$(expr $ERRORS + 1)
    fi

    # Using eval will strip double-quotes from item
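    # (jq -r would also emit the raw, unquoted string; eval is the idiom
    # used throughout this file.)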
    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG3} ];
    then
        echo "The first force-recovery PG $PG3 didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $FORCE_PRIO ];
        then
            echo "The first force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # 1. Item is queued, re-queue with new priority
    for i in $(seq 1 $max_tries)
    do
        if ! ceph pg force-recovery $PG2 2>&1 | grep -q "doesn't require recovery"; then
            break
        fi
        if [ "$i" = "$max_tries" ]; then
            echo "ERROR: Didn't appear to be able to force-recovery"
            ERRORS=$(expr $ERRORS + 1)
        fi
        sleep 2
    done
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
    if [ "$PRIO" != "$FORCE_PRIO" ];
    then
        echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
        ERRORS=$(expr $ERRORS + 1)
    fi
    flush_pg_stats || return 1

    # 4. Item is in progress; if higher-priority items are waiting, preempt the item
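    # Cancelling force-recovery drops PG3 back to NORMAL_PRIO, so the
    # already-forced PG2 (FORCE_PRIO) should take over as the in-progress item.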
    #ceph osd unset norecover
    ceph pg cancel-force-recovery $PG3 || return 1
    sleep 2
    #ceph osd set norecover
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio")
    if [ "$PRIO" != "$NORMAL_PRIO" ];
    then
        echo "After cancel-force-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO"
        ERRORS=$(expr $ERRORS + 1)
    fi

    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG2} ];
    then
        echo "The force-recovery PG $PG2 didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $FORCE_PRIO ];
        then
            echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    ceph pg cancel-force-recovery $PG2 || return 1
    sleep 5
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    # 2. Item is queued; re-queue and preempt because the new priority is higher than the in-progress item
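    # With PG2's force flag cancelled its recovery continues at normal
    # priority, so forcing PG3 should preempt it and leave PG2 queued
    # at NORMAL_PRIO.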
    flush_pg_stats || return 1
    ceph pg force-recovery $PG3 || return 1
    sleep 2

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
    if [ "$PRIO" != "$NORMAL_PRIO" ];
    then
        echo "The preempted PG ${PG2} doesn't have prio $NORMAL_PRIO queued waiting"
        ERRORS=$(expr $ERRORS + 1)
    fi

    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG3} ];
    then
        echo "The force-recovery PG $PG3 didn't get promoted to an in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $FORCE_PRIO ];
        then
            echo "The force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    ceph osd unset noout
    ceph osd unset norecover

    wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1

    ceph pg dump pgs

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history

    if [ $ERRORS != "0" ];
    then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool $pool1
    delete_pool $pool2
    delete_pool $pool3
    kill_daemons $dir || return 1
    return $ERRORS
}

#
# Show that pool recovery_priority is added to recovery priority
#
# Create 2 pools with 2 OSDs with different primaries
# pool 1 with recovery_priority 1
# pool 2 with recovery_priority 2
#
# Start recovery by changing the pool sizes from 1 to 2
# Use dump_recovery_reservations to verify priorities
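# (With NORMAL_PRIO=190 the expected reservation prios are 191 and 192.)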
function TEST_recovery_pool_priority() {
    local dir=$1
    local pools=3 # Don't assume the first 2 pools are exactly what we want
    local OSDS=2

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with different primaries, which
    # means the replica must be on another OSD.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1_1
    local chk_osd1_2

    local PG2
    local POOLNUM2
    local pool2
    local chk_osd2_1
    local chk_osd2_2

    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ -z "$PG1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [ $chk_osd1_1 != $test_osd1 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2_1=$test_osd1
            chk_osd2_2=$test_osd2
            break
        fi
    done
    rm -f $dir/acting

    if [ "$pool2" = "" ];
    then
        echo "Failure to find appropriate PGs"
        return 1
    fi

    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    pool1_extra_prio=1
    pool2_extra_prio=2
    pool1_prio=$(expr $NORMAL_PRIO + $pool1_extra_prio)
    pool2_prio=$(expr $NORMAL_PRIO + $pool2_extra_prio)

    ceph osd pool set $pool1 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool1 recovery_priority $pool1_extra_prio
    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool2 recovery_priority $pool2_extra_prio
    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/data bs=1M count=10
    p=1
    for pname in $pool1 $pool2
    do
        for i in $(seq 1 $objects)
        do
            rados -p ${pname} put obj${i}-p${p} $dir/data
        done
        p=$(expr $p + 1)
    done

    local otherosd=$(get_not_primary $pool1 obj1-p1)

    ceph pg dump pgs
    ERRORS=0

    ceph osd pool set $pool1 size 2
    ceph osd pool set $pool2 size 2

    # Wait for both PGs to be in recovering state
    ceph pg dump pgs

    # Wait for recovery to start
    set -o pipefail
    count=0
    while true
    do
        if test $(ceph --format json pg dump pgs |
                  jq '.pg_stats | .[] | .state | contains("recovering")' | grep -c true) == "2"
        then
            break
        fi
        sleep 2
        if test "$count" -eq "10"
        then
            echo "Recovery never started on both PGs"
            return 1
        fi
        count=$(expr $count + 1)
    done
    set +o pipefail
    ceph pg dump pgs

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out
    echo osd.${chk_osd1_1}
    cat $dir/dump.${chk_osd1_1}.out
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out
    echo osd.${chk_osd1_2}
    cat $dir/dump.${chk_osd1_2}.out

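    # With only 2 OSDs and different primaries, chk_osd2_1/chk_osd2_2 name
    # the same OSDs as chk_osd1_2/chk_osd1_1, so the two dumps above also
    # serve the pool2 checks below.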
    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG1} ];
    then
        echo "The primary PG for $pool1 didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool1_prio ];
        then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG1} ];
    then
        echo "The primary PG for $pool1 didn't become the in progress item on remote"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool1_prio ];
        then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG2} ];
    then
        echo "The primary PG for $pool2 didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool2_prio ];
        then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG2} ];
    then
        echo "The primary PG $PG2 didn't become the in progress item on remote"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool2_prio ];
        then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    wait_for_clean || return 1

    if [ $ERRORS != "0" ];
    then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool $pool1
    delete_pool $pool2
    kill_daemons $dir || return 1
    return $ERRORS
}

main osd-recovery-prio "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-prio.sh"
# End: