# ceph/qa/standalone/osd/osd-recovery-prio.sh
# (source updated to Ceph Pacific 16.2.2; retrieved from git.proxmox.com ceph.git)
1 #!/usr/bin/env bash
2 #
3 # Copyright (C) 2019 Red Hat <contact@redhat.com>
4 #
5 # Author: David Zafman <dzafman@redhat.com>
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
10 # any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
16 #
17
18 source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
19
# Test driver: configure the environment shared by every TEST_* function,
# then run each requested test (or all of them) inside its own setup/teardown.
function run() {
    local dir=$1
    shift

    # Fix port????
    export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20"
    export objects=200
    export poolprefix=test
    export FORCE_PRIO="255"    # See OSD_RECOVERY_PRIORITY_FORCED
    export NORMAL_PRIO="190"   # See OSD_RECOVERY_PRIORITY_BASE + 10

    # With no arguments, discover every TEST_* function currently defined.
    local selected=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local tname
    for tname in $selected ; do
        setup $dir || return 1
        $tname $dir || return 1
        teardown $dir || return 1
    done
}
41
42
# Verify that force-recovery / cancel-force-recovery adjust recovery
# reservation priorities.  Covers four cases against osd.${chk_osd1_1}:
#   1. item queued: re-queued with the new (forced) priority
#   2. item queued: re-queued and preempts a lower-priority in-progress item
#   3. item in progress: priority adjusted with nothing higher waiting
#   4. item in progress: preempted when a higher-priority item is waiting
function TEST_recovery_priority() {
    local dir=$1
    local pools=10
    local OSDS=5
    local max_tries=10

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $((OSDS - 1)))
    do
        run_osd $dir $osd || return 1
    done

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 3 pools with a pg with the same primaries but second
    # replica on another osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1_1
    local chk_osd1_2

    local PG2
    local POOLNUM2
    local pool2
    local chk_osd2

    local PG3
    local POOLNUM3
    local pool3

    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ -z "$PG1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2=$test_osd2
        elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ];
        then
            PG3="${p}.0"
            POOLNUM3=$p
            pool3="${poolprefix}$p"
            break
        fi
    done
    rm -f $dir/acting

    # BUGFIX: the original compared the literal string "pool3" (always
    # non-empty), so a missing third pool was silently ignored.
    if [ "$pool2" = "" -o "$pool3" = "" ];
    then
        echo "Failure to find appropriate PGs"
        return 1
    fi

    # Drop every pool we are not using for the test.
    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool3 size 1 --yes-i-really-mean-it
    wait_for_clean || return 1

    # Write $objects objects into each test pool so recovery has real work.
    dd if=/dev/urandom of=$dir/data bs=1M count=10
    p=1
    for pname in $pool1 $pool2 $pool3
    do
        for i in $(seq 1 $objects)
        do
            rados -p ${pname} put obj${i}-p${p} $dir/data
        done
        p=$((p + 1))
    done

    local otherosd=$(get_not_primary $pool1 obj1-p1)

    ceph pg dump pgs
    ERRORS=0

    ceph osd set norecover
    ceph osd set noout

    # Get a pg to want to recover and quickly force it
    # to be preempted.
    ceph osd pool set $pool3 size 2
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    # 3. Item is in progress, adjust priority with no higher priority waiting
    for i in $(seq 1 $max_tries)
    do
        if ! ceph pg force-recovery $PG3 2>&1 | grep -q "doesn't require recovery"; then
            break
        fi
        if [ "$i" = "$max_tries" ]; then
            echo "ERROR: Didn't appear to be able to force-recovery"
            ERRORS=$((ERRORS + 1))
        fi
        sleep 2
    done
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    ceph osd out osd.$chk_osd1_2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
    ceph pg dump pgs

    ceph osd pool set $pool2 size 2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    ceph pg dump pgs

    # The un-forced PG1 must be waiting at the normal priority.
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio")
    if [ "$PRIO" != "$NORMAL_PRIO" ];
    then
        echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
        ERRORS=$((ERRORS + 1))
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG3} ];
    then
        echo "The first force-recovery PG $PG3 didn't become the in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $FORCE_PRIO ];
        then
            echo "The first force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    # 1. Item is queued, re-queue with new priority
    for i in $(seq 1 $max_tries)
    do
        if ! ceph pg force-recovery $PG2 2>&1 | grep -q "doesn't require recovery"; then
            break
        fi
        if [ "$i" = "$max_tries" ]; then
            echo "ERROR: Didn't appear to be able to force-recovery"
            ERRORS=$((ERRORS + 1))
        fi
        sleep 2
    done
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
    if [ "$PRIO" != "$FORCE_PRIO" ];
    then
        echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
        ERRORS=$((ERRORS + 1))
    fi
    flush_pg_stats || return 1

    # 4. Item is in progress, if higher priority items waiting prempt item
    #ceph osd unset norecover
    ceph pg cancel-force-recovery $PG3 || return 1
    sleep 2
    #ceph osd set norecover
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio")
    if [ "$PRIO" != "$NORMAL_PRIO" ];
    then
        echo "After cancel-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO"
        ERRORS=$((ERRORS + 1))
    fi

    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG2} ];
    then
        echo "The force-recovery PG $PG2 didn't become the in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $FORCE_PRIO ];
        then
            # BUGFIX: message previously said "first force-recovery"; this is PG2.
            echo "The force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    ceph pg cancel-force-recovery $PG2 || return 1
    sleep 5
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    # 2. Item is queued, re-queue and preempt because new priority higher than an in progress item
    flush_pg_stats || return 1
    ceph pg force-recovery $PG3 || return 1
    sleep 2

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
    if [ "$PRIO" != "$NORMAL_PRIO" ];
    then
        # BUGFIX: message previously named PG3; the value checked is PG2's.
        echo "After cancel-force-recovery PG ${PG2} doesn't have prio $NORMAL_PRIO"
        ERRORS=$((ERRORS + 1))
    fi

    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG3} ];
    then
        echo "The force-recovery PG $PG3 didn't get promoted to an in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $FORCE_PRIO ];
        then
            # BUGFIX: message previously named PG2; the value checked is PG3's.
            echo "The force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    ceph osd unset noout
    ceph osd unset norecover

    wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1

    ceph pg dump pgs

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history

    if [ $ERRORS != "0" ];
    then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool $pool1
    delete_pool $pool2
    delete_pool $pool3
    kill_daemons $dir || return 1
    return $ERRORS
}
312
313 #
314 # Show that pool recovery_priority is added to recovery priority
315 #
316 # Create 2 pools with 2 OSDs with different primarys
317 # pool 1 with recovery_priority 1
318 # pool 2 with recovery_priority 2
319 #
320 # Start recovery by changing the pool sizes from 1 to 2
321 # Use dump_recovery_reservations to verify priorities
# Verify that a pool's recovery_priority setting is added to the base
# recovery priority, observed via dump_recovery_reservations on both the
# primary (local_reservations) and replica (remote_reservations) OSDs.
function TEST_recovery_pool_priority() {
    local dir=$1
    local pools=3 # Don't assume the first 2 pools are exact what we want
    local OSDS=2

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $((OSDS - 1)))
    do
        run_osd $dir $osd || return 1
    done

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with different primaries which
    # means the replica must be on another osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1_1
    local chk_osd1_2

    local PG2
    local POOLNUM2
    local pool2
    local chk_osd2_1
    local chk_osd2_2

    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ -z "$PG1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [ $chk_osd1_1 != $test_osd1 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2_1=$test_osd1
            chk_osd2_2=$test_osd2
            break
        fi
    done
    rm -f $dir/acting

    if [ "$pool2" = "" ];
    then
        echo "Failure to find appropriate PGs"
        return 1
    fi

    # Remove the pools we did not select.
    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    pool1_extra_prio=1
    pool2_extra_prio=2
    # Expected reservation prio = NORMAL_PRIO + pool recovery_priority.
    pool1_prio=$((NORMAL_PRIO + pool1_extra_prio))
    pool2_prio=$((NORMAL_PRIO + pool2_extra_prio))

    ceph osd pool set $pool1 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool1 recovery_priority $pool1_extra_prio
    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool2 recovery_priority $pool2_extra_prio
    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/data bs=1M count=10
    p=1
    for pname in $pool1 $pool2
    do
        for i in $(seq 1 $objects)
        do
            rados -p ${pname} put obj${i}-p${p} $dir/data
        done
        p=$((p + 1))
    done

    local otherosd=$(get_not_primary $pool1 obj1-p1)

    ceph pg dump pgs
    ERRORS=0

    # Start recovery by growing both pools back to size 2.
    ceph osd pool set $pool1 size 2
    ceph osd pool set $pool2 size 2

    # Wait for both PGs to be in recovering state
    ceph pg dump pgs

    # Wait for recovery to start
    set -o pipefail
    count=0
    while true
    do
        if test $(ceph --format json pg dump pgs |
                  jq '.pg_stats | .[] | .state | contains("recovering")' | grep -c true) = "2"
        then
            break
        fi
        sleep 2
        if test "$count" -eq "10"
        then
            echo "Recovery never started on both PGs"
            return 1
        fi
        count=$((count + 1))
    done
    set +o pipefail
    ceph pg dump pgs

    # Snapshot the reservation dumps from both OSDs while recovery runs.
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out
    echo osd.${chk_osd1_1}
    cat $dir/dump.${chk_osd1_1}.out
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out
    echo osd.${chk_osd1_2}
    cat $dir/dump.${chk_osd1_2}.out

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG1} ];
    then
        echo "The primary PG for $pool1 didn't become the in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool1_prio ];
        then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG1} ];
    then
        echo "The primary PG for $pool1 didn't become the in progress item on remote"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool1_prio ];
        then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    # Using eval will strip double-quotes from item
    # NOTE: with OSDS=2 and differing primaries, chk_osd2_* are the same two
    # OSDs as chk_osd1_* (swapped), so the dump files above already exist.
    eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG2} ];
    then
        echo "The primary PG for $pool2 didn't become the in progress item"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool2_prio ];
        then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG2} ];
    then
        echo "The primary PG $PG2 didn't become the in progress item on remote"
        ERRORS=$((ERRORS + 1))
    else
        PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool2_prio ];
        then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote"
            ERRORS=$((ERRORS + 1))
        fi
    fi

    wait_for_clean || return 1

    if [ $ERRORS != "0" ];
    then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool $pool1
    delete_pool $pool2
    kill_daemons $dir || return 1
    return $ERRORS
}
534
535 main osd-recovery-prio "$@"
536
537 # Local Variables:
538 # compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-prio.sh"
539 # End: