#!/usr/bin/env bash
#
# Copyright (C) 2019 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    # Fix port????
    export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 "
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
    # Set osd op queue = wpq for the tests. Backfill priority is not
    # considered by mclock_scheduler, leading to unexpected results.
    CEPH_ARGS+="--osd-op-queue=wpq "
    export objects=50
    export poolprefix=test
    export FORCE_PRIO="254"    # See OSD_BACKFILL_PRIORITY_FORCED
    export DEGRADED_PRIO="150" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE + 10
    export NORMAL_PRIO="110"   # See OSD_BACKFILL_PRIORITY_BASE + 10
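    # The tests below rely on these being strictly ordered: a
    # force-backfill (254) outranks a degraded backfill (DEGRADED_PRIO
    # plus the number of missing replicas, e.g. 150 + 1 = 151 for a
    # size 2 -> 1 pool), which outranks a normal backfill (110).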

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}


function TEST_backfill_priority() {
    local dir=$1
    local pools=10
    local OSDS=5
    # size 2 -> 1 means degraded by 1, so add 1 to base prio
    local degraded_prio=$(expr $DEGRADED_PRIO + 1)
    local max_tries=10

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 3 pools with PGs that share the same primary but have
    # their second replicas on different OSDs.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1_1
    local chk_osd1_2

    local PG2
    local POOLNUM2
    local pool2
    local chk_osd2

    local PG3
    local POOLNUM3
    local pool3

    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ -z "$PG1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2=$test_osd2
        elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ];
        then
            PG3="${p}.0"
            POOLNUM3=$p
            pool3="${poolprefix}$p"
            break
        fi
    done
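    # PG1, PG2 and PG3 now share a primary (chk_osd1_1) but have
    # distinct second OSDs.  With osd_max_backfills=1 the three PGs
    # must take turns in that primary's local reservation queue, which
    # is what lets the checks below observe queued vs in-progress items.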
    rm -f $dir/acting

    if [ "$pool2" = "" -o "$pool3" = "" ];
    then
        echo "Failure to find appropriate PGs"
        return 1
    fi

    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool3 size 1 --yes-i-really-mean-it
    wait_for_clean || return 1

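    # 50 objects of 10MB each per pool; presumably enough data that
    # backfill cannot finish instantly, leaving time to inspect the
    # reservation queues while items are still queued or in progress.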
    dd if=/dev/urandom of=$dir/data bs=1M count=10
    p=1
    for pname in $pool1 $pool2 $pool3
    do
        for i in $(seq 1 $objects)
        do
            rados -p ${pname} put obj${i}-p${p} $dir/data
        done
        p=$(expr $p + 1)
    done

    local otherosd=$(get_not_primary $pool1 obj1-p1)

    ceph pg dump pgs
    ERRORS=0

    ceph osd set nobackfill
    ceph osd set noout

    # Get a pg to want to backfill and quickly force it
    # to be preempted.
    ceph osd pool set $pool3 size 2
    sleep 2

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
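    # For reference, the dump is JSON shaped roughly as follows (an
    # illustrative sketch with made-up PG ids; exact fields may vary
    # by release):
    #   {
    #     "local_reservations": {
    #       "max_allowed": 1,
    #       "min_priority": 0,
    #       "queues": [
    #         { "priority": 110,
    #           "items": [ { "item": "1.0", "prio": 110, "can_preempt": true } ] }
    #       ],
    #       "in_progress": [ { "item": "3.0", "prio": 254, "can_preempt": true } ]
    #     },
    #     "remote_reservations": { ... }
    #   }
    # The jq queries below read "item" and "prio" out of this structure.
    # The numbered comments (1-4) label four reserver scenarios, which
    # the test exercises out of numeric order (3, 1, 4, 2).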

    # 3. Item is in progress, adjust priority with no higher priority waiting
    for i in $(seq 1 $max_tries)
    do
        if ! ceph pg force-backfill $PG3 2>&1 | grep -q "doesn't require backfilling"; then
            break
        fi
        if [ "$i" = "$max_tries" ]; then
            echo "ERROR: Didn't appear to be able to force-backfill"
            ERRORS=$(expr $ERRORS + 1)
        fi
        sleep 2
    done
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    ceph osd out osd.$chk_osd1_2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
    ceph pg dump pgs

    ceph osd pool set $pool2 size 2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    ceph pg dump pgs

    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio")
    if [ "$PRIO" != "$NORMAL_PRIO" ];
    then
        echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
        ERRORS=$(expr $ERRORS + 1)
    fi

    # Using eval will strip double-quotes from item
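    # (Equivalent to jq -r; eval is safe here because items are PG ids
    # like "3.0" with no shell metacharacters.)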
    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG3} ];
    then
        echo "The force-backfill PG $PG3 didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $FORCE_PRIO ];
        then
            echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # 1. Item is queued, re-queue with new priority
    for i in $(seq 1 $max_tries)
    do
        if ! ceph pg force-backfill $PG2 2>&1 | grep -q "doesn't require backfilling"; then
            break
        fi
        if [ "$i" = "$max_tries" ]; then
            echo "ERROR: Didn't appear to be able to force-backfill"
            ERRORS=$(expr $ERRORS + 1)
        fi
        sleep 2
    done
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
    if [ "$PRIO" != "$FORCE_PRIO" ];
    then
        echo "The second force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO"
        ERRORS=$(expr $ERRORS + 1)
    fi
    flush_pg_stats || return 1

    # 4. Item is in progress; if higher priority items are waiting, preempt the item
    ceph pg cancel-force-backfill $PG3 || return 1
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio")
    if [ "$PRIO" != "$degraded_prio" ];
    then
        echo "After cancel-force-backfill PG ${PG3} doesn't have prio $degraded_prio"
        ERRORS=$(expr $ERRORS + 1)
    fi

    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG2} ];
    then
        echo "The force-backfill PG $PG2 didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $FORCE_PRIO ];
        then
            echo "The force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    ceph pg cancel-force-backfill $PG2 || return 1
    sleep 5
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    # 2. Item is queued; re-queue and preempt because the new priority is higher than the in progress item
    flush_pg_stats || return 1
    ceph pg force-backfill $PG3 || return 1
    sleep 2

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
    if [ "$PRIO" != "$degraded_prio" ];
    then
        echo "After cancel-force-backfill PG ${PG2} doesn't have prio $degraded_prio"
        ERRORS=$(expr $ERRORS + 1)
    fi

    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG3} ];
    then
        echo "The force-backfill PG $PG3 didn't get promoted to an in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $FORCE_PRIO ];
        then
            echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    ceph osd unset noout
    ceph osd unset nobackfill

    wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1
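    # The quoted command is passed to wait_for_clean (see
    # ceph-helpers.sh), which can run it while waiting, presumably so
    # the reservation state is logged if the cluster is slow to go clean.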

    ceph pg dump pgs

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history

    if [ $ERRORS != "0" ];
    then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool $pool1
    delete_pool $pool2
    delete_pool $pool3
    kill_daemons $dir || return 1
    return $ERRORS
}

#
# Show that pool recovery_priority is added to the backfill priority
#
# Create 2 pools on 2 OSDs with different primaries
# pool 1 with recovery_priority 1
# pool 2 with recovery_priority 2
#
# Start backfill by changing the pool sizes from 1 to 2
# Use dump_recovery_reservations to verify priorities
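#
# With the constants from run() this works out to
# (DEGRADED_PRIO + degraded count + recovery_priority):
#   pool1: 150 + 1 + 1 = 152
#   pool2: 150 + 1 + 2 = 153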
function TEST_backfill_pool_priority() {
    local dir=$1
    local pools=3 # Don't assume the first 2 pools are exactly what we want
    local OSDS=2

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with different primaries, which
    # means each replica must be on the other osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1_1
    local chk_osd1_2

    local PG2
    local POOLNUM2
    local pool2
    local chk_osd2_1
    local chk_osd2_2

    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ -z "$PG1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [ $chk_osd1_1 != $test_osd1 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2_1=$test_osd1
            chk_osd2_2=$test_osd2
            break
        fi
    done
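    # With only 2 OSDs, different primaries mean the two PGs mirror each
    # other: chk_osd2_1 == chk_osd1_2 and chk_osd2_2 == chk_osd1_1, so
    # the dump files collected for chk_osd1_* below also satisfy the
    # chk_osd2_* lookups.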
    rm -f $dir/acting

    if [ "$pool2" = "" ];
    then
        echo "Failure to find appropriate PGs"
        return 1
    fi

    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    pool1_extra_prio=1
    pool2_extra_prio=2
    # size 2 -> 1 means degraded by 1, so add 1 to base prio
    pool1_prio=$(expr $DEGRADED_PRIO + 1 + $pool1_extra_prio)
    pool2_prio=$(expr $DEGRADED_PRIO + 1 + $pool2_extra_prio)

    ceph osd pool set $pool1 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool1 recovery_priority $pool1_extra_prio
    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool2 recovery_priority $pool2_extra_prio
    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/data bs=1M count=10
    p=1
    for pname in $pool1 $pool2
    do
        for i in $(seq 1 $objects)
        do
            rados -p ${pname} put obj${i}-p${p} $dir/data
        done
        p=$(expr $p + 1)
    done

    local otherosd=$(get_not_primary $pool1 obj1-p1)

    ceph pg dump pgs
    ERRORS=0

    ceph osd pool set $pool1 size 2
    ceph osd pool set $pool2 size 2
    sleep 5
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out
    echo osd.${chk_osd1_1}
    cat $dir/dump.${chk_osd1_1}.out
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out
    echo osd.${chk_osd1_2}
    cat $dir/dump.${chk_osd1_2}.out

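    # The backfill primary holds each PG's local_reservations entry and
    # the replica holds the matching remote_reservations entry, so each
    # PG's priority is checked on both sides.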
    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG1} ];
    then
        echo "The primary PG ${PG1} didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool1_prio ];
        then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG1} ];
    then
        echo "The primary PG ${PG1} didn't become the in progress item on remote"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool1_prio ];
        then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG2} ];
    then
        echo "The primary PG ${PG2} didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool2_prio ];
        then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG2} ];
    then
        echo "The primary PG $PG2 didn't become the in progress item on remote"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool2_prio ];
        then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    wait_for_clean || return 1

    if [ $ERRORS != "0" ];
    then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool $pool1
    delete_pool $pool2
    kill_daemons $dir || return 1
    return $ERRORS
}

main osd-backfill-prio "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-prio.sh"
# End: