#!/usr/bin/env bash
#
# Copyright (C) 2019 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    # Fix port????
    export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 "
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
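    # The osd_max_backfills=1 above limits each OSD to a single backfill
    # reservation at a time, so the remaining PGs queue up and their
    # priorities stay visible in dump_recovery_reservations, while
    # debug_reserver=20 logs the reserver's queue and preemption decisions.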
    export objects=50
    export poolprefix=test
    export FORCE_PRIO="254"    # See OSD_BACKFILL_PRIORITY_FORCED
    export DEGRADED_PRIO="150" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE + 10
    export NORMAL_PRIO="110"   # See OSD_BACKFILL_PRIORITY_BASE + 10
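    # For reference (constants as defined in the Ceph source at the time of
    # writing, so treat them as assumptions that may drift):
    # OSD_BACKFILL_PRIORITY_FORCED=254, OSD_BACKFILL_DEGRADED_PRIORITY_BASE=140
    # and OSD_BACKFILL_PRIORITY_BASE=100; the OSD's +10 adjustment referenced
    # in the comments above yields the 150/110 values used here.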

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}


function TEST_backfill_priority() {
    local dir=$1
    local pools=10
    local OSDS=5
    # size 2 -> 1 means degraded by 1, so add 1 to base prio
    local degraded_prio=$(expr $DEGRADED_PRIO + 1)
    local max_tries=10

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 3 pools whose pg 0 has the same primary but whose second
    # replica is on a different osd in each case.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1_1
    local chk_osd1_2

    local PG2
    local POOLNUM2
    local pool2
    local chk_osd2

    local PG3
    local POOLNUM3
    local pool3

    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ -z "$PG1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2=$test_osd2
        elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ];
        then
            PG3="${p}.0"
            POOLNUM3=$p
            pool3="${poolprefix}$p"
            break
        fi
    done
    rm -f $dir/acting

    if [ "$pool2" = "" -o "$pool3" = "" ];
    then
        echo "Failure to find appropriate PGs"
        return 1
    fi

    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool3 size 1 --yes-i-really-mean-it
    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/data bs=1M count=10
    p=1
    for pname in $pool1 $pool2 $pool3
    do
        for i in $(seq 1 $objects)
        do
            rados -p ${pname} put obj${i}-p${p} $dir/data
        done
        p=$(expr $p + 1)
    done

    local otherosd=$(get_not_primary $pool1 obj1-p1)

    ceph pg dump pgs
    ERRORS=0

    ceph osd set nobackfill
    ceph osd set noout

    # Get a pg to want to backfill and quickly force it
    # to be preempted.
    ceph osd pool set $pool3 size 2
    sleep 2

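    # CEPH_ARGS is cleared for the admin-socket queries below so the daemon
    # socket is queried directly, without inheriting the cluster connection
    # flags set up in run().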
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    # 3. Item is in progress, adjust priority with no higher priority waiting
    for i in $(seq 1 $max_tries)
    do
        if ! ceph pg force-backfill $PG3 2>&1 | grep -q "doesn't require backfilling"; then
            break
        fi
        if [ "$i" = "$max_tries" ]; then
            echo "ERROR: Didn't appear to be able to force-backfill"
            ERRORS=$(expr $ERRORS + 1)
        fi
        sleep 2
    done
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    ceph osd out osd.$chk_osd1_2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1
    ceph pg dump pgs

    ceph osd pool set $pool2 size 2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    ceph pg dump pgs

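    # The jq filters below assume dump_recovery_reservations output shaped
    # roughly like this (an abridged sketch, not verbatim OSD output):
    #   {
    #     "local_reservations": {
    #       "queues": [
    #         { "priority": 110,
    #           "items": [ { "item": "1.0", "prio": 110, "can_preempt": true } ] }
    #       ],
    #       "in_progress": [ { "item": "3.0", "prio": 254, "can_preempt": true } ]
    #     },
    #     "remote_reservations": { ... }
    #   }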
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio")
    if [ "$PRIO" != "$NORMAL_PRIO" ];
    then
        echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
        ERRORS=$(expr $ERRORS + 1)
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
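    # A minimal alternative that avoids eval is jq's raw-output mode:
    #   ITEM=$(jq -r '.local_reservations.in_progress[0].item' $dir/out)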
    if [ "$ITEM" != ${PG3} ];
    then
        echo "The force-backfill PG $PG3 didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $FORCE_PRIO ];
        then
            echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # 1. Item is queued, re-queue with new priority
    for i in $(seq 1 $max_tries)
    do
        if ! ceph pg force-backfill $PG2 2>&1 | grep -q "doesn't require backfilling"; then
            break
        fi
        if [ "$i" = "$max_tries" ]; then
            echo "ERROR: Didn't appear to be able to force-backfill"
            ERRORS=$(expr $ERRORS + 1)
        fi
        sleep 2
    done
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
    if [ "$PRIO" != "$FORCE_PRIO" ];
    then
        echo "The second force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO"
        ERRORS=$(expr $ERRORS + 1)
    fi
    flush_pg_stats || return 1

    # 4. Item is in progress; if higher priority items are waiting, preempt it
    ceph pg cancel-force-backfill $PG3 || return 1
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio")
    if [ "$PRIO" != "$degraded_prio" ];
    then
        echo "After cancel-force-backfill PG ${PG3} doesn't have prio $degraded_prio"
        ERRORS=$(expr $ERRORS + 1)
    fi

    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG2} ];
    then
        echo "The force-backfill PG $PG2 didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $FORCE_PRIO ];
        then
            echo "The force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    ceph pg cancel-force-backfill $PG2 || return 1
    sleep 5
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1

    # 2. Item is queued; re-queue and preempt because the new priority is higher than the in progress item's
    flush_pg_stats || return 1
    ceph pg force-backfill $PG3 || return 1
    sleep 2

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
    if [ "$PRIO" != "$degraded_prio" ];
    then
        echo "After cancel-force-backfill PG ${PG2} doesn't have prio $degraded_prio"
        ERRORS=$(expr $ERRORS + 1)
    fi

    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG3} ];
    then
        echo "The force-backfill PG $PG3 didn't get promoted to an in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $FORCE_PRIO ];
        then
            echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    ceph osd unset noout
    ceph osd unset nobackfill

    wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1

    ceph pg dump pgs

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history

    if [ $ERRORS != "0" ];
    then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool $pool1
    delete_pool $pool2
    delete_pool $pool3
    kill_daemons $dir || return 1
    return $ERRORS
}

#
# Show that pool recovery_priority is added to the backfill priority
#
# Create 2 pools on 2 OSDs so that the pools' pgs have different primaries:
# pool 1 with recovery_priority 1
# pool 2 with recovery_priority 2
#
# Start backfill by changing the pool sizes from 1 to 2
# Use dump_recovery_reservations to verify priorities
function TEST_backfill_pool_priority() {
    local dir=$1
    local pools=3 # Don't assume the first 2 pools are exactly what we want
    local OSDS=2

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with different primaries which
    # means the replica must be on another osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1_1
    local chk_osd1_2

    local PG2
    local POOLNUM2
    local pool2
    local chk_osd2_1
    local chk_osd2_2

    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ -z "$PG1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [ $chk_osd1_1 != $test_osd1 ];
        then
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2_1=$test_osd1
            chk_osd2_2=$test_osd2
            break
        fi
    done
    rm -f $dir/acting

    if [ "$pool2" = "" ];
    then
        echo "Failure to find appropriate PGs"
        return 1
    fi

    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    pool1_extra_prio=1
    pool2_extra_prio=2
    # size 2 -> 1 means degraded by 1, so add 1 to base prio
    pool1_prio=$(expr $DEGRADED_PRIO + 1 + $pool1_extra_prio)
    pool2_prio=$(expr $DEGRADED_PRIO + 1 + $pool2_extra_prio)
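    # Worked out with the values above: pool1_prio = 150 + 1 + 1 = 152 and
    # pool2_prio = 150 + 1 + 2 = 153, which is what each pool's PG should
    # report in dump_recovery_reservations on its respective primary.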

    ceph osd pool set $pool1 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool1 recovery_priority $pool1_extra_prio
    ceph osd pool set $pool2 size 1 --yes-i-really-mean-it
    ceph osd pool set $pool2 recovery_priority $pool2_extra_prio
    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/data bs=1M count=10
    p=1
    for pname in $pool1 $pool2
    do
        for i in $(seq 1 $objects)
        do
            rados -p ${pname} put obj${i}-p${p} $dir/data
        done
        p=$(expr $p + 1)
    done

    local otherosd=$(get_not_primary $pool1 obj1-p1)

    ceph pg dump pgs
    ERRORS=0

    ceph osd pool set $pool1 size 2
    ceph osd pool set $pool2 size 2
    sleep 5
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out
    echo osd.${chk_osd1_1}
    cat $dir/dump.${chk_osd1_1}.out
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out
    echo osd.${chk_osd1_2}
    cat $dir/dump.${chk_osd1_2}.out

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG1} ];
    then
        echo "The primary PG ${PG1} didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool1_prio ];
        then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG1} ];
    then
        echo "The primary PG ${PG1} didn't become the in progress item on remote"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool1_prio ];
        then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG2} ];
    then
        echo "The primary PG ${PG2} didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool2_prio ];
        then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item')
    if [ "$ITEM" != ${PG2} ];
    then
        echo "The primary PG $PG2 didn't become the in progress item on remote"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio')
        if [ "$PRIO" != $pool2_prio ];
        then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    wait_for_clean || return 1

    if [ $ERRORS != "0" ];
    then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool $pool1
    delete_pool $pool2
    kill_daemons $dir || return 1
    return $ERRORS
}

main osd-backfill-prio "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-prio.sh"
# End: