]> git.proxmox.com Git - ceph.git/blob - ceph/qa/standalone/osd/osd-recovery-prio.sh
import ceph 12.2.12
[ceph.git] / ceph / qa / standalone / osd / osd-recovery-prio.sh
1 #!/usr/bin/env bash
2 #
3 # Copyright (C) 2019 Red Hat <contact@redhat.com>
4 #
5 # Author: David Zafman <dzafman@redhat.com>
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
10 # any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
16 #
17
18 source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
19
# Harness entry point: configure the test cluster environment, then run each
# requested TEST_* function (or all of them) inside a fresh setup/teardown.
function run() {
    local dir=$1
    shift

    # Fix port????
    export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20"
    export objects=200
    export poolprefix=test
    export FORCE_PRIO="255"    # See OSD_RECOVERY_PRIORITY_FORCED
    export NORMAL_PRIO="180"   # See OSD_RECOVERY_PRIORITY_BASE

    # Default to every TEST_* function currently defined in this shell.
    local funcs
    funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local tf
    for tf in $funcs ; do
        setup $dir || return 1
        $tf $dir || return 1
        teardown $dir || return 1
    done
}
41
42
#
# Verify OSD recovery reservation priorities on a single primary OSD:
#   1. a queued item is re-queued at FORCE_PRIO by force-recovery
#   2. a queued forced item preempts a lower-priority in-progress item
#   3. an in-progress item keeps running when its priority is raised
#   4. cancel-force-recovery drops an item back to NORMAL_PRIO and lets a
#      higher-priority waiter preempt it
# Requires 3 PGs sharing the same primary but with distinct second replicas,
# so all three reservations are visible in one OSD's dump_reservations.
#
function TEST_recovery_priority() {
    local dir=$1
    local pools=10
    local OSDS=5

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 3 pools with a pg with the same primaries but second
    # replica on another osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1_1
    local chk_osd1_2

    local PG2
    local POOLNUM2
    local pool2
    local chk_osd2

    local PG3
    local POOLNUM3
    local pool3

    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ -z "$PG1" ];
        then
            # First candidate fixes the shared primary (chk_osd1_1).
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [ -z "$PG2" -a "$chk_osd1_1" = "$test_osd1" -a "$chk_osd1_2" != "$test_osd2" ];
        then
            # Same primary, different second replica than PG1.
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2=$test_osd2
        elif [ -n "$PG2" -a "$chk_osd1_1" = "$test_osd1" -a "$chk_osd1_2" != "$test_osd2" -a "$chk_osd2" != "$test_osd2" ];
        then
            # Same primary, second replica distinct from both PG1's and PG2's.
            PG3="${p}.0"
            POOLNUM3=$p
            pool3="${poolprefix}$p"
            break
        fi
    done
    rm -f $dir/acting

    # BUGFIX: original tested the literal string "pool3" (never empty),
    # silently skipping the "no third PG found" failure path.
    if [ "$pool2" = "" -o "$pool3" = "" ];
    then
        echo "Failure to find appropriate PGs"
        return 1
    fi

    # Drop every pool we are not using for the test.
    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    # Shrink pools 2 and 3 so raising size later triggers recovery.
    ceph osd pool set $pool2 size 1
    ceph osd pool set $pool3 size 1
    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/data bs=1M count=10
    p=1
    for pname in $pool1 $pool2 $pool3
    do
        for i in $(seq 1 $objects)
        do
            rados -p ${pname} put obj${i}-p${p} $dir/data
        done
        p=$(expr $p + 1)
    done

    local otherosd=$(get_not_primary $pool1 obj1-p1)

    ceph pg dump pgs
    ERRORS=0

    # Block actual data movement so reservations stay observable.
    ceph osd set norecover
    ceph osd set noout

    # Get a pg to want to recover and quickly force it
    # to be preempted.
    ceph osd pool set $pool3 size 2
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1

    # 3. Item is in progress, adjust priority with no higher priority waiting
    while(ceph pg force-recovery $PG3 2>&1 | grep -q "doesn't require recovery")
    do
        sleep 2
    done
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1

    # Make PG1 want to recover too (its second replica goes out).
    ceph osd out osd.$chk_osd1_2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1
    ceph pg dump pgs

    ceph osd pool set $pool2 size 2
    sleep 2
    flush_pg_stats || return 1
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1
    cat $dir/out
    ceph pg dump pgs

    # PG1 (never forced) should wait in the queue at NORMAL_PRIO.
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio")
    if [ "$PRIO" != "$NORMAL_PRIO" ];
    then
        echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting"
        ERRORS=$(expr $ERRORS + 1)
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != "${PG3}" ];
    then
        echo "The first force-recovery PG $PG3 didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != "$FORCE_PRIO" ];
        then
            echo "The first force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # 1. Item is queued, re-queue with new priority
    while(ceph pg force-recovery $PG2 2>&1 | grep -q "doesn't require recovery")
    do
        sleep 2
    done
    sleep 2
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
    if [ "$PRIO" != "$FORCE_PRIO" ];
    then
        echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
        ERRORS=$(expr $ERRORS + 1)
    fi
    flush_pg_stats || return 1

    # 4. Item is in progress, if higher priority items waiting prempt item
    #ceph osd unset norecover
    ceph pg cancel-force-recovery $PG3 || return 1
    sleep 2
    #ceph osd set norecover
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1
    cat $dir/out
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio")
    if [ "$PRIO" != "$NORMAL_PRIO" ];
    then
        echo "After cancel-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO"
        ERRORS=$(expr $ERRORS + 1)
    fi

    # The still-forced PG2 should have preempted the demoted PG3.
    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != "${PG2}" ];
    then
        echo "The force-recovery PG $PG2 didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != "$FORCE_PRIO" ];
        then
            echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    ceph pg cancel-force-recovery $PG2 || return 1
    sleep 5
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1

    # 2. Item is queued, re-queue and preempt because new priority higher than an in progress item
    flush_pg_stats || return 1
    ceph pg force-recovery $PG3 || return 1
    sleep 2

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1
    cat $dir/out
    # PG2's forcing was cancelled above, so it should wait at NORMAL_PRIO.
    PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio")
    if [ "$PRIO" != "$NORMAL_PRIO" ];
    then
        # BUGFIX: message referenced PG3 although the check is on PG2.
        echo "After cancel-force-recovery PG ${PG2} doesn't have prio $NORMAL_PRIO"
        ERRORS=$(expr $ERRORS + 1)
    fi

    eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != "${PG3}" ];
    then
        echo "The force-recovery PG $PG3 didn't get promoted to an in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != "$FORCE_PRIO" ];
        then
            # BUGFIX: message referenced PG2 although the item checked is PG3.
            echo "The force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    ceph osd unset noout
    ceph osd unset norecover

    wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations" || return 1

    ceph pg dump pgs

    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history

    if [ $ERRORS != "0" ];
    then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool $pool1
    delete_pool $pool2
    delete_pool $pool3
    kill_daemons $dir || return 1
    return $ERRORS
}
297
298 #
299 # Show that pool recovery_priority is added to recovery priority
300 #
301 # Create 2 pools with 2 OSDs with different primarys
302 # pool 1 with recovery_priority 1
303 # pool 2 with recovery_priority 2
304 #
305 # Start recovery by changing the pool sizes from 1 to 2
306 # Use dump_reservations to verify priorities
function TEST_recovery_pool_priority() {
    local dir=$1
    local pools=3 # Don't assume the first 2 pools are exact what we want
    local OSDS=2

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    for p in $(seq 1 $pools)
    do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools with different primaries which
    # means the replica must be on another osd.
    local PG1
    local POOLNUM1
    local pool1
    local chk_osd1_1
    local chk_osd1_2

    local PG2
    local POOLNUM2
    local pool2
    local chk_osd2_1
    local chk_osd2_2

    for p in $(seq 1 $pools)
    do
        ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting
        local test_osd1=$(head -1 $dir/acting)
        local test_osd2=$(tail -1 $dir/acting)
        if [ -z "$PG1" ];
        then
            PG1="${p}.0"
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1_1=$test_osd1
            chk_osd1_2=$test_osd2
        elif [ "$chk_osd1_1" != "$test_osd1" ];
        then
            # With 2 OSDs a different primary implies the replica sets are
            # mirrored: chk_osd2_1 == chk_osd1_2 and chk_osd2_2 == chk_osd1_1,
            # so the dump files captured below cover both PGs.
            PG2="${p}.0"
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            chk_osd2_1=$test_osd1
            chk_osd2_2=$test_osd2
            break
        fi
    done
    rm -f $dir/acting

    if [ "$pool2" = "" ];
    then
        # BUGFIX: typo "appropirate" in the error message.
        echo "Failure to find appropriate PGs"
        return 1
    fi

    # Drop the pool(s) not used by the test.
    for p in $(seq 1 $pools)
    do
        if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ];
        then
            delete_pool ${poolprefix}$p
        fi
    done

    # Per-pool recovery_priority is added on top of NORMAL_PRIO.
    pool1_extra_prio=1
    pool2_extra_prio=2
    pool1_prio=$(expr $NORMAL_PRIO + $pool1_extra_prio)
    pool2_prio=$(expr $NORMAL_PRIO + $pool2_extra_prio)

    ceph osd pool set $pool1 size 1
    ceph osd pool set $pool1 recovery_priority $pool1_extra_prio
    ceph osd pool set $pool2 size 1
    ceph osd pool set $pool2 recovery_priority $pool2_extra_prio
    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/data bs=1M count=10
    p=1
    for pname in $pool1 $pool2
    do
        for i in $(seq 1 $objects)
        do
            rados -p ${pname} put obj${i}-p${p} $dir/data
        done
        p=$(expr $p + 1)
    done

    local otherosd=$(get_not_primary $pool1 obj1-p1)

    ceph pg dump pgs
    ERRORS=0

    # Restore size 2 to kick off recovery on both pools.
    ceph osd pool set $pool1 size 2
    ceph osd pool set $pool2 size 2
    sleep 10
    # CONSISTENCY: fail early if the admin-socket dump fails, like every
    # other dump_reservations call in this file (was silently ignored).
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/dump.${chk_osd1_1}.out || return 1
    echo osd.${chk_osd1_1}
    cat $dir/dump.${chk_osd1_1}.out
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_reservations > $dir/dump.${chk_osd1_2}.out || return 1
    echo osd.${chk_osd1_2}
    cat $dir/dump.${chk_osd1_2}.out

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != "${PG1}" ];
    then
        echo "The primary PG for $pool1 didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != "$pool1_prio" ];
        then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item')
    if [ "$ITEM" != "${PG1}" ];
    then
        echo "The primary PG for $pool1 didn't become the in progress item on remote"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio')
        if [ "$PRIO" != "$pool1_prio" ];
        then
            echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item')
    if [ "$ITEM" != "${PG2}" ];
    then
        echo "The primary PG for $pool2 didn't become the in progress item"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio')
        if [ "$PRIO" != "$pool2_prio" ];
        then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    # Using eval will strip double-quotes from item
    eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item')
    if [ "$ITEM" != "${PG2}" ];
    then
        echo "The primary PG $PG2 didn't become the in progress item on remote"
        ERRORS=$(expr $ERRORS + 1)
    else
        PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio')
        if [ "$PRIO" != "$pool2_prio" ];
        then
            echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote"
            ERRORS=$(expr $ERRORS + 1)
        fi
    fi

    wait_for_clean || return 1

    if [ $ERRORS != "0" ];
    then
        echo "$ERRORS error(s) found"
    else
        echo TEST PASSED
    fi

    delete_pool $pool1
    delete_pool $pool2
    kill_daemons $dir || return 1
    return $ERRORS
}
495
496 main osd-recovery-prio "$@"
497
498 # Local Variables:
499 # compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-prio.sh"
500 # End: