3 # Copyright (C) 2017 Red Hat <contact@redhat.com>
5 # Author: David Zafman <dzafman@redhat.com>
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
18 source $CEPH_ROOT/qa
/standalone
/ceph-helpers.sh
# Test-run configuration followed by the test dispatch loop.
# NOTE(review): the enclosing function header, the binding of $dir and the
# closing done/} are not visible in this view — confirm against the full file.
#
# Monitor address used by this script (see the trailing note about the port
# having to be unique).
25 export CEPH_MON
="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
# Append a freshly generated fsid, auth-supported=none and the monitor
# address to CEPH_ARGS.
27 CEPH_ARGS
+="--fsid=$(uuidgen) --auth-supported=none "
28 CEPH_ARGS
+="--mon-host=$CEPH_MON "
# Use the names passed as arguments; when none are given, fall back to every
# name in the output of `set` that matches TEST_[0-9a-z_]* followed by a space.
33 local funcs
=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
# Each selected function runs between setup and teardown on $dir; the first
# step that fails makes the routine return 1.
34 for func
in $funcs ; do
35 setup
$dir ||
return 1
36 $func $dir ||
return 1
37 teardown
$dir ||
return 1
# below_margin: return 0 when $check lies in the closed range
# [$target - $margin, $target], otherwise return 1.
# NOTE(review): the lines that assign check, target and margin are not visible
# in this view. Callers in this file pass two positional arguments (observed
# value, expected value) — confirm the binding against the full file.
41 function below_margin
() {
46 return $
(( $check <= $target && $check >= $target - $margin ?
0 : 1 ))
# above_margin: return 0 when $check lies in the closed range
# [$target, $target + $margin], otherwise return 1.
# NOTE(review): the lines that assign check, target and margin are not visible
# in this view. Callers in this file pass two positional arguments (observed
# value, expected value) — confirm the binding against the full file.
49 function above_margin
() {
54 return $
(( $check >= $target && $check <= $target + $margin ?
0 : 1 ))
# Body of the log checker that the tests in this file invoke as
#   check $PG $log <degraded_start> <degraded_end> <misplaced_start> <misplaced_end> [type]
# It greps the "_update_calc_stats" lines logged for ${PG} in $log and compares
# the first and last reported degraded / misplaced counts with the expected
# values through below_margin / above_margin; any failed comparison returns 1.
# NOTE(review): the function header and the assignments of PG, log,
# degraded_end, type and addp are not visible in this view — confirm them
# against the full file.
60 local degraded_start
=$3
62 local misplaced_start
=$5
63 local misplaced_end
=$6
# Erasure pools take a separate branch whose body is not visible here
# (presumably where addp is set — TODO confirm).
67 if [ "$type" = "erasure" ];
# UPACT: from the last matching _update_calc_stats line, the token that starts
# with '[' right after a ") " and runs up to the next space or 'p'.
72 UPACT
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats " $log |
tail -1 |
sed "s/.*[)] \([[][^ p]*\).*$/\1/")
# NOTE(review): the pipeline below takes the FIRST line (head -1) that contains
# " ${UPACT}${addp}", not literally the 3rd line mentioned in the next comment.
74 # Check 3rd line at start because of false recovery starts
75 FIRST
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats degraded " $log | grep -F " ${UPACT}${addp}" |
head -1 |
sed "s/.* \([0-9]*\)$/\1/")
76 below_margin
$FIRST $degraded_start ||
return 1
# Trailing number of the last degraded line; this one is not filtered on UPACT.
77 LAST
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats degraded " $log |
tail -1 |
sed "s/.* \([0-9]*\)$/\1/")
78 above_margin
$LAST $degraded_end ||
return 1
# Same first/last comparison for the misplaced counter.
80 # Check 3rd line at start because of false recovery starts
81 FIRST
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats misplaced " $log | grep -F " ${UPACT}${addp}" |
head -1 |
sed "s/.* \([0-9]*\)$/\1/")
82 below_margin
$FIRST $misplaced_start ||
return 1
83 LAST
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats misplaced " $log |
tail -1 |
sed "s/.* \([0-9]*\)$/\1/")
84 above_margin
$LAST $misplaced_end ||
return 1
89 # active+recovering+degraded
91 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
92 # 1.0 500 0 500 0 0 0 500 500 active+recovering+degraded 2017-11-17 19:27:36.493828 28'500 32:603 [1,2,4] 1 [1,2,4] 1 0'0 2017-11-17 19:27:05.915467 0'0 2017-11-17 19:27:05.915467
# do_recovery_out1: start one mon, one mgr and six OSDs, create a pool of the
# requested type, write $objects objects, then kill / mark down / mark out one
# non-primary OSD of obj1's PG while norecover is set. After recovery the
# primary's log is validated with
#   check $PG $log $objects 0 0 0 $type
# i.e. degraded expected to go from $objects to 0 and misplaced to stay at 0.
# The TEST_* wrappers in this file call it as: do_recovery_out1 $dir <type>.
# NOTE(review): the lines binding dir/type and the then/else/fi/do/done
# keywords are not visible in this view — confirm against the full file.
93 function do_recovery_out1
() {
98 run_mon
$dir a ||
return 1
99 run_mgr
$dir x ||
return 1
100 run_osd
$dir 0 ||
return 1
101 run_osd
$dir 1 ||
return 1
102 run_osd
$dir 2 ||
return 1
103 run_osd
$dir 3 ||
return 1
104 run_osd
$dir 4 ||
return 1
105 run_osd
$dir 5 ||
return 1
# Erasure pools need a profile first (jerasure, k=2, m=1, failure domain osd);
# the other create_pool call below takes only the pool type.
107 if [ $type = "erasure" ];
109 ceph osd erasure-code-profile
set myprofile plugin
=jerasure technique
=reed_sol_van k
=2 m
=1 crush-failure-domain
=osd
110 create_pool
$poolname 1 1 $type myprofile
112 create_pool
$poolname 1 1 $type
115 wait_for_clean ||
return 1
# Write obj1..obj$objects, each with /dev/null as the input file.
117 for i
in $
(seq 1 $objects)
119 rados
-p $poolname put obj
$i /dev
/null
122 local primary
=$
(get_primary
$poolname obj1
)
123 local PG
=$
(get_pg
$poolname obj1
)
# NOTE(review): the remark below looks copied from a size-2 scenario; six OSDs
# are started here — confirm it still applies.
124 # Only 2 OSDs so only 1 not primary
125 local otherosd
=$
(get_not_primary
$poolname obj1
)
# norecover stays set while the OSD is killed, marked down and marked out; it
# is cleared afterwards and the primary is told to kick its recovery queue.
127 ceph osd
set norecover
128 kill $
(cat $dir/osd.
${otherosd}.pid
)
129 ceph osd down osd.
${otherosd}
130 ceph osd out osd.
${otherosd}
131 ceph osd
unset norecover
132 ceph tell osd.$
(get_primary
$poolname obj1
) debug kick_recovery_wq
0
135 wait_for_clean ||
return 1
# The log inspected is that of the primary recorded before the OSD was removed.
137 local log
=$dir/osd.
${primary}.log
138 check
$PG $log $objects 0 0 0 $type ||
return 1
140 delete_pool
$poolname
141 kill_daemons
$dir ||
return 1
# TEST_recovery_replicated_out1: run do_recovery_out1 on $dir with pool type
# "replicated"; returns 1 if it fails.
# NOTE(review): the line binding $dir is not visible in this view.
144 function TEST_recovery_replicated_out1
() {
147 do_recovery_out1
$dir replicated ||
return 1
# TEST_recovery_erasure_out1: run do_recovery_out1 on $dir with pool type
# "erasure"; returns 1 if it fails.
# NOTE(review): the line binding $dir is not visible in this view.
150 function TEST_recovery_erasure_out1
() {
153 do_recovery_out1
$dir erasure ||
return 1
156 # [0, 1] -> [2,3,4,5]
158 # missing on primary 500 -> 0
160 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
161 # 1.0 500 500 2000 0 0 0 500 500 active+recovering+degraded 2017-10-27 09:38:37.453438 22'500 25:394 [2,4,3,5] 2 [2,4,3,5] 2 0'0 2017-10-27 09:37:58.046748 0'0 2017-10-27 09:37:58.046748
# TEST_recovery_sizeup: start one mon, one mgr and six OSDs, create a pool and
# set its size to 2, write $objects objects, then, with norecover set, mark
# both the primary and the other OSD of obj1's PG out and raise the pool size
# to 4. After recovery the new primary's log is checked: degraded expected to
# go from 4 * $objects to 0, and "missing shard <primary>" from $objects to 0.
# NOTE(review): the line binding $dir and the do/done keywords of the write
# loop are not visible in this view.
162 function TEST_recovery_sizeup
() {
165 run_mon
$dir a ||
return 1
166 run_mgr
$dir x ||
return 1
167 run_osd
$dir 0 ||
return 1
168 run_osd
$dir 1 ||
return 1
169 run_osd
$dir 2 ||
return 1
170 run_osd
$dir 3 ||
return 1
171 run_osd
$dir 4 ||
return 1
172 run_osd
$dir 5 ||
return 1
174 create_pool
$poolname 1 1
175 ceph osd pool
set $poolname size
2
177 wait_for_clean ||
return 1
# Write obj1..obj$objects, each with /dev/null as the input file.
179 for i
in $
(seq 1 $objects)
181 rados
-p $poolname put obj
$i /dev
/null
184 local primary
=$
(get_primary
$poolname obj1
)
185 local PG
=$
(get_pg
$poolname obj1
)
186 # Only 2 OSDs so only 1 not primary
187 local otherosd
=$
(get_not_primary
$poolname obj1
)
189 ceph osd
set norecover
190 ceph osd out osd.
$primary osd.
$otherosd
# NOTE(review): the pool name is the literal "test" here rather than
# $poolname; this only matches when poolname is "test" — confirm.
191 ceph osd pool
set test size
4
192 ceph osd
unset norecover
193 ceph tell osd.$
(get_primary
$poolname obj1
) debug kick_recovery_wq
0
196 wait_for_clean ||
return 1
# Re-read the primary: the one recorded earlier was marked out above.
199 primary
=$
(get_primary
$poolname obj1
)
201 local degraded
=$
(expr $objects \
* 4)
202 local log
=$dir/osd.
${primary}.log
203 check
$PG $log $degraded 0 0 0 ||
return 1
# UPACT: from the last matching _update_calc_stats line, the token that starts
# with '[' right after a ") " and runs up to the next space or 'p'.
205 UPACT
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats " $log |
tail -1 |
sed "s/.*[)] \([[][^ p]*\).*$/\1/")
207 # This is the value set into MISSING_ON_PRIMARY
208 FIRST
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats missing shard $primary " $log |
grep -F " $UPACT " |
head -1 |
sed "s/.* \([0-9]*\)$/\1/")
209 below_margin
$FIRST $objects ||
return 1
210 LAST
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats missing shard $primary " $log |
tail -1 |
sed "s/.* \([0-9]*\)$/\1/")
211 above_margin
$LAST 0 ||
return 1
213 delete_pool
$poolname
214 kill_daemons
$dir ||
return 1
217 # [0, 1, 2, 4] -> [3, 5]
219 # missing on primary 500 -> 0
220 # active+recovering+degraded
222 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
223 # 1.0 500 500 1000 0 0 0 500 500 active+recovering+degraded 2017-10-27 09:34:50.012261 22'500 27:118 [3,5] 3 [3,5] 3 0'0 2017-10-27 09:34:08.617248 0'0 2017-10-27 09:34:08.617248
# TEST_recovery_sizedown: start one mon, one mgr and six OSDs, create a pool
# and set its size to 4, write $objects objects, then, with norecover set,
# mark OSDs out and lower the pool size to 2. After recovery the new primary's
# log is checked: degraded expected to go from 2 * $objects to 0, and
# "missing shard <primary>" from $objects to 0.
# NOTE(review): the line binding $dir, the do/done keywords and the loop that
# sets $osd are not visible in this view — confirm against the full file.
224 function TEST_recovery_sizedown
() {
227 run_mon
$dir a ||
return 1
228 run_mgr
$dir x ||
return 1
229 run_osd
$dir 0 ||
return 1
230 run_osd
$dir 1 ||
return 1
231 run_osd
$dir 2 ||
return 1
232 run_osd
$dir 3 ||
return 1
233 run_osd
$dir 4 ||
return 1
234 run_osd
$dir 5 ||
return 1
236 create_pool
$poolname 1 1
237 ceph osd pool
set $poolname size
4
239 wait_for_clean ||
return 1
# Write obj1..obj$objects, each with /dev/null as the input file.
241 for i
in $
(seq 1 $objects)
243 rados
-p $poolname put obj
$i /dev
/null
246 local primary
=$
(get_primary
$poolname obj1
)
247 local PG
=$
(get_pg
$poolname obj1
)
248 # Output of get_osds for obj1 (pool size is 4 here, so the earlier "only 2 OSDs" remark did not apply)
249 local allosds
=$
(get_osds
$poolname obj1
)
251 ceph osd
set norecover
# NOTE(review): $osd is presumably the variable of a loop over $allosds whose
# for/do/done lines are not visible — TODO confirm.
254 ceph osd out osd.
$osd
# NOTE(review): the pool name is the literal "test" here rather than
# $poolname; this only matches when poolname is "test" — confirm.
257 ceph osd pool
set test size
2
258 ceph osd
unset norecover
259 ceph tell osd.$
(get_primary
$poolname obj1
) debug kick_recovery_wq
0
262 wait_for_clean ||
return 1
# Re-read the primary after the OSDs were marked out.
265 primary
=$
(get_primary
$poolname obj1
)
267 local degraded
=$
(expr $objects \
* 2)
268 local log
=$dir/osd.
${primary}.log
269 check
$PG $log $degraded 0 0 0 ||
return 1
# UPACT: from the last matching _update_calc_stats line, the token that starts
# with '[' right after a ") " and runs up to the next space or 'p'.
271 UPACT
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats " $log |
tail -1 |
sed "s/.*[)] \([[][^ p]*\).*$/\1/")
273 # This is the value set into MISSING_ON_PRIMARY
274 FIRST
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats missing shard $primary " $log |
grep -F " $UPACT " |
head -1 |
sed "s/.* \([0-9]*\)$/\1/")
275 below_margin
$FIRST $objects ||
return 1
276 LAST
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats missing shard $primary " $log |
tail -1 |
sed "s/.* \([0-9]*\)$/\1/")
277 above_margin
$LAST 0 ||
return 1
279 delete_pool
$poolname
280 kill_daemons
$dir ||
return 1
284 # degraded 200 -> 100
285 # active+recovering+undersized+degraded
287 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
288 # 1.0 100 0 200 0 0 0 100 100 active+recovering+undersized+degraded 2017-11-17 17:16:15.302943 13'500 16:643 [1,2] 1 [1,2] 1 0'0 2017-11-17 17:15:34.985563 0'0 2017-11-17 17:15:34.985563
# TEST_recovery_undersized: start one mon, one mgr and three OSDs, create a
# pool and set its size to 1, write $objects objects, then, with norecover
# set, raise the pool size to 3 and wait until the PG no longer reports
# "recovering". The primary's log is then checked: degraded expected to go
# from 2 * $objects to $objects.
# NOTE(review): the line binding $dir, the loop that marks an OSD out, the
# polling/sleep loop and the then/fi/do/done keywords are not visible in this
# view — confirm against the full file.
289 function TEST_recovery_undersized
() {
292 run_mon
$dir a ||
return 1
293 run_mgr
$dir x ||
return 1
294 run_osd
$dir 0 ||
return 1
295 run_osd
$dir 1 ||
return 1
296 run_osd
$dir 2 ||
return 1
298 create_pool
$poolname 1 1
299 ceph osd pool
set $poolname size
1
301 wait_for_clean ||
return 1
# Write obj1..obj$objects, each with /dev/null as the input file.
303 for i
in $
(seq 1 $objects)
305 rados
-p $poolname put obj
$i /dev
/null
308 local primary
=$
(get_primary
$poolname obj1
)
309 local PG
=$
(get_pg
$poolname obj1
)
311 ceph osd
set norecover
312 # Mark any osd not the primary (only 1 replica so also has no replica)
# NOTE(review): $i here is presumably the variable of a loop over OSD ids
# whose for/do/done lines are not visible — TODO confirm.
315 if [ $i = $primary ];
# NOTE(review): the pool name is the literal "test" here rather than
# $poolname; this only matches when poolname is "test" — confirm.
322 ceph osd pool
set test size
3
323 ceph osd
unset norecover
324 ceph tell osd.$
(get_primary
$poolname obj1
) debug kick_recovery_wq
0
325 # Give extra sleep time because code below doesn't have the sophistication of wait_for_clean()
329 # Wait for recovery to finish
330 # Can't use wait_for_clean() because state goes from active+recovering+undersized+degraded
331 # to active+undersized+degraded
# The condition holds once the PG's line in `ceph pg dump pgs` does not
# contain "recovering".
334 if ceph pg dump pgs |
grep ^
$PG |
grep -qv recovering
340 echo "Timeout waiting for recovery to finish"
347 primary
=$
(get_primary
$poolname obj1
)
348 local log
=$dir/osd.
${primary}.log
# UPACT: from the last matching _update_calc_stats line, the token that starts
# with '[' right after a ") " and runs up to the next space or 'p'.
350 UPACT
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats " $log |
tail -1 |
sed "s/.*[)] \([[][^ p]*\).*$/\1/")
352 local degraded
=$
(expr $objects \
* 2)
# First degraded count on a line containing " $UPACT " must be within margin
# below 2 * $objects; the last degraded count must be within margin above
# $objects.
353 FIRST
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats degraded " $log |
grep -F " $UPACT " |
head -1 |
sed "s/.* \([0-9]*\)$/\1/")
354 below_margin
$FIRST $degraded ||
return 1
355 LAST
=$
(grep "pg[[]${PG}.*recovering.*_update_calc_stats degraded " $log |
tail -1 |
sed "s/.* \([0-9]*\)$/\1/")
356 above_margin
$LAST $objects ||
return 1
358 delete_pool
$poolname
359 kill_daemons
$dir ||
return 1
362 # [1,0,2] -> [1,3,NONE]/[1,3,2]
364 # misplaced 100 -> 100
365 # active+recovering+degraded+remapped
367 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
368 # 1.0 100 0 100 100 0 0 100 100 active+recovering+degraded+remapped 2017-11-27 21:24:20.851243 18'500 23:618 [1,3,NONE] 1 [1,3,2] 1 0'0 2017-11-27 21:23:39.395242 0'0 2017-11-27 21:23:39.395242
# TEST_recovery_erasure_remapped: start one mon, one mgr and four OSDs, create
# an erasure pool (jerasure, k=2, m=1, failure domain osd) with min_size 2 and
# write $objects objects. With norecover set, one non-primary OSD is killed,
# marked down and marked out. After recovery the primary's log is validated
# with
#   check $PG $log $objects 0 $objects $objects erasure
# i.e. degraded expected to go from $objects to 0 and misplaced to stay at
# $objects.
# NOTE(review): the line binding $dir, the loop that marks a further OSD out
# and the then/fi/do/done keywords are not visible in this view — confirm
# against the full file.
369 function TEST_recovery_erasure_remapped
() {
372 run_mon
$dir a ||
return 1
373 run_mgr
$dir x ||
return 1
374 run_osd
$dir 0 ||
return 1
375 run_osd
$dir 1 ||
return 1
376 run_osd
$dir 2 ||
return 1
377 run_osd
$dir 3 ||
return 1
379 ceph osd erasure-code-profile
set myprofile plugin
=jerasure technique
=reed_sol_van k
=2 m
=1 crush-failure-domain
=osd
380 create_pool
$poolname 1 1 erasure myprofile
381 ceph osd pool
set $poolname min_size
2
383 wait_for_clean ||
return 1
# Write obj1..obj$objects, each with /dev/null as the input file.
385 for i
in $
(seq 1 $objects)
387 rados
-p $poolname put obj
$i /dev
/null
390 local primary
=$
(get_primary
$poolname obj1
)
391 local PG
=$
(get_pg
$poolname obj1
)
392 local otherosd
=$
(get_not_primary
$poolname obj1
)
# norecover stays set while OSDs are removed; it is cleared further down.
394 ceph osd
set norecover
395 kill $
(cat $dir/osd.
${otherosd}.pid
)
396 ceph osd down osd.
${otherosd}
397 ceph osd out osd.
${otherosd}
399 # Mark osd not the primary and not down/out osd as just out
# NOTE(review): $i in the two tests below is presumably the variable of a loop
# over OSD ids whose for/do/done lines and bodies are not visible — TODO confirm.
402 if [ $i = $primary ];
406 if [ $i = $otherosd ];
413 ceph osd
unset norecover
414 ceph tell osd.$
(get_primary
$poolname obj1
) debug kick_recovery_wq
0
417 wait_for_clean ||
return 1
# The log inspected is that of the primary recorded before the OSDs were removed.
419 local log
=$dir/osd.
${primary}.log
420 check
$PG $log $objects 0 $objects $objects erasure ||
return 1
422 delete_pool
$poolname
423 kill_daemons
$dir ||
return 1
# Script entry point: call main with the literal name "osd-recovery-stats"
# followed by the script's own arguments.
# NOTE(review): main is not defined in the visible part of this file;
# presumably it comes from the sourced ceph-helpers.sh — confirm.
426 main osd-recovery-stats
"$@"
429 # compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-stats.sh"