]> git.proxmox.com Git - ceph.git/blob - ceph/qa/standalone/osd/osd-backfill-stats.sh
87c6218fe0165b1b4aaacf037f25e878876ca904
[ceph.git] / ceph / qa / standalone / osd / osd-backfill-stats.sh
1 #!/usr/bin/env bash
2 #
3 # Copyright (C) 2017 Red Hat <contact@redhat.com>
4 #
5 # Author: David Zafman <dzafman@redhat.com>
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
10 # any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
16 #
17
18 source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
19
# Test driver: prepare the shared environment, then run each selected test
# between a fresh setup and teardown of the test directory.
#   $1    - test directory handed to setup, the tests and teardown
#   $2... - optional names of the test functions to run; when none are given,
#           every function whose name matches TEST_[0-9a-z_]* is run
# Returns 1 as soon as setup, a test or teardown fails.
function run() {
    local dir=$1
    shift

    # Fix port????
    CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one
    export CEPH_MON
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    # Very small pg log limits (presumably so that recovery has to go through
    # backfill instead of log-based recovery -- confirm).
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "

    # Settings read by the margin helpers and by the TEST_* functions.
    margin=10
    objects=200
    poolname=test
    export margin objects poolname

    # Explicit test names win; otherwise discover the TEST_* functions from
    # the listing printed by "set".
    local selected=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $selected ; do
        if ! setup $dir ; then
            return 1
        fi
        if ! $func $dir ; then
            return 1
        fi
        if ! teardown $dir ; then
            return 1
        fi
    done
}
41
# Succeed when a value lies at or below a target, but no more than
# $margin below it:  target - margin <= value <= target.
#   $1 - value to check (integer)
#   $2 - target (integer)
# Reads the global "margin". Returns 0 when inside the window, 1 otherwise.
function below_margin() {
    local -i value=$1
    local -i goal=$2

    if (( value <= goal && value >= goal - $margin )); then
        return 0
    fi
    return 1
}
49
# Succeed when a value lies at or above a target, but no more than
# $margin above it:  target <= value <= target + margin.
#   $1 - value to check (integer)
#   $2 - target (integer)
# Reads the global "margin". Returns 0 when inside the window, 1 otherwise.
function above_margin() {
    local -i value=$1
    local -i goal=$2

    if (( value >= goal && value <= goal + $margin )); then
        return 0
    fi
    return 1
}
57
# Command strings that check() runs through eval. They are single-quoted on
# purpose: $PG, $log, $which, $UPACT and $addp are expanded at eval time from
# the variables visible inside check().
#   FIND_UPACT - from the last "update_calc_stats" line logged for the PG while
#                backfilling, extract the bracketed token that follows the
#                last ") " (presumably the up/acting set -- confirm against
#                the OSD log format)
#   FIND_FIRST - trailing number of the first "update_calc_stats $which" line
#                that contains " ${UPACT}${addp}" and does not contain "est"
#   FIND_LAST  - trailing number of the last "update_calc_stats $which" line
FIND_UPACT='grep "pg[[]${PG}.*backfilling.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"'
FIND_FIRST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"'
FIND_LAST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"'

# Helper for check(): verify one logged statistic.
#   $1 - statistic name as it appears after "update_calc_stats" in the log
#   $2 - expected starting value (first logged value must be within $margin below it)
#   $3 - expected final value (last logged value must be within $margin above it)
# Relies on bash dynamic scoping to see check()'s PG, log and addp, and on the
# global UPACT. Sets the globals FIRST and LAST. Returns 1 when a value cannot
# be found in the log or is outside its window.
function _check_stat_range() {
    local which=$1
    local start=$2
    local end=$3

    # The first value is taken from the first matching line that carries the
    # final up/acting token, see FIND_FIRST.
    FIRST=$(eval "$FIND_FIRST")
    [ -n "$FIRST" ] || return 1
    below_margin "$FIRST" "$start" || return 1

    LAST=$(eval "$FIND_LAST")
    [ -n "$LAST" ] || return 1
    above_margin "$LAST" "$end" || return 1

    return 0
}

# Verify the degraded/misplaced statistics that the primary OSD logged during
# a backfill.
#   $1  - test directory containing the osd.*.log files
#   $2  - PG id
#   $3  - id of the OSD that was primary during the backfill
#   $4  - pool type; "erasure" changes the suffix matched after the
#         up/acting token from " " to "p"
#   $5  - expected degraded count at the start
#   $6  - expected degraded count at the end
#   $7  - expected misplaced count at the start
#   $8  - expected misplaced count at the end
#   $9  - optional expected start of the "shard <primary>" value
#   $10 - optional expected end of the "shard <primary>" value
#   $11 - "true" (default) to require that exactly one OSD log contains
#         "+backfilling"
# Sets the globals UPACT, FIRST and LAST (not declared local, as before).
# Returns 0 when every value is inside its window, 1 otherwise.
function check() {
    local dir=$1
    local PG=$2
    local primary=$3
    local type=$4
    local degraded_start=$5
    local degraded_end=$6
    local misplaced_start=$7
    local misplaced_end=$8
    local primary_start=${9:-}
    local primary_end=${10:-}
    local check_setup=${11:-true}

    # grep -l prints the file name only when the primary's log mentions
    # "+backfilling", so $log is empty when the primary never backfilled.
    local log
    log=$(grep -l +backfilling "$dir/osd.$primary.log")
    if [ "$check_setup" = "true" ]; then
        local alllogs
        alllogs=$(grep -l +backfilling "$dir"/osd.*.log)
        if [ "$(echo "$alllogs" | wc -w)" != "1" ]; then
            echo "Test setup failure, a single OSD should have performed backfill"
            return 1
        fi
    fi

    # With an empty $log the eval'ed grep pipelines would have no file operand
    # and would read stdin instead, which can block forever.
    if [ -z "$log" ]; then
        echo "No backfilling found in $dir/osd.$primary.log"
        return 1
    fi

    local addp=" "
    if [ "$type" = "erasure" ]; then
        addp="p"
    fi

    # The snippets are quoted for eval so that their regex text is not
    # subject to word splitting and pathname expansion beforehand.
    UPACT=$(eval "$FIND_UPACT")
    [ -n "$UPACT" ] || return 1

    _check_stat_range "degraded" "$degraded_start" "$degraded_end" || return 1
    _check_stat_range "misplaced" "$misplaced_start" "$misplaced_end" || return 1

    # This is the value that is set into MISSING_ON_PRIMARY
    if [ -n "$primary_start" ]; then
        _check_stat_range "shard $primary" "$primary_start" "$primary_end" || return 1
    fi

    return 0
}
125
126 # [1] -> [1, 0, 2]
127 # degraded 1000 -> 0
128 # state: active+undersized+degraded+remapped+backfilling
129
130 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
131 # 1.0 500 0 1000 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:44:23.531466 22'500 26:617 [1,0,2] 1 [1] 1 0'0 2017-10-27 09:43:44.654882 0'0 2017-10-27 09:43:44.654882
# Grow a size-1 pool to size 3 and verify the backfill statistics:
# check() is told to expect degraded going from 2 * objects to 0 and
# misplaced staying at 0.
#   $1 - test directory
function TEST_backfill_sizeup() {
    local dir=$1
    local osd_id

    # One mon, one mgr and OSDs 0-5.
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS
    for osd_id in 0 1 2 3 4 5 ; do
        run_osd $dir $osd_id || return 1
    done

    # Single PG, single replica.
    create_pool $poolname 1 1
    ceph osd pool set $poolname size 1

    wait_for_clean || return 1

    for i in $(seq 1 $objects) ; do
        rados -p $poolname put obj$i /dev/null
    done

    # Change the pool size while the nobackfill flag is set, then clear it.
    ceph osd set nobackfill
    ceph osd pool set $poolname size 3
    sleep 2
    ceph osd unset nobackfill

    wait_for_clean || return 1

    local PG=$(get_pg $poolname obj1)
    local primary=$(get_primary $poolname obj1)

    local degraded=$(( objects * 2 ))
    check $dir $PG $primary replicated $degraded 0 0 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
171
172
173
174 # [1] -> [0, 2, 4]
175 # degraded 1000 -> 0
176 # misplaced 500 -> 0
177 # state: active+undersized+degraded+remapped+backfilling
178
179 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
180 # 1.0 500 0 1000 500 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:48:53.326849 22'500 26:603 [0,2,4] 0 [1] 1 0'0 2017-10-27 09:48:13.236253 0'0 2017-10-27 09:48:13.236253
# Mark the only replica's OSD out while growing a size-1 pool to size 3.
# check() is told to expect degraded 2 * objects -> 0 and
# misplaced objects -> 0.
#   $1 - test directory
function TEST_backfill_sizeup_out() {
    local dir=$1
    local osd_id

    # One mon, one mgr and OSDs 0-5.
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for osd_id in 0 1 2 3 4 5 ; do
        run_osd $dir $osd_id || return 1
    done

    # Single PG, single replica.
    create_pool $poolname 1 1
    ceph osd pool set $poolname size 1

    wait_for_clean || return 1

    for i in $(seq 1 $objects) ; do
        rados -p $poolname put obj$i /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)

    # Make the changes while the nobackfill flag is set, then clear it.
    ceph osd set nobackfill
    ceph osd out osd.$primary
    ceph osd pool set $poolname size 3
    sleep 2
    ceph osd unset nobackfill

    wait_for_clean || return 1

    local degraded=$(( objects * 2 ))
    check $dir $PG $primary replicated $degraded 0 $objects 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
221
222
223 # [1 0] -> [1,2]/[1,0]
224 # misplaced 500 -> 0
225 # state: active+remapped+backfilling
226
227 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
228 # 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-10-27 09:51:18.800517 22'500 25:570 [1,2] 1 [1,0] 1 0'0 2017-10-27 09:50:40.441274 0'0 2017-10-27 09:50:40.441274
# Mark a non-primary OSD of a size-2 pool out. check() is told to expect
# degraded staying at 0 and misplaced going from objects to 0.
#   $1 - test directory
function TEST_backfill_out() {
    local dir=$1
    local osd_id

    # One mon, one mgr and OSDs 0-5.
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for osd_id in 0 1 2 3 4 5 ; do
        run_osd $dir $osd_id || return 1
    done

    # Single PG, two replicas.
    create_pool $poolname 1 1
    ceph osd pool set $poolname size 2
    sleep 5

    wait_for_clean || return 1

    for i in $(seq 1 $objects) ; do
        rados -p $poolname put obj$i /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)

    # Mark the OSD out while the nobackfill flag is set, then clear the flag.
    ceph osd set nobackfill
    ceph osd out osd.$(get_not_primary $poolname obj1)
    sleep 2
    ceph osd unset nobackfill

    wait_for_clean || return 1

    check $dir $PG $primary replicated 0 0 $objects 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
268
269
270 # [0, 1] -> [0, 2]/[0]
271 # osd 1 down/out
272 # degraded 500 -> 0
273 # state: active+undersized+degraded+remapped+backfilling
274
275 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
276 # 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:53:24.051091 22'500 27:719 [0,2] 0 [0] 0 0'0 2017-10-27 09:52:43.188368 0'0 2017-10-27 09:52:43.188368
# Kill a non-primary OSD of a size-2 pool and mark it down and out.
# check() is told to expect degraded going from objects to 0 and misplaced
# staying at 0.
#   $1 - test directory
function TEST_backfill_down_out() {
    local dir=$1
    local osd_id

    # One mon, one mgr and OSDs 0-5.
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for osd_id in 0 1 2 3 4 5 ; do
        run_osd $dir $osd_id || return 1
    done

    # Single PG, two replicas.
    create_pool $poolname 1 1
    ceph osd pool set $poolname size 2
    sleep 5

    wait_for_clean || return 1

    for i in $(seq 1 $objects) ; do
        rados -p $poolname put obj$i /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    # Stop the daemon through its pid file, then mark it down and out, all
    # while the nobackfill flag is set.
    ceph osd set nobackfill
    kill $(cat $dir/osd.${otherosd}.pid)
    ceph osd down osd.${otherosd}
    ceph osd out osd.${otherosd}
    sleep 2
    ceph osd unset nobackfill

    wait_for_clean || return 1

    check $dir $PG $primary replicated $objects 0 0 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
319
320
321 # [1, 0] -> [2, 3, 4]
322 # degraded 500 -> 0
323 # misplaced 1000 -> 0
324 # state: active+undersized+degraded+remapped+backfilling
325
326 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
327 # 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:55:50.375722 23'500 27:553 [2,4,3] 2 [1,0] 1 0'0 2017-10-27 09:55:10.230919 0'0 2017-10-27 09:55:10.230919
# Grow a size-2 pool to size 3 and mark both of its original OSDs out.
# check() is told to expect degraded objects -> 0 and
# misplaced 2 * objects -> 0.
#   $1 - test directory
function TEST_backfill_out2() {
    local dir=$1
    local osd_id

    # One mon, one mgr and OSDs 0-5.
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for osd_id in 0 1 2 3 4 5 ; do
        run_osd $dir $osd_id || return 1
    done

    # Single PG, two replicas.
    create_pool $poolname 1 1
    ceph osd pool set $poolname size 2
    sleep 5

    wait_for_clean || return 1

    for i in $(seq 1 $objects) ; do
        rados -p $poolname put obj$i /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    ceph osd set nobackfill
    ceph osd pool set $poolname size 3
    ceph osd out osd.${otherosd}
    ceph osd out osd.${primary}
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary $poolname obj1)
    ceph osd unset nobackfill
    # Have the current primary fetch the latest osdmap and kick its recovery
    # work queue.
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    local misplaced=$(( objects * 2 ))

    check $dir $PG $primary replicated $objects 0 $misplaced 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
377
378
379 # [0,1] -> [2,4,3]/[0,1]
380 # degraded 1000 -> 0
381 # misplaced 1000 -> 500
382 # state ends at active+clean+remapped [2,4,3]/[2,4,3,0]
383 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
384 # 1.0 500 0 1000 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-30 18:21:45.995149 19'500 23:1817 [2,4,3] 2 [0,1] 0 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904
385 # ENDS:
386 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
387 # 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-30 18:22:42.293730 19'500 25:2557 [2,4,3] 2 [2,4,3,0] 2 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904
# With five OSDs, mark both OSDs of a size-2 pool out and grow the pool to
# size 4. check() is told to expect degraded 2 * objects -> 0 and
# misplaced 2 * objects -> objects.
#   $1 - test directory
function TEST_backfill_sizeup4_allout() {
    local dir=$1
    local osd_id

    # One mon, one mgr and OSDs 0-4.
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for osd_id in 0 1 2 3 4 ; do
        run_osd $dir $osd_id || return 1
    done

    # Single PG, two replicas.
    create_pool $poolname 1 1
    ceph osd pool set $poolname size 2

    wait_for_clean || return 1

    for i in $(seq 1 $objects) ; do
        rados -p $poolname put obj$i /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    ceph osd set nobackfill
    ceph osd out osd.$otherosd
    ceph osd out osd.$primary
    ceph osd pool set $poolname size 4
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary $poolname obj1)
    ceph osd unset nobackfill
    # Have the current primary fetch the latest osdmap and kick its recovery
    # work queue.
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    # The same starting value is used for both degraded and misplaced.
    local misdeg=$(( objects * 2 ))
    check $dir $PG $primary replicated $misdeg 0 $misdeg $objects || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
434
435
436 # [1,2,0] -> [3]/[1,2]
437 # misplaced 1000 -> 500
438 # state ends at active+clean+remapped [3]/[3,1]
439 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
440 # 1.0 500 0 0 1000 0 0 100 100 active+remapped+backfilling 2017-11-28 19:13:56.092439 21'500 31:790 [3] 3 [1,2] 1 0'0 2017-11-28 19:13:28.698661 0'0 2017-11-28 19:13:28.698661
# With four OSDs, mark all three OSDs of a size-3 pool out and shrink the
# pool to size 2. check() is told to expect degraded staying at 0 and
# misplaced 2 * objects -> objects, and its single-backfilling-OSD setup
# verification is turned off.
#   $1 - test directory
function TEST_backfill_remapped() {
    local dir=$1
    local osd_id

    # One mon, one mgr and OSDs 0-3.
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for osd_id in 0 1 2 3 ; do
        run_osd $dir $osd_id || return 1
    done

    # Single PG, three replicas.
    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3
    sleep 5

    wait_for_clean || return 1

    for i in $(seq 1 $objects) ; do
        rados -p $poolname put obj$i /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    ceph osd set nobackfill
    ceph osd out osd.${otherosd}
    # Mark out the first OSD of the object that is neither the primary nor
    # $otherosd.
    for i in $(get_osds $poolname obj1) ; do
        if [ "$i" = "$primary" ] || [ "$i" = "$otherosd" ]; then
            continue
        fi
        ceph osd out osd.$i
        break
    done
    ceph osd out osd.${primary}
    ceph osd pool set $poolname size 2
    sleep 2

    # primary may change due to invalidating the old pg_temp, which was [1,2,0],
    # but up_primary (3) chooses [0,1] for acting.
    primary=$(get_primary $poolname obj1)

    ceph osd unset nobackfill
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0

    sleep 2

    wait_for_clean || return 1

    local misplaced=$(( objects * 2 ))

    # Trailing "" "" false: no "shard" expectations, skip the setup check.
    check $dir $PG $primary replicated 0 0 $misplaced $objects "" "" false || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
501
502 # [1,0,2] -> [4,3,NONE]/[1,0,2]
503 # misplaced 1500 -> 500
504 # state ends at active+clean+remapped [4,3,NONE]/[4,3,2]
505
506 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
507 # 1.0 500 0 0 1500 0 0 100 100 active+degraded+remapped+backfilling 2017-10-31 16:53:39.467126 19'500 23:615 [4,3,NONE] 4 [1,0,2] 1 0'0 2017-10-31 16:52:59.624429 0'0 2017-10-31 16:52:59.624429
508
509
510 # ENDS:
511
512 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
513 # 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-31 16:48:34.414040 19'500 25:2049 [4,3,NONE] 4 [4,3,2] 4 0'0 2017-10-31 16:46:58.203440 0'0 2017-10-31 16:46:58.203440
# Erasure-coded pool (k=2 m=1) on five OSDs: mark every OSD holding the
# object out. check() is told to expect degraded staying at 0 and
# misplaced 3 * objects -> objects.
#   $1 - test directory
function TEST_backfill_ec_all_out() {
    local dir=$1
    local osd_id

    # One mon, one mgr and OSDs 0-4.
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for osd_id in 0 1 2 3 4 ; do
        run_osd $dir $osd_id || return 1
    done

    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
    create_pool $poolname 1 1 erasure myprofile

    wait_for_clean || return 1

    for i in $(seq 1 $objects) ; do
        rados -p $poolname put obj$i /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)

    ceph osd set nobackfill
    for o in $(get_osds $poolname obj1) ; do
        ceph osd out osd.$o
    done
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary $poolname obj1)
    ceph osd unset nobackfill
    # Have the current primary fetch the latest osdmap and kick its recovery
    # work queue.
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    local misplaced=$(( objects * 3 ))
    check $dir $PG $primary erasure 0 0 $misplaced $objects || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
560
561
562 # [1,0,2] -> [4, 0, 2]
563 # misplaced 500 -> 0
564 # active+remapped+backfilling
565 #
566 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
567 # 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-11-08 18:05:39.036420 24'500 27:742 [4,0,2] 4 [1,0,2] 1 0'0 2017-11-08 18:04:58.697315 0'0 2017-11-08 18:04:58.697315
# Erasure-coded pool (k=2 m=1) on five OSDs: mark only the primary out.
# check() is told to expect degraded staying at 0 and misplaced going from
# objects to 0.
#   $1 - test directory
# Returns 1 when a daemon fails to start, the cluster does not become clean,
# or the logged statistics are outside the expected windows.
function TEST_backfill_ec_prim_out() {
    local dir=$1

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1
    run_osd $dir 3 || return 1
    run_osd $dir 4 || return 1

    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
    create_pool $poolname 1 1 erasure myprofile

    wait_for_clean || return 1

    for i in $(seq 1 $objects)
    do
        rados -p $poolname put obj$i /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)

    # Mark the primary out while the nobackfill flag is set.
    ceph osd set nobackfill
    ceph osd out osd.$primary
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary $poolname obj1)
    ceph osd unset nobackfill
    # Have the current primary fetch the latest osdmap and kick its recovery
    # work queue.
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    # Only the misplaced count is expected to move (objects -> 0); the unused
    # "misplaced" local that used to be computed here has been dropped.
    check $dir $PG $primary erasure 0 0 $objects 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
611
612 # [1,0] -> [1,2]
613 # degraded 500 -> 0
614 # misplaced 1000 -> 0
615 #
616 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
617 # 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 14:02:29.439105 24'500 29:1020 [4,3,5] 4 [1,NONE,2] 1 0'0 2017-11-06 14:01:46.509963 0'0 2017-11-06 14:01:46.509963
# Erasure-coded pool (k=2 m=1, min_size 2) on six OSDs: kill one non-primary
# OSD and mark every OSD of the object out. check() is told to expect
# degraded objects -> 0 and misplaced 2 * objects -> 0.
#   $1 - test directory
function TEST_backfill_ec_down_all_out() {
    local dir=$1
    local osd_id

    # One mon, one mgr and OSDs 0-5.
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for osd_id in 0 1 2 3 4 5 ; do
        run_osd $dir $osd_id || return 1
    done

    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
    create_pool $poolname 1 1 erasure myprofile
    ceph osd pool set $poolname min_size 2

    wait_for_clean || return 1

    for i in $(seq 1 $objects) ; do
        rados -p $poolname put obj$i /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)
    local allosds=$(get_osds $poolname obj1)

    # Stop one daemon through its pid file and mark it down, then mark every
    # OSD of the object out, all while the nobackfill flag is set.
    ceph osd set nobackfill
    kill $(cat $dir/osd.${otherosd}.pid)
    ceph osd down osd.${otherosd}
    for o in $allosds ; do
        ceph osd out osd.$o
    done
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary $poolname obj1)
    ceph osd unset nobackfill
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0
    sleep 2
    flush_pg_stats

    # Wait for recovery to finish
    # Can't use wait_for_clean() because state goes from active+undersized+degraded+remapped+backfilling
    # to active+undersized+remapped
    # First poll every 2 seconds for as long as any PG state is exactly "incomplete".
    while test "$(ceph --format json pg dump pgs |
       jq '.pg_stats | [.[] | .state | select(. == "incomplete")] | length')" -ne "0"
    do
        sleep 2
    done
    ceph pg dump pgs
    # Then give the PG up to 60 one-second polls to leave the backfilling state.
    for i in $(seq 1 60) ; do
        ceph pg dump pgs | grep ^$PG | grep -qv backfilling && break
        if [ $i = "60" ]; then
            echo "Timeout waiting for recovery to finish"
            return 1
        fi
        sleep 1
    done

    ceph pg dump pgs

    local misplaced=$(( objects * 2 ))
    check $dir $PG $primary erasure $objects 0 $misplaced 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
699
700
701 # [1,0,2] -> [1,3,2]
702 # degraded 500 -> 0
703 # active+backfilling+degraded
704 #
705 # PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
706 # 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 13:57:25.412322 22'500 28:794 [1,3,2] 1 [1,NONE,2] 1 0'0 2017-11-06 13:54:58.033906 0'0 2017-11-06 13:54:58.033906
# Erasure-coded pool (k=2 m=1, min_size 2) on six OSDs: kill one non-primary
# OSD and mark it down and out. check() is told to expect degraded going
# from objects to 0 and misplaced staying at 0.
#   $1 - test directory
# Returns 1 when a daemon fails to start, the cluster does not become clean,
# or the logged statistics are outside the expected windows.
function TEST_backfill_ec_down_out() {
    local dir=$1

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1
    run_osd $dir 3 || return 1
    run_osd $dir 4 || return 1
    run_osd $dir 5 || return 1

    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
    create_pool $poolname 1 1 erasure myprofile
    ceph osd pool set $poolname min_size 2

    wait_for_clean || return 1

    for i in $(seq 1 $objects)
    do
        rados -p $poolname put obj$i /dev/null
    done

    local PG=$(get_pg $poolname obj1)
    # Remember primary during the backfill
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)

    # Stop the daemon through its pid file, then mark it down and out, all
    # while the nobackfill flag is set.
    ceph osd set nobackfill
    kill $(cat $dir/osd.${otherosd}.pid)
    ceph osd down osd.${otherosd}
    ceph osd out osd.${otherosd}
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary $poolname obj1)
    ceph osd unset nobackfill
    # Have the current primary fetch the latest osdmap and kick its recovery
    # work queue.
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    # Only the degraded count is expected to move (objects -> 0); the unused
    # "misplaced" local that used to be computed here has been dropped.
    check $dir $PG $primary erasure $objects 0 0 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
755
756
757 main osd-backfill-stats "$@"
758
759 # Local Variables:
760 # compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-stats.sh"
761 # End: