]> git.proxmox.com Git - ceph.git/blob - ceph/qa/standalone/osd-backfill/osd-backfill-stats.sh
import quincy beta 17.1.0
[ceph.git] / ceph / qa / standalone / osd-backfill / osd-backfill-stats.sh
1 #!/usr/bin/env bash
2 #
3 # Copyright (C) 2017 Red Hat <contact@redhat.com>
4 #
5 # Author: David Zafman <dzafman@redhat.com>
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
10 # any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
16 #
17
18 source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
19
# Test driver.  Exports the cluster settings shared by every test, then runs
# each function named on the command line -- or, when none is named, every
# TEST_* function reported by "set" -- between setup and teardown of $dir.
# Returns 1 as soon as setup, a test or teardown fails.
function run() {
    local dir=$1
    shift

    # Fix port????
    # git grep '\<7114\>' : there must be only one
    CEPH_MON="127.0.0.1:7114"
    export CEPH_MON

    # Append the options one by one, each followed by a single space.
    # "high_recovery_ops" is the profile to use if mclock_scheduler is enabled.
    local opt
    for opt in \
        "--fsid=$(uuidgen)" \
        "--auth-supported=none" \
        "--mon-host=$CEPH_MON" \
        "--osd_min_pg_log_entries=5" \
        "--osd_max_pg_log_entries=10" \
        "--osd-mclock-profile=high_recovery_ops"
    do
        CEPH_ARGS+="$opt "
    done
    export CEPH_ARGS

    # Parameters read by the margin helpers and the TEST_* functions.
    margin=10
    objects=200
    poolname=test
    export margin objects poolname

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs; do
        { setup $dir && $func $dir && teardown $dir; } || return 1
    done
}
43
# Succeeds when $1 is no greater than $2 and at most $margin below it.
function below_margin() {
    local -i value=$1
    local -i goal=$2

    if (( value <= goal && value >= goal - $margin )); then
        return 0
    fi
    return 1
}
51
# Succeeds when $1 is no less than $2 and at most $margin above it.
function above_margin() {
    local -i value=$1
    local -i goal=$2

    if (( value >= goal && value <= goal + $margin )); then
        return 0
    fi
    return 1
}
59
# Command templates run through eval by check() and its helper.  They are
# single-quoted on purpose: PG, log, which, UPACT and addp are expanded at
# eval time from the variables visible in the evaluating function.
#   FIND_UPACT - the "[...]" text that follows ") " on the last backfilling
#                update_calc_stats line of the PG (cut at the first space or "p")
#   FIND_FIRST - trailing number of the first "update_calc_stats $which" line
#                that contains " ${UPACT}${addp}" and does not contain "est"
#   FIND_LAST  - trailing number of the last "update_calc_stats $which" line
FIND_UPACT='grep "pg[[]${PG}.*backfilling.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"'
FIND_FIRST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"'
FIND_LAST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"'

# Helper for check(): verify the first and last logged value of one statistic.
#   $1 - statistic name as logged after "update_calc_stats"
#   $2 - expected starting value (the first value may be up to $margin lower)
#   $3 - expected final value (the last value may be up to $margin higher)
# Relies on bash dynamic scoping: PG, log, UPACT and addp are the caller's
# locals and are picked up when the FIND_* templates are evaluated.
function _check_stat_range() {
    local which=$1
    local start=$2
    local end=$3
    local FIRST LAST

    # The templates are quoted for eval so their bracket expressions are not
    # subject to pathname expansion before being re-parsed.
    FIRST=$(eval "$FIND_FIRST")
    [ -n "$FIRST" ] || return 1
    below_margin "$FIRST" "$start" || return 1
    LAST=$(eval "$FIND_LAST")
    [ -n "$LAST" ] || return 1
    above_margin "$LAST" "$end" || return 1
}

# Verify the statistics the primary logged while backfilling a PG.
#   $1  dir              directory holding the osd.*.log files
#   $2  PG               placement group id
#   $3  primary          id of the OSD whose log is examined
#   $4  type             "erasure" or anything else (treated as replicated)
#   $5  degraded_start   expected first degraded value
#   $6  degraded_end     expected last degraded value
#   $7  misplaced_start  expected first misplaced value
#   $8  misplaced_end    expected last misplaced value
#   $9  primary_start    optional: expected first "shard $primary" value
#   $10 primary_end      optional: expected last "shard $primary" value
#   $11 check_setup      "true" (default) to require that exactly one OSD log
#                        mentions +backfilling
# Returns 0 when every check passes, 1 otherwise.
function check() {
    local dir=$1
    local PG=$2
    local primary=$3
    local type=$4
    local degraded_start=$5
    local degraded_end=$6
    local misplaced_start=$7
    local misplaced_end=$8
    local primary_start=${9:-}
    local primary_end=${10:-}
    local check_setup=${11:-true}
    local log UPACT

    log=$(grep -l +backfilling "$dir/osd.$primary.log")
    if [ "$check_setup" = "true" ]; then
        # grep -l prints one file name per line; count them.
        local -a alllogs
        mapfile -t alllogs < <(grep -l +backfilling "$dir"/osd.*.log)
        if [ "${#alllogs[@]}" -ne 1 ]; then
            echo "Test setup failure, a single OSD should have performed backfill"
            return 1
        fi
    fi

    # With no log to search, the evaluated greps would read stdin instead.
    if [ -z "$log" ]; then
        echo "No +backfilling entry found in $dir/osd.$primary.log"
        return 1
    fi

    # Character expected right after the up/acting text on a log line.
    local addp=" "
    if [ "$type" = "erasure" ]; then
        addp="p"
    fi

    UPACT=$(eval "$FIND_UPACT")
    [ -n "$UPACT" ] || return 1

    # The starting value is taken from the first line logged with the final
    # up/acting text, because of false recovery starts.
    _check_stat_range degraded "$degraded_start" "$degraded_end" || return 1
    _check_stat_range misplaced "$misplaced_start" "$misplaced_end" || return 1

    # This is the value that is set into MISSING_ON_PRIMARY
    if [ -n "$primary_start" ]; then
        _check_stat_range "shard $primary" "$primary_start" "$primary_end" || return 1
    fi
}
127
# Pool size goes from 1 to 3.
#   up/acting: [1] -> [1, 0, 2]
#   degraded:  1000 -> 0 (twice the object count; the sample has 500 objects)
#   state:     active+undersized+degraded+remapped+backfilling
#
# Sample PG stats row:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 1000 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:44:23.531466 22'500 26:617 [1,0,2] 1 [1] 1 0'0 2017-10-27 09:43:44.654882 0'0 2017-10-27 09:43:44.654882
function TEST_backfill_sizeup() {
    local dir=$1
    local osd

    { run_mon $dir a && run_mgr $dir x; } || return 1
    export CEPH_ARGS
    for osd in 0 1 2 3 4 5; do
        run_osd $dir $osd || return 1
    done

    # Begin with a single copy of every object.
    create_pool $poolname 1 1
    ceph osd pool set $poolname size 1 --yes-i-really-mean-it

    wait_for_clean || return 1

    for i in $(seq 1 $objects); do
        rados -p $poolname put obj$i /dev/null
    done

    # Change the size with backfill blocked, then let backfill run.
    ceph osd set nobackfill
    ceph osd pool set $poolname size 3
    sleep 2
    ceph osd unset nobackfill

    wait_for_clean || return 1

    local PG=$(get_pg $poolname obj1)
    local primary=$(get_primary $poolname obj1)
    local degraded=$(( objects * 2 ))

    check $dir $PG $primary replicated $degraded 0 0 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
173
174
175
# Pool size goes from 1 to 3 while the original primary is marked out.
#   up/acting: [1] -> [0, 2, 4]
#   degraded:  1000 -> 0 (twice the object count; the sample has 500 objects)
#   misplaced: 500 -> 0  (the object count)
#   state:     active+undersized+degraded+remapped+backfilling
#
# Sample PG stats row:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 1000 500 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:48:53.326849 22'500 26:603 [0,2,4] 0 [1] 1 0'0 2017-10-27 09:48:13.236253 0'0 2017-10-27 09:48:13.236253
function TEST_backfill_sizeup_out() {
    local dir=$1
    local osd

    { run_mon $dir a && run_mgr $dir x; } || return 1
    for osd in 0 1 2 3 4 5; do
        run_osd $dir $osd || return 1
    done

    # Begin with a single copy of every object.
    create_pool $poolname 1 1
    ceph osd pool set $poolname size 1 --yes-i-really-mean-it

    wait_for_clean || return 1

    for i in $(seq 1 $objects); do
        rados -p $poolname put obj$i /dev/null
    done

    # Look up the PG and its primary before the map changes; this is the
    # primary whose log is checked below.
    local primary=$(get_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)

    ceph osd set nobackfill
    ceph osd out osd.$primary
    ceph osd pool set $poolname size 3
    sleep 2
    ceph osd unset nobackfill

    wait_for_clean || return 1

    local degraded=$(( objects * 2 ))
    check $dir $PG $primary replicated $degraded 0 $objects 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
223
224
# Size-2 pool; the non-primary OSD is marked out.
#   up/acting: [1 0] -> [1,2]/[1,0]
#   misplaced: 500 -> 0 (the object count; the sample has 500 objects)
#   state:     active+remapped+backfilling
#
# Sample PG stats row:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-10-27 09:51:18.800517 22'500 25:570 [1,2] 1 [1,0] 1 0'0 2017-10-27 09:50:40.441274 0'0 2017-10-27 09:50:40.441274
function TEST_backfill_out() {
    local dir=$1
    local osd

    { run_mon $dir a && run_mgr $dir x; } || return 1
    for osd in 0 1 2 3 4 5; do
        run_osd $dir $osd || return 1
    done

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 2
    sleep 5

    wait_for_clean || return 1

    for i in $(seq 1 $objects); do
        rados -p $poolname put obj$i /dev/null
    done

    # Remember the PG and the primary that will perform the backfill.
    local primary=$(get_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)

    # Mark the other replica out with backfill blocked, then release it.
    ceph osd set nobackfill
    ceph osd out osd.$(get_not_primary $poolname obj1)
    sleep 2
    ceph osd unset nobackfill

    wait_for_clean || return 1

    check $dir $PG $primary replicated 0 0 $objects 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
270
271
# Size-2 pool; the non-primary OSD is killed, marked down and marked out.
#   up/acting: [0, 1] -> [0, 2]/[0]
#   osd 1 down/out
#   degraded:  500 -> 0 (the object count; the sample has 500 objects)
#   state:     active+undersized+degraded+remapped+backfilling
#
# Sample PG stats row:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:53:24.051091 22'500 27:719 [0,2] 0 [0] 0 0'0 2017-10-27 09:52:43.188368 0'0 2017-10-27 09:52:43.188368
function TEST_backfill_down_out() {
    local dir=$1
    local osd

    { run_mon $dir a && run_mgr $dir x; } || return 1
    for osd in 0 1 2 3 4 5; do
        run_osd $dir $osd || return 1
    done

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 2
    sleep 5

    wait_for_clean || return 1

    for i in $(seq 1 $objects); do
        rados -p $poolname put obj$i /dev/null
    done

    # Remember the PG, the primary that will backfill, and the replica to kill.
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)

    # Take the replica away with backfill blocked, then release backfill.
    ceph osd set nobackfill
    kill $(cat $dir/osd.${otherosd}.pid)
    for osd in down out; do
        ceph osd $osd osd.${otherosd}
    done
    sleep 2
    ceph osd unset nobackfill

    wait_for_clean || return 1

    check $dir $PG $primary replicated $objects 0 0 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
321
322
# Size-2 pool grows to size 3 while both original OSDs are marked out.
#   up/acting: [1, 0] -> [2, 3, 4]
#   degraded:  500 -> 0  (the object count; the sample has 500 objects)
#   misplaced: 1000 -> 0 (twice the object count)
#   state:     active+undersized+degraded+remapped+backfilling
#
# Sample PG stats row:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:55:50.375722 23'500 27:553 [2,4,3] 2 [1,0] 1 0'0 2017-10-27 09:55:10.230919 0'0 2017-10-27 09:55:10.230919
function TEST_backfill_out2() {
    local dir=$1
    local osd

    { run_mon $dir a && run_mgr $dir x; } || return 1
    for osd in 0 1 2 3 4 5; do
        run_osd $dir $osd || return 1
    done

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 2
    sleep 5

    wait_for_clean || return 1

    for i in $(seq 1 $objects); do
        rados -p $poolname put obj$i /dev/null
    done

    # Remember the PG and both OSDs currently serving it.
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)

    ceph osd set nobackfill
    ceph osd pool set $poolname size 3
    for osd in $otherosd $primary; do
        ceph osd out osd.$osd
    done
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary $poolname obj1)
    ceph osd unset nobackfill
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    local misplaced=$(( objects * 2 ))

    check $dir $PG $primary replicated $objects 0 $misplaced 0 || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
379
380
# Five OSDs; a size-2 pool grows to size 4 while both original OSDs are
# marked out.
#   up/acting: [0,1] -> [2,4,3]/[0,1]
#   degraded:  1000 -> 0   (twice the object count; the sample has 500 objects)
#   misplaced: 1000 -> 500 (twice the object count -> the object count)
#   state ends at active+clean+remapped [2,4,3]/[2,4,3,0]
#
# Sample PG stats row while backfilling:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 1000 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-30 18:21:45.995149 19'500 23:1817 [2,4,3] 2 [0,1] 0 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904
# ENDS:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-30 18:22:42.293730 19'500 25:2557 [2,4,3] 2 [2,4,3,0] 2 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904
function TEST_backfill_sizeup4_allout() {
    local dir=$1
    local osd

    { run_mon $dir a && run_mgr $dir x; } || return 1
    for osd in 0 1 2 3 4; do
        run_osd $dir $osd || return 1
    done

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 2

    wait_for_clean || return 1

    for i in $(seq 1 $objects); do
        rados -p $poolname put obj$i /dev/null
    done

    # Remember the PG and both OSDs currently serving it.
    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)

    ceph osd set nobackfill
    for osd in $otherosd $primary; do
        ceph osd out osd.$osd
    done
    ceph osd pool set $poolname size 4
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary $poolname obj1)
    ceph osd unset nobackfill
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    # Degraded and misplaced both start at twice the object count.
    local misdeg=$(( objects * 2 ))
    check $dir $PG $primary replicated $misdeg 0 $misdeg $objects || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
436
437
# Four OSDs; a size-3 pool has all three of its OSDs marked out and is then
# shrunk to size 2.
#   up/acting: [1,2,0] -> [3]/[1,2]
#   misplaced: 1000 -> 500 (twice the object count -> the object count; the
#              sample has 500 objects)
#   state ends at active+clean+remapped [3]/[3,1]
#
# Sample PG stats row:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 0 1000 0 0 100 100 active+remapped+backfilling 2017-11-28 19:13:56.092439 21'500 31:790 [3] 3 [1,2] 1 0'0 2017-11-28 19:13:28.698661 0'0 2017-11-28 19:13:28.698661
function TEST_backfill_remapped() {
    local dir=$1
    local osd i

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for osd in 0 1 2 3; do
        run_osd "$dir" "$osd" || return 1
    done

    create_pool "$poolname" 1 1
    ceph osd pool set "$poolname" size 3
    sleep 5

    wait_for_clean || return 1

    for i in $(seq 1 "$objects"); do
        rados -p "$poolname" put "obj$i" /dev/null
    done

    local PG=$(get_pg "$poolname" obj1)
    # Remember primary during the backfill
    local primary=$(get_primary "$poolname" obj1)
    local otherosd=$(get_not_primary "$poolname" obj1)

    ceph osd set nobackfill
    ceph osd out "osd.${otherosd}"
    # Mark out the first OSD of the PG that is neither the primary nor the
    # replica already marked out.
    for osd in $(get_osds "$poolname" obj1); do
        if [[ "$osd" == "$primary" || "$osd" == "$otherosd" ]]; then
            continue
        fi
        ceph osd out "osd.$osd"
        break
    done
    ceph osd out "osd.${primary}"
    ceph osd pool set "$poolname" size 2
    sleep 2

    # primary may change due to invalidating the old pg_temp, which was [1,2,0],
    # but up_primary (3) chooses [0,1] for acting.
    primary=$(get_primary "$poolname" obj1)

    ceph osd unset nobackfill
    ceph tell "osd.$primary" get_latest_osdmap
    ceph tell "osd.$primary" debug kick_recovery_wq 0

    sleep 2

    wait_for_clean || return 1

    local misplaced=$(( objects * 2 ))

    # The final "false" turns off check()'s single-backfilling-OSD test.
    check "$dir" "$PG" "$primary" replicated 0 0 "$misplaced" "$objects" "" "" false || return 1

    delete_pool "$poolname"
    kill_daemons "$dir" || return 1
}
503
# Erasure-coded pool (k=2 m=1) on five OSDs; every OSD of the PG is marked out.
#   up/acting: [1,0,2] -> [4,3,NONE]/[1,0,2]
#   misplaced: 1500 -> 500 (three times the object count -> the object count;
#              the sample has 500 objects)
#   state ends at active+clean+remapped [4,3,NONE]/[4,3,2]
#
# Sample PG stats row while backfilling:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 0 1500 0 0 100 100 active+degraded+remapped+backfilling 2017-10-31 16:53:39.467126 19'500 23:615 [4,3,NONE] 4 [1,0,2] 1 0'0 2017-10-31 16:52:59.624429 0'0 2017-10-31 16:52:59.624429
#
# ENDS:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-31 16:48:34.414040 19'500 25:2049 [4,3,NONE] 4 [4,3,2] 4 0'0 2017-10-31 16:46:58.203440 0'0 2017-10-31 16:46:58.203440
function TEST_backfill_ec_all_out() {
    local dir=$1
    local osd

    { run_mon $dir a && run_mgr $dir x; } || return 1
    for osd in 0 1 2 3 4; do
        run_osd $dir $osd || return 1
    done

    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
    create_pool $poolname 1 1 erasure myprofile

    wait_for_clean || return 1

    for i in $(seq 1 $objects); do
        rados -p $poolname put obj$i /dev/null
    done

    # Remember the PG and its current primary.
    local primary=$(get_primary $poolname obj1)
    local PG=$(get_pg $poolname obj1)

    ceph osd set nobackfill
    for o in $(get_osds $poolname obj1); do
        ceph osd out osd.$o
    done
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary $poolname obj1)
    ceph osd unset nobackfill
    ceph tell osd.$primary get_latest_osdmap
    ceph tell osd.$primary debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    local misplaced=$(( objects * 3 ))
    check $dir $PG $primary erasure 0 0 $misplaced $objects || return 1

    delete_pool $poolname
    kill_daemons $dir || return 1
}
562
563
# Erasure-coded pool (k=2 m=1) on five OSDs; only the primary is marked out.
#   up/acting: [1,0,2] -> [4, 0, 2]
#   misplaced: 500 -> 0 (the object count; the sample has 500 objects)
#   state:     active+remapped+backfilling
#
# Sample PG stats row:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-11-08 18:05:39.036420 24'500 27:742 [4,0,2] 4 [1,0,2] 1 0'0 2017-11-08 18:04:58.697315 0'0 2017-11-08 18:04:58.697315
function TEST_backfill_ec_prim_out() {
    local dir=$1
    local osd i

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for osd in 0 1 2 3 4; do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
    create_pool "$poolname" 1 1 erasure myprofile

    wait_for_clean || return 1

    for i in $(seq 1 "$objects"); do
        rados -p "$poolname" put "obj$i" /dev/null
    done

    local PG=$(get_pg "$poolname" obj1)
    # Remember primary during the backfill
    local primary=$(get_primary "$poolname" obj1)

    ceph osd set nobackfill
    ceph osd out "osd.$primary"
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary "$poolname" obj1)
    ceph osd unset nobackfill
    ceph tell "osd.$primary" get_latest_osdmap
    ceph tell "osd.$primary" debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    # Misplaced is expected to start at the object count and drain to 0.
    check "$dir" "$PG" "$primary" erasure 0 0 "$objects" 0 || return 1

    delete_pool "$poolname"
    kill_daemons "$dir" || return 1
}
613
# Erasure-coded pool (k=2 m=1, min_size 2) on six OSDs; one non-primary OSD is
# killed and marked down, then every OSD of the PG is marked out.
#   up/acting: [1,0,2] -> [4,3,5]/[1,NONE,2] (per the sample row below)
#   degraded:  500 -> 0  (the object count; the sample has 500 objects)
#   misplaced: 1000 -> 0 (twice the object count)
#
# Sample PG stats row:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 14:02:29.439105 24'500 29:1020 [4,3,5] 4 [1,NONE,2] 1 0'0 2017-11-06 14:01:46.509963 0'0 2017-11-06 14:01:46.509963
function TEST_backfill_ec_down_all_out() {
    local dir=$1
    local osd i
    local -i tries

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for osd in 0 1 2 3 4 5; do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
    create_pool "$poolname" 1 1 erasure myprofile
    ceph osd pool set "$poolname" min_size 2

    wait_for_clean || return 1

    for i in $(seq 1 "$objects"); do
        rados -p "$poolname" put "obj$i" /dev/null
    done

    local PG=$(get_pg "$poolname" obj1)
    # Remember primary during the backfill
    local primary=$(get_primary "$poolname" obj1)
    local otherosd=$(get_not_primary "$poolname" obj1)
    local allosds=$(get_osds "$poolname" obj1)

    ceph osd set nobackfill
    kill $(cat "$dir/osd.${otherosd}.pid")
    ceph osd down "osd.${otherosd}"
    for osd in $allosds; do
        ceph osd out "osd.$osd"
    done
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary "$poolname" obj1)
    ceph osd unset nobackfill
    ceph tell "osd.$primary" get_latest_osdmap
    ceph tell "osd.$primary" debug kick_recovery_wq 0
    sleep 2
    flush_pg_stats

    # Wait for recovery to finish
    # Can't use wait_for_clean() because state goes from active+undersized+degraded+remapped+backfilling
    # to active+undersized+remapped
    #
    # First wait until no PG reports the "incomplete" state.  The wait is
    # capped at 120 polls of 2 seconds (the same 240 second budget as the
    # loop below) so a stuck PG fails the test instead of hanging it.
    for (( tries = 1; ; tries++ )); do
        if ! test "$(ceph --format json pg dump pgs |
              jq '.pg_stats | [.[] | .state | select(. == "incomplete")] | length')" -ne "0"
        then
            break
        fi
        if (( tries >= 120 )); then
            echo "Timeout waiting for incomplete PGs to clear"
            return 1
        fi
        sleep 2
    done
    ceph pg dump pgs
    # Then wait until the PG's row no longer mentions backfilling.
    for i in $(seq 1 240); do
        if ceph pg dump pgs | grep "^$PG" | grep -qv backfilling; then
            break
        fi
        if [ "$i" = "240" ]; then
            echo "Timeout waiting for recovery to finish"
            return 1
        fi
        sleep 1
    done

    ceph pg dump pgs

    local misplaced=$(( objects * 2 ))
    check "$dir" "$PG" "$primary" erasure "$objects" 0 "$misplaced" 0 || return 1

    delete_pool "$poolname"
    kill_daemons "$dir" || return 1
}
701
702
# Erasure-coded pool (k=2 m=1, min_size 2) on six OSDs; one non-primary OSD is
# killed, marked down and marked out.
#   up/acting: [1,0,2] -> [1,3,2]
#   degraded:  500 -> 0 (the object count; the sample has 500 objects)
#   state:     active+backfilling+degraded
#
# Sample PG stats row:
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 13:57:25.412322 22'500 28:794 [1,3,2] 1 [1,NONE,2] 1 0'0 2017-11-06 13:54:58.033906 0'0 2017-11-06 13:54:58.033906
function TEST_backfill_ec_down_out() {
    local dir=$1
    local osd i

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    for osd in 0 1 2 3 4 5; do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
    create_pool "$poolname" 1 1 erasure myprofile
    ceph osd pool set "$poolname" min_size 2

    wait_for_clean || return 1

    for i in $(seq 1 "$objects"); do
        rados -p "$poolname" put "obj$i" /dev/null
    done

    local PG=$(get_pg "$poolname" obj1)
    # Remember primary during the backfill
    local primary=$(get_primary "$poolname" obj1)
    local otherosd=$(get_not_primary "$poolname" obj1)

    ceph osd set nobackfill
    kill $(cat "$dir/osd.${otherosd}.pid")
    ceph osd down "osd.${otherosd}"
    ceph osd out "osd.${otherosd}"
    # Primary might change before backfill starts
    sleep 2
    primary=$(get_primary "$poolname" obj1)
    ceph osd unset nobackfill
    ceph tell "osd.$primary" get_latest_osdmap
    ceph tell "osd.$primary" debug kick_recovery_wq 0
    sleep 2

    wait_for_clean || return 1

    # Degraded is expected to start at the object count and drain to 0;
    # nothing is expected to be misplaced.
    check "$dir" "$PG" "$primary" erasure "$objects" 0 0 0 || return 1

    delete_pool "$poolname"
    kill_daemons "$dir" || return 1
}
757
758
# Script entry point: passes the test name and the command-line arguments to
# main.  main is not defined in this file -- presumably it comes from the
# sourced ceph-helpers.sh and ends up calling run(); TODO confirm.
759 main osd-backfill-stats "$@"
760
761 # Local Variables:
762 # compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-stats.sh"
763 # End: