3 # Copyright (C) 2014 Red Hat <contact@redhat.com>
5 # Author: Loic Dachary <loic@dachary.org>
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
# any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
# Shared standalone-test helpers (run_mon, run_osd, objectstore_tool, ...).
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
20 if [ `uname` = FreeBSD
]; then
21 # erasure coding overwrites are only tested on Bluestore
22 # erasure coding on filestore is unsafe
23 # http://docs.ceph.com/en/latest/rados/operations/erasure-code/#erasure-coding-with-overwrites
24 use_ec_overwrite
=false
29 # Test development and debugging
30 # Set to "yes" in order to ignore diff errors and save results to update test
33 # Filter out mtime and local_mtime dates, version, prior_version and last_reqid (client) from any object_info.
34 jqfilter
='def walk(f):
36 | if type == "object" then
38 ( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f
39 elif type == "array" then map( walk(f) ) | f
42 walk(if type == "object" then del(.mtime) else . end)
43 | walk(if type == "object" then del(.local_mtime) else . end)
44 | walk(if type == "object" then del(.last_reqid) else . end)
45 | walk(if type == "object" then del(.version) else . end)
46 | walk(if type == "object" then del(.prior_version) else . end)'
# Python one-liner: read JSON on stdin and pretty-print it with sorted keys,
# so jq output can be compared with a stable diff.
sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print(json.dumps(ud, sort_keys=True, indent=2))'
export CEPH_MON="127.0.0.1:7107" # git grep '\<7107\>' : there must be only one
# Accumulate the daemon arguments shared by every test in this file.
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
CEPH_ARGS+="--osd-skip-data-digest=false "
60 export -n CEPH_CLI_TEST_DUP_COMMAND
61 local funcs
=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
62 for func
in $funcs ; do
63 setup
$dir ||
return 1
64 $func $dir ||
return 1
65 teardown
$dir ||
return 1
69 function add_something
() {
72 local obj
=${3:-SOMETHING}
73 local scrub
=${4:-noscrub}
75 if [ "$scrub" = "noscrub" ];
77 ceph osd
set noscrub ||
return 1
78 ceph osd
set nodeep-scrub ||
return 1
80 ceph osd
unset noscrub ||
return 1
81 ceph osd
unset nodeep-scrub ||
return 1
85 echo $payload > $dir/ORIGINAL
86 rados
--pool $poolname put
$obj $dir/ORIGINAL ||
return 1
90 # Corrupt one copy of a replicated pool
92 function TEST_corrupt_and_repair_replicated
() {
96 run_mon
$dir a
--osd_pool_default_size=2 ||
return 1
97 run_mgr
$dir x ||
return 1
98 run_osd
$dir 0 ||
return 1
99 run_osd
$dir 1 ||
return 1
100 create_rbd_pool ||
return 1
101 wait_for_clean ||
return 1
103 add_something
$dir $poolname ||
return 1
104 corrupt_and_repair_one
$dir $poolname $
(get_not_primary
$poolname SOMETHING
) ||
return 1
105 # Reproduces http://tracker.ceph.com/issues/8914
106 corrupt_and_repair_one
$dir $poolname $
(get_primary
$poolname SOMETHING
) ||
return 1
110 # Allow repair to be scheduled when some recovering is still undergoing on the same OSD
112 function TEST_allow_repair_during_recovery
() {
116 run_mon
$dir a
--osd_pool_default_size=2 ||
return 1
117 run_mgr
$dir x ||
return 1
118 run_osd
$dir 0 --osd_scrub_during_recovery=false \
119 --osd_repair_during_recovery=true \
120 --osd_debug_pretend_recovery_active=true ||
return 1
121 run_osd
$dir 1 --osd_scrub_during_recovery=false \
122 --osd_repair_during_recovery=true \
123 --osd_debug_pretend_recovery_active=true ||
return 1
124 create_rbd_pool ||
return 1
125 wait_for_clean ||
return 1
127 add_something
$dir $poolname ||
return 1
128 corrupt_and_repair_one
$dir $poolname $
(get_not_primary
$poolname SOMETHING
) ||
return 1
132 # Skip non-repair scrub correctly during recovery
134 function TEST_skip_non_repair_during_recovery
() {
138 run_mon
$dir a
--osd_pool_default_size=2 ||
return 1
139 run_mgr
$dir x ||
return 1
140 run_osd
$dir 0 --osd_scrub_during_recovery=false \
141 --osd_repair_during_recovery=true \
142 --osd_debug_pretend_recovery_active=true ||
return 1
143 run_osd
$dir 1 --osd_scrub_during_recovery=false \
144 --osd_repair_during_recovery=true \
145 --osd_debug_pretend_recovery_active=true ||
return 1
146 create_rbd_pool ||
return 1
147 wait_for_clean ||
return 1
149 add_something
$dir $poolname ||
return 1
150 scrub_and_not_schedule
$dir $poolname $
(get_not_primary
$poolname SOMETHING
) ||
return 1
153 function scrub_and_not_schedule
() {
159 # 1) start a non-repair scrub
161 local pg
=$
(get_pg
$poolname SOMETHING
)
162 local last_scrub
=$
(get_last_scrub_stamp
$pg)
166 # 2) Assure the scrub is not scheduled
168 for ((i
=0; i
< 3; i
++)); do
169 if test "$(get_last_scrub_stamp $pg)" '>' "$last_scrub" ; then
176 # 3) Access to the file must be OK
178 objectstore_tool
$dir $osd SOMETHING list-attrs ||
return 1
179 rados
--pool $poolname get SOMETHING
$dir/COPY ||
return 1
180 diff $dir/ORIGINAL
$dir/COPY ||
return 1
183 function corrupt_and_repair_two
() {
190 # 1) remove the corresponding file from the OSDs
193 run_in_background pids objectstore_tool
$dir $first SOMETHING remove
194 run_in_background pids objectstore_tool
$dir $second SOMETHING remove
197 if [ $return_code -ne 0 ]; then return $return_code; fi
202 local pg
=$
(get_pg
$poolname SOMETHING
)
205 # 3) The files must be back
208 run_in_background pids objectstore_tool
$dir $first SOMETHING list-attrs
209 run_in_background pids objectstore_tool
$dir $second SOMETHING list-attrs
212 if [ $return_code -ne 0 ]; then return $return_code; fi
214 rados
--pool $poolname get SOMETHING
$dir/COPY ||
return 1
215 diff $dir/ORIGINAL
$dir/COPY ||
return 1
220 # 2) remove the corresponding file from a designated OSD
222 # 4) check that the file has been restored in the designated OSD
224 function corrupt_and_repair_one
() {
230 # 1) remove the corresponding file from the OSD
232 objectstore_tool
$dir $osd SOMETHING remove ||
return 1
236 local pg
=$
(get_pg
$poolname SOMETHING
)
239 # 3) The file must be back
241 objectstore_tool
$dir $osd SOMETHING list-attrs ||
return 1
242 rados
--pool $poolname get SOMETHING
$dir/COPY ||
return 1
243 diff $dir/ORIGINAL
$dir/COPY ||
return 1
246 function corrupt_and_repair_erasure_coded
() {
250 add_something
$dir $poolname ||
return 1
252 local primary
=$
(get_primary
$poolname SOMETHING
)
253 local -a osds
=($
(get_osds
$poolname SOMETHING |
sed -e "s/$primary//"))
254 local not_primary_first
=${osds[0]}
255 local not_primary_second
=${osds[1]}
257 # Reproduces http://tracker.ceph.com/issues/10017
258 corrupt_and_repair_one
$dir $poolname $primary ||
return 1
259 # Reproduces http://tracker.ceph.com/issues/10409
260 corrupt_and_repair_one
$dir $poolname $not_primary_first ||
return 1
261 corrupt_and_repair_two
$dir $poolname $not_primary_first $not_primary_second ||
return 1
262 corrupt_and_repair_two
$dir $poolname $primary $not_primary_first ||
return 1
266 function auto_repair_erasure_coded
() {
268 local allow_overwrites
=$2
269 local poolname
=ecpool
271 # Launch a cluster with 5 seconds scrub interval
272 run_mon
$dir a ||
return 1
273 run_mgr
$dir x ||
return 1
274 local ceph_osd_args
="--osd-scrub-auto-repair=true \
275 --osd-deep-scrub-interval=5 \
276 --osd-scrub-max-interval=5 \
277 --osd-scrub-min-interval=5 \
278 --osd-scrub-interval-randomize-ratio=0"
279 for id
in $
(seq 0 2) ; do
280 if [ "$allow_overwrites" = "true" ]; then
281 run_osd
$dir $id $ceph_osd_args ||
return 1
283 run_osd_filestore
$dir $id $ceph_osd_args ||
return 1
286 create_rbd_pool ||
return 1
287 wait_for_clean ||
return 1
290 create_ec_pool
$poolname $allow_overwrites k
=2 m
=1 ||
return 1
294 echo $payload > $dir/ORIGINAL
295 rados
--pool $poolname put SOMETHING
$dir/ORIGINAL ||
return 1
297 # Remove the object from one shard physically
298 # Restarted osd get $ceph_osd_args passed
299 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) SOMETHING remove ||
return 1
300 # Wait for auto repair
301 local pgid
=$
(get_pg
$poolname SOMETHING
)
302 wait_for_scrub
$pgid "$(get_last_scrub_stamp $pgid)"
303 wait_for_clean ||
return 1
304 # Verify - the file should be back
305 # Restarted osd get $ceph_osd_args passed
306 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) SOMETHING list-attrs ||
return 1
307 rados
--pool $poolname get SOMETHING
$dir/COPY ||
return 1
308 diff $dir/ORIGINAL
$dir/COPY ||
return 1
# Auto-repair on an append-only (no overwrites) EC pool.
function TEST_auto_repair_erasure_coded_appends() {
    auto_repair_erasure_coded "$1" false
}
# Auto-repair on an overwrites-enabled EC pool; only run when the
# use_ec_overwrite gate at the top of the file allows it.
function TEST_auto_repair_erasure_coded_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        auto_repair_erasure_coded "$1" true
    fi
}
321 # initiate a scrub, then check for the (expected) 'scrubbing' and the
322 # (not expected until an error was identified) 'repair'
323 # Arguments: osd#, pg, sleep time
324 function initiate_and_fetch_state
() {
325 local the_osd
="osd.$1"
327 local last_scrub
=$
(get_last_scrub_stamp
$pgid)
329 set_config
"osd" "$1" "osd_scrub_sleep" "$3"
330 set_config
"osd" "$1" "osd_scrub_auto_repair" "true"
335 # note: must initiate a "regular" (periodic) deep scrub - not an operator-initiated one
336 env CEPH_ARGS
= ceph
--format json daemon $
(get_asok_path
$the_osd) deep_scrub
"$pgid"
337 env CEPH_ARGS
= ceph
--format json daemon $
(get_asok_path
$the_osd) scrub
"$pgid"
339 # wait for 'scrubbing' to appear
340 for ((i
=0; i
< 80; i
++)); do
342 st
=`ceph pg $pgid query --format json | jq '.state' `
343 echo $i ") state now: " $st
346 *scrubbing
*repair
* ) echo "found scrub+repair"; return 1;; # PR #41258 should have prevented this
347 *scrubbing
* ) echo "found scrub"; return 0;;
348 *inconsistent
* ) echo "Got here too late. Scrub has already finished"; return 1;;
349 *recovery
* ) echo "Got here too late. Scrub has already finished."; return 1;;
353 if [ $
((i
% 10)) == 4 ]; then
354 echo "loop --------> " $i
359 echo "Timeout waiting for deep-scrub of " $pgid " on " $the_osd " to start"
363 function wait_end_of_scrub
() { # osd# pg
364 local the_osd
="osd.$1"
367 for ((i
=0; i
< 40; i
++)); do
368 st
=`ceph pg $pgid query --format json | jq '.state' `
369 echo "wait-scrub-end state now: " $st
370 [[ $st =~
(.
*scrubbing.
*) ]] ||
break
371 if [ $
((i
% 5)) == 4 ] ; then
377 if [[ $st =~
(.
*scrubbing.
*) ]]
386 function TEST_auto_repair_bluestore_tag
() {
388 local poolname
=testpool
390 # Launch a cluster with 3 seconds scrub interval
391 run_mon
$dir a ||
return 1
392 run_mgr
$dir x ||
return 1
393 # Set scheduler to "wpq" until there's a reliable way to query scrub states
394 # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the
395 # scrub sleep to 0 and as a result the checks in the test fail.
396 local ceph_osd_args
="--osd-scrub-auto-repair=true \
397 --osd_deep_scrub_randomize_ratio=0 \
398 --osd-scrub-interval-randomize-ratio=0 \
400 for id
in $
(seq 0 2) ; do
401 run_osd
$dir $id $ceph_osd_args ||
return 1
404 create_pool
$poolname 1 1 ||
return 1
405 ceph osd pool
set $poolname size
2
406 wait_for_clean ||
return 1
410 echo $payload > $dir/ORIGINAL
411 rados
--pool $poolname put SOMETHING
$dir/ORIGINAL ||
return 1
413 # Remove the object from one shard physically
414 # Restarted osd get $ceph_osd_args passed
415 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) SOMETHING remove ||
return 1
417 local pgid
=$
(get_pg
$poolname SOMETHING
)
418 local primary
=$
(get_primary
$poolname SOMETHING
)
419 echo "Affected PG " $pgid " w/ primary " $primary
420 local last_scrub_stamp
="$(get_last_scrub_stamp $pgid)"
421 initiate_and_fetch_state
$primary $pgid "3.0"
423 echo "initiate_and_fetch_state ret: " $r
424 set_config
"osd" "$1" "osd_scrub_sleep" "0"
425 if [ $r -ne 0 ]; then
429 wait_end_of_scrub
"$primary" "$pgid" ||
return 1
432 # Verify - the file should be back
433 # Restarted osd get $ceph_osd_args passed
434 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) SOMETHING list-attrs ||
return 1
435 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) SOMETHING get-bytes
$dir/COPY ||
return 1
436 diff $dir/ORIGINAL
$dir/COPY ||
return 1
437 grep scrub_finish
$dir/osd.
${primary}.log
441 function TEST_auto_repair_bluestore_basic
() {
443 local poolname
=testpool
445 # Launch a cluster with 5 seconds scrub interval
446 run_mon
$dir a ||
return 1
447 run_mgr
$dir x ||
return 1
448 local ceph_osd_args
="--osd-scrub-auto-repair=true \
449 --osd_deep_scrub_randomize_ratio=0 \
450 --osd-scrub-interval-randomize-ratio=0"
451 for id
in $
(seq 0 2) ; do
452 run_osd
$dir $id $ceph_osd_args ||
return 1
455 create_pool
$poolname 1 1 ||
return 1
456 ceph osd pool
set $poolname size
2
457 wait_for_clean ||
return 1
461 echo $payload > $dir/ORIGINAL
462 rados
--pool $poolname put SOMETHING
$dir/ORIGINAL ||
return 1
464 # Remove the object from one shard physically
465 # Restarted osd get $ceph_osd_args passed
466 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) SOMETHING remove ||
return 1
468 local pgid
=$
(get_pg
$poolname SOMETHING
)
469 local primary
=$
(get_primary
$poolname SOMETHING
)
470 local last_scrub_stamp
="$(get_last_scrub_stamp $pgid)"
471 ceph tell
$pgid deep_scrub
472 ceph tell
$pgid scrub
474 # Wait for auto repair
475 wait_for_scrub
$pgid "$last_scrub_stamp" ||
return 1
476 wait_for_clean ||
return 1
478 # Verify - the file should be back
479 # Restarted osd get $ceph_osd_args passed
480 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) SOMETHING list-attrs ||
return 1
481 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) SOMETHING get-bytes
$dir/COPY ||
return 1
482 diff $dir/ORIGINAL
$dir/COPY ||
return 1
483 grep scrub_finish
$dir/osd.
${primary}.log
486 function TEST_auto_repair_bluestore_scrub
() {
488 local poolname
=testpool
490 # Launch a cluster with 5 seconds scrub interval
491 run_mon
$dir a ||
return 1
492 run_mgr
$dir x ||
return 1
493 local ceph_osd_args
="--osd-scrub-auto-repair=true \
494 --osd_deep_scrub_randomize_ratio=0 \
495 --osd-scrub-interval-randomize-ratio=0 \
496 --osd-scrub-backoff-ratio=0"
497 for id
in $
(seq 0 2) ; do
498 run_osd
$dir $id $ceph_osd_args ||
return 1
501 create_pool
$poolname 1 1 ||
return 1
502 ceph osd pool
set $poolname size
2
503 wait_for_clean ||
return 1
507 echo $payload > $dir/ORIGINAL
508 rados
--pool $poolname put SOMETHING
$dir/ORIGINAL ||
return 1
510 # Remove the object from one shard physically
511 # Restarted osd get $ceph_osd_args passed
512 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) SOMETHING remove ||
return 1
514 local pgid
=$
(get_pg
$poolname SOMETHING
)
515 local primary
=$
(get_primary
$poolname SOMETHING
)
516 local last_scrub_stamp
="$(get_last_scrub_stamp $pgid)"
517 ceph tell
$pgid scrub
519 # Wait for scrub -> auto repair
520 wait_for_scrub
$pgid "$last_scrub_stamp" ||
return 1
522 # Actually this causes 2 scrubs, so we better wait a little longer
524 wait_for_clean ||
return 1
526 # Verify - the file should be back
527 # Restarted osd get $ceph_osd_args passed
528 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) SOMETHING list-attrs ||
return 1
529 rados
--pool $poolname get SOMETHING
$dir/COPY ||
return 1
530 diff $dir/ORIGINAL
$dir/COPY ||
return 1
531 grep scrub_finish
$dir/osd.
${primary}.log
533 # This should have caused 1 object to be repaired
534 COUNT
=$
(ceph pg
$pgid query | jq
'.info.stats.stat_sum.num_objects_repaired')
535 test "$COUNT" = "1" ||
return 1
538 function TEST_auto_repair_bluestore_failed
() {
540 local poolname
=testpool
542 # Launch a cluster with 5 seconds scrub interval
543 run_mon
$dir a ||
return 1
544 run_mgr
$dir x ||
return 1
545 local ceph_osd_args
="--osd-scrub-auto-repair=true \
546 --osd_deep_scrub_randomize_ratio=0 \
547 --osd-scrub-interval-randomize-ratio=0"
548 for id
in $
(seq 0 2) ; do
549 run_osd
$dir $id $ceph_osd_args ||
return 1
552 create_pool
$poolname 1 1 ||
return 1
553 ceph osd pool
set $poolname size
2
554 wait_for_clean ||
return 1
558 echo $payload > $dir/ORIGINAL
561 rados
--pool $poolname put obj
$i $dir/ORIGINAL ||
return 1
564 # Remove the object from one shard physically
565 # Restarted osd get $ceph_osd_args passed
566 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) obj1 remove ||
return 1
567 # obj2 can't be repaired
568 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) obj2 remove ||
return 1
569 objectstore_tool
$dir $
(get_primary
$poolname SOMETHING
) obj2 rm-attr _ ||
return 1
571 local pgid
=$
(get_pg
$poolname obj1
)
572 local primary
=$
(get_primary
$poolname obj1
)
573 local last_scrub_stamp
="$(get_last_scrub_stamp $pgid)"
574 ceph tell
$pgid deep_scrub
575 ceph tell
$pgid scrub
577 # Wait for auto repair
578 wait_for_scrub
$pgid "$last_scrub_stamp" ||
return 1
579 wait_for_clean ||
return 1
581 grep scrub_finish
$dir/osd.
${primary}.log
582 grep -q "scrub_finish.*still present after re-scrub" $dir/osd.
${primary}.log ||
return 1
584 ceph pg dump pgs |
grep -q "^${pgid}.*+failed_repair" ||
return 1
586 # Verify - obj1 should be back
587 # Restarted osd get $ceph_osd_args passed
588 objectstore_tool
$dir $
(get_not_primary
$poolname obj1
) obj1 list-attrs ||
return 1
589 rados
--pool $poolname get obj1
$dir/COPY ||
return 1
590 diff $dir/ORIGINAL
$dir/COPY ||
return 1
591 grep scrub_finish
$dir/osd.
${primary}.log
594 objectstore_tool
$dir $
(get_primary
$poolname SOMETHING
) obj2 remove ||
return 1
600 ceph pg dump pgs |
grep -q -e "^${pgid}.* active+clean " -e "^${pgid}.* active+clean+wait " ||
return 1
601 grep scrub_finish
$dir/osd.
${primary}.log
604 function TEST_auto_repair_bluestore_failed_norecov
() {
606 local poolname
=testpool
608 # Launch a cluster with 5 seconds scrub interval
609 run_mon
$dir a ||
return 1
610 run_mgr
$dir x ||
return 1
611 local ceph_osd_args
="--osd-scrub-auto-repair=true \
612 --osd_deep_scrub_randomize_ratio=0 \
613 --osd-scrub-interval-randomize-ratio=0"
614 for id
in $
(seq 0 2) ; do
615 run_osd
$dir $id $ceph_osd_args ||
return 1
618 create_pool
$poolname 1 1 ||
return 1
619 ceph osd pool
set $poolname size
2
620 wait_for_clean ||
return 1
624 echo $payload > $dir/ORIGINAL
627 rados
--pool $poolname put obj
$i $dir/ORIGINAL ||
return 1
630 # Remove the object from one shard physically
631 # Restarted osd get $ceph_osd_args passed
632 # obj1 can't be repaired
633 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) obj1 remove ||
return 1
634 objectstore_tool
$dir $
(get_primary
$poolname SOMETHING
) obj1 rm-attr _ ||
return 1
635 # obj2 can't be repaired
636 objectstore_tool
$dir $
(get_not_primary
$poolname SOMETHING
) obj2 remove ||
return 1
637 objectstore_tool
$dir $
(get_primary
$poolname SOMETHING
) obj2 rm-attr _ ||
return 1
639 local pgid
=$
(get_pg
$poolname obj1
)
640 local primary
=$
(get_primary
$poolname obj1
)
641 local last_scrub_stamp
="$(get_last_scrub_stamp $pgid)"
642 ceph tell
$pgid deep_scrub
643 ceph tell
$pgid scrub
645 # Wait for auto repair
646 wait_for_scrub
$pgid "$last_scrub_stamp" ||
return 1
647 wait_for_clean ||
return 1
649 grep -q "scrub_finish.*present with no repair possible" $dir/osd.
${primary}.log ||
return 1
651 ceph pg dump pgs |
grep -q "^${pgid}.*+failed_repair" ||
return 1
654 function TEST_repair_stats
() {
656 local poolname
=testpool
659 # This needs to be an even number
662 # Launch a cluster with 5 seconds scrub interval
663 run_mon
$dir a ||
return 1
664 run_mgr
$dir x ||
return 1
665 local ceph_osd_args
="--osd_deep_scrub_randomize_ratio=0 \
666 --osd-scrub-interval-randomize-ratio=0"
667 for id
in $
(seq 0 $
(expr $OSDS - 1)) ; do
668 run_osd
$dir $id $ceph_osd_args ||
return 1
671 create_pool
$poolname 1 1 ||
return 1
672 ceph osd pool
set $poolname size
2
673 wait_for_clean ||
return 1
677 echo $payload > $dir/ORIGINAL
678 for i
in $
(seq 1 $OBJS)
680 rados
--pool $poolname put obj
$i $dir/ORIGINAL ||
return 1
683 # Remove the object from one shard physically
684 # Restarted osd get $ceph_osd_args passed
685 local other
=$
(get_not_primary
$poolname obj1
)
686 local pgid
=$
(get_pg
$poolname obj1
)
687 local primary
=$
(get_primary
$poolname obj1
)
689 kill_daemons
$dir TERM osd.
$other >&2 < /dev
/null ||
return 1
690 kill_daemons
$dir TERM osd.
$primary >&2 < /dev
/null ||
return 1
691 for i
in $
(seq 1 $REPAIRS)
693 # Remove from both osd.0 and osd.1
695 _objectstore_tool_nodown
$dir $OSD obj
$i remove ||
return 1
697 activate_osd
$dir $primary $ceph_osd_args ||
return 1
698 activate_osd
$dir $other $ceph_osd_args ||
return 1
699 wait_for_clean ||
return 1
702 wait_for_clean ||
return 1
706 # This should have caused 1 object to be repaired
707 ceph pg
$pgid query | jq
'.info.stats.stat_sum'
708 COUNT
=$
(ceph pg
$pgid query | jq
'.info.stats.stat_sum.num_objects_repaired')
709 test "$COUNT" = "$REPAIRS" ||
return 1
711 ceph pg dump
--format=json-pretty | jq
".pg_map.osd_stats[] | select(.osd == $primary )"
712 COUNT
=$
(ceph pg dump
--format=json-pretty | jq
".pg_map.osd_stats[] | select(.osd == $primary ).num_shards_repaired")
713 test "$COUNT" = "$(expr $REPAIRS / 2)" ||
return 1
715 ceph pg dump
--format=json-pretty | jq
".pg_map.osd_stats[] | select(.osd == $other )"
716 COUNT
=$
(ceph pg dump
--format=json-pretty | jq
".pg_map.osd_stats[] | select(.osd == $other ).num_shards_repaired")
717 test "$COUNT" = "$(expr $REPAIRS / 2)" ||
return 1
719 ceph pg dump
--format=json-pretty | jq
".pg_map.osd_stats_sum"
720 COUNT
=$
(ceph pg dump
--format=json-pretty | jq
".pg_map.osd_stats_sum.num_shards_repaired")
721 test "$COUNT" = "$REPAIRS" ||
return 1
724 function TEST_repair_stats_ec
() {
726 local poolname
=testpool
729 # This needs to be an even number
731 local allow_overwrites
=false
733 # Launch a cluster with 5 seconds scrub interval
734 run_mon
$dir a ||
return 1
735 run_mgr
$dir x ||
return 1
736 local ceph_osd_args
="--osd_deep_scrub_randomize_ratio=0 \
737 --osd-scrub-interval-randomize-ratio=0"
738 for id
in $
(seq 0 $
(expr $OSDS - 1)) ; do
739 run_osd
$dir $id $ceph_osd_args ||
return 1
743 create_ec_pool
$poolname $allow_overwrites k
=2 m
=1 ||
return 1
747 echo $payload > $dir/ORIGINAL
748 for i
in $
(seq 1 $OBJS)
750 rados
--pool $poolname put obj
$i $dir/ORIGINAL ||
return 1
753 # Remove the object from one shard physically
754 # Restarted osd get $ceph_osd_args passed
755 local other
=$
(get_not_primary
$poolname obj1
)
756 local pgid
=$
(get_pg
$poolname obj1
)
757 local primary
=$
(get_primary
$poolname obj1
)
759 kill_daemons
$dir TERM osd.
$other >&2 < /dev
/null ||
return 1
760 kill_daemons
$dir TERM osd.
$primary >&2 < /dev
/null ||
return 1
761 for i
in $
(seq 1 $REPAIRS)
763 # Remove from both osd.0 and osd.1
765 _objectstore_tool_nodown
$dir $OSD obj
$i remove ||
return 1
767 activate_osd
$dir $primary $ceph_osd_args ||
return 1
768 activate_osd
$dir $other $ceph_osd_args ||
return 1
769 wait_for_clean ||
return 1
772 wait_for_clean ||
return 1
776 # This should have caused 1 object to be repaired
777 ceph pg
$pgid query | jq
'.info.stats.stat_sum'
778 COUNT
=$
(ceph pg
$pgid query | jq
'.info.stats.stat_sum.num_objects_repaired')
779 test "$COUNT" = "$REPAIRS" ||
return 1
781 for osd
in $
(seq 0 $
(expr $OSDS - 1)) ; do
782 if [ $osd = $other -o $osd = $primary ]; then
783 repair
=$
(expr $REPAIRS / 2)
788 ceph pg dump
--format=json-pretty | jq
".pg_map.osd_stats[] | select(.osd == $osd )"
789 COUNT
=$
(ceph pg dump
--format=json-pretty | jq
".pg_map.osd_stats[] | select(.osd == $osd ).num_shards_repaired")
790 test "$COUNT" = "$repair" ||
return 1
793 ceph pg dump
--format=json-pretty | jq
".pg_map.osd_stats_sum"
794 COUNT
=$
(ceph pg dump
--format=json-pretty | jq
".pg_map.osd_stats_sum.num_shards_repaired")
795 test "$COUNT" = "$REPAIRS" ||
return 1
798 function corrupt_and_repair_jerasure
() {
800 local allow_overwrites
=$2
801 local poolname
=ecpool
803 run_mon
$dir a ||
return 1
804 run_mgr
$dir x ||
return 1
805 for id
in $
(seq 0 3) ; do
806 if [ "$allow_overwrites" = "true" ]; then
807 run_osd
$dir $id ||
return 1
809 run_osd_filestore
$dir $id ||
return 1
812 create_rbd_pool ||
return 1
813 wait_for_clean ||
return 1
815 create_ec_pool
$poolname $allow_overwrites k
=2 m
=2 ||
return 1
816 corrupt_and_repair_erasure_coded
$dir $poolname ||
return 1
# Corrupt/repair cycle on an append-only jerasure EC pool.
function TEST_corrupt_and_repair_jerasure_appends() {
    corrupt_and_repair_jerasure "$1" false
}
# Corrupt/repair cycle on an overwrites-enabled jerasure EC pool;
# gated by use_ec_overwrite (bluestore only).
function TEST_corrupt_and_repair_jerasure_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        corrupt_and_repair_jerasure "$1" true
    fi
}
829 function corrupt_and_repair_lrc
() {
831 local allow_overwrites
=$2
832 local poolname
=ecpool
834 run_mon
$dir a ||
return 1
835 run_mgr
$dir x ||
return 1
836 for id
in $
(seq 0 9) ; do
837 if [ "$allow_overwrites" = "true" ]; then
838 run_osd
$dir $id ||
return 1
840 run_osd_filestore
$dir $id ||
return 1
843 create_rbd_pool ||
return 1
844 wait_for_clean ||
return 1
846 create_ec_pool
$poolname $allow_overwrites k
=4 m
=2 l
=3 plugin
=lrc ||
return 1
847 corrupt_and_repair_erasure_coded
$dir $poolname ||
return 1
# Corrupt/repair cycle on an append-only LRC EC pool.
function TEST_corrupt_and_repair_lrc_appends() {
    corrupt_and_repair_lrc "$1" false
}
# Corrupt/repair cycle on an overwrites-enabled LRC EC pool;
# gated by use_ec_overwrite (bluestore only).
function TEST_corrupt_and_repair_lrc_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        corrupt_and_repair_lrc "$1" true
    fi
}
860 function unfound_erasure_coded
() {
862 local allow_overwrites
=$2
863 local poolname
=ecpool
866 run_mon
$dir a ||
return 1
867 run_mgr
$dir x ||
return 1
868 for id
in $
(seq 0 3) ; do
869 if [ "$allow_overwrites" = "true" ]; then
870 run_osd
$dir $id ||
return 1
872 run_osd_filestore
$dir $id ||
return 1
876 create_ec_pool
$poolname $allow_overwrites k
=2 m
=2 ||
return 1
878 add_something
$dir $poolname ||
return 1
880 local primary
=$
(get_primary
$poolname SOMETHING
)
881 local -a osds
=($
(get_osds
$poolname SOMETHING |
sed -e "s/$primary//"))
882 local not_primary_first
=${osds[0]}
883 local not_primary_second
=${osds[1]}
884 local not_primary_third
=${osds[2]}
887 # 1) remove the corresponding file from the OSDs
890 run_in_background pids objectstore_tool
$dir $not_primary_first SOMETHING remove
891 run_in_background pids objectstore_tool
$dir $not_primary_second SOMETHING remove
892 run_in_background pids objectstore_tool
$dir $not_primary_third SOMETHING remove
895 if [ $return_code -ne 0 ]; then return $return_code; fi
900 local pg
=$
(get_pg
$poolname SOMETHING
)
905 # it may take a bit to appear due to mon/mgr asynchrony
906 for f
in `seq 1 60`; do
907 ceph
-s |
grep "1/1 objects unfound" && break
910 ceph
-s|
grep "4 up" ||
return 1
911 ceph
-s|
grep "4 in" ||
return 1
912 ceph
-s|
grep "1/1 objects unfound" ||
return 1
# Unfound-object scenario on an append-only EC pool.
function TEST_unfound_erasure_coded_appends() {
    unfound_erasure_coded "$1" false
}
# Unfound-object scenario on an overwrites-enabled EC pool;
# gated by use_ec_overwrite (bluestore only).
function TEST_unfound_erasure_coded_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        unfound_erasure_coded "$1" true
    fi
}
926 # list_missing for EC pool
928 function list_missing_erasure_coded
() {
930 local allow_overwrites
=$2
931 local poolname
=ecpool
933 run_mon
$dir a ||
return 1
934 run_mgr
$dir x ||
return 1
935 for id
in $
(seq 0 2) ; do
936 if [ "$allow_overwrites" = "true" ]; then
937 run_osd
$dir $id ||
return 1
939 run_osd_filestore
$dir $id ||
return 1
942 create_rbd_pool ||
return 1
943 wait_for_clean ||
return 1
945 create_ec_pool
$poolname $allow_overwrites k
=2 m
=1 ||
return 1
947 # Put an object and remove the two shards (including primary)
948 add_something
$dir $poolname MOBJ0 ||
return 1
949 local -a osds0
=($
(get_osds
$poolname MOBJ0
))
951 # Put another object and remove two shards (excluding primary)
952 add_something
$dir $poolname MOBJ1 ||
return 1
953 local -a osds1
=($
(get_osds
$poolname MOBJ1
))
955 # Stop all osd daemons
956 for id
in $
(seq 0 2) ; do
957 kill_daemons
$dir TERM osd.
$id >&2 < /dev
/null ||
return 1
961 ceph-objectstore-tool
--data-path $dir/$id \
962 MOBJ0 remove ||
return 1
964 ceph-objectstore-tool
--data-path $dir/$id \
965 MOBJ0 remove ||
return 1
968 ceph-objectstore-tool
--data-path $dir/$id \
969 MOBJ1 remove ||
return 1
971 ceph-objectstore-tool
--data-path $dir/$id \
972 MOBJ1 remove ||
return 1
974 for id
in $
(seq 0 2) ; do
975 activate_osd
$dir $id >&2 ||
return 1
977 create_rbd_pool ||
return 1
978 wait_for_clean ||
return 1
980 # Get pg - both objects should be in the same PG
981 local pg
=$
(get_pg
$poolname MOBJ0
)
983 # Repair the PG, which triggers the recovering,
984 # and should mark the object as unfound
987 for i
in $
(seq 0 120) ; do
988 [ $i -lt 60 ] ||
return 1
989 matches
=$
(ceph pg
$pg list_unfound |
egrep "MOBJ0|MOBJ1" |
wc -l)
990 [ $matches -eq 2 ] && break
# list_missing on an append-only EC pool.
function TEST_list_missing_erasure_coded_appends() {
    list_missing_erasure_coded "$1" false
}
# list_missing on an overwrites-enabled EC pool;
# gated by use_ec_overwrite (bluestore only).
function TEST_list_missing_erasure_coded_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        list_missing_erasure_coded "$1" true
    fi
}
1005 # Corrupt one copy of a replicated pool
1007 function TEST_corrupt_scrub_replicated
() {
1009 local poolname
=csr_pool
1012 run_mon
$dir a
--osd_pool_default_size=2 ||
return 1
1013 run_mgr
$dir x ||
return 1
1014 run_osd
$dir 0 ||
return 1
1015 run_osd
$dir 1 ||
return 1
1016 create_rbd_pool ||
return 1
1017 wait_for_clean ||
return 1
1019 create_pool foo
1 ||
return 1
1020 create_pool
$poolname 1 1 ||
return 1
1021 wait_for_clean ||
return 1
1023 for i
in $
(seq 1 $total_objs) ; do
1025 add_something
$dir $poolname $objname ||
return 1
1027 rados
--pool $poolname setomapheader
$objname hdr-
$objname ||
return 1
1028 rados
--pool $poolname setomapval
$objname key-
$objname val-
$objname ||
return 1
1031 # Increase file 1 MB + 1KB
1032 dd if=/dev
/zero of
=$dir/new.ROBJ19 bs
=1024 count
=1025
1033 rados
--pool $poolname put
$objname $dir/new.ROBJ19 ||
return 1
1034 rm -f $dir/new.ROBJ19
1036 local pg
=$
(get_pg
$poolname ROBJ0
)
1037 local primary
=$
(get_primary
$poolname ROBJ0
)
1039 # Compute an old omap digest and save oi
1040 CEPH_ARGS
='' ceph daemon $
(get_asok_path osd
.0) \
1041 config
set osd_deep_scrub_update_digest_min_age
0
1042 CEPH_ARGS
='' ceph daemon $
(get_asok_path osd
.1) \
1043 config
set osd_deep_scrub_update_digest_min_age
0
1046 for i
in $
(seq 1 $total_objs) ; do
1049 # Alternate corruption between osd.0 and osd.1
1050 local osd
=$
(expr $i % 2)
1054 # Size (deep scrub data_digest too)
1055 local payload
=UVWXYZZZ
1056 echo $payload > $dir/CORRUPT
1057 objectstore_tool
$dir $osd $objname set-bytes
$dir/CORRUPT ||
return 1
1061 # digest (deep scrub only)
1062 local payload
=UVWXYZ
1063 echo $payload > $dir/CORRUPT
1064 objectstore_tool
$dir $osd $objname set-bytes
$dir/CORRUPT ||
return 1
1069 objectstore_tool
$dir $osd $objname remove ||
return 1
1073 # Modify omap value (deep scrub only)
1074 objectstore_tool
$dir $osd $objname set-omap key-
$objname $dir/CORRUPT ||
return 1
1078 # Delete omap key (deep scrub only)
1079 objectstore_tool
$dir $osd $objname rm-omap key-
$objname ||
return 1
1083 # Add extra omap key (deep scrub only)
1084 echo extra
> $dir/extra-val
1085 objectstore_tool
$dir $osd $objname set-omap key2-
$objname $dir/extra-val ||
return 1
1090 # Modify omap header (deep scrub only)
1091 echo -n newheader
> $dir/hdr
1092 objectstore_tool
$dir $osd $objname set-omaphdr
$dir/hdr ||
return 1
1097 rados
--pool $poolname setxattr
$objname key1-
$objname val1-
$objname ||
return 1
1098 rados
--pool $poolname setxattr
$objname key2-
$objname val2-
$objname ||
return 1
1101 echo -n bad-val
> $dir/bad-val
1102 objectstore_tool
$dir $osd $objname set-attr _key1-
$objname $dir/bad-val ||
return 1
1103 objectstore_tool
$dir $osd $objname rm-attr _key2-
$objname ||
return 1
1104 echo -n val3-
$objname > $dir/newval
1105 objectstore_tool
$dir $osd $objname set-attr _key3-
$objname $dir/newval ||
return 1
1106 rm $dir/bad-val
$dir/newval
1110 objectstore_tool
$dir $osd $objname get-attr _
> $dir/robj9-oi
1111 echo -n D
> $dir/change
1112 rados
--pool $poolname put
$objname $dir/change
1113 objectstore_tool
$dir $osd $objname set-attr _
$dir/robj9-oi
1114 rm $dir/oi
$dir/change
1117 # ROBJ10 must be handled after digests are re-computed by a deep scrub below
1118 # ROBJ11 must be handled with config change before deep scrub
1119 # ROBJ12 must be handled with config change before scrubs
1120 # ROBJ13 must be handled before scrubs
1123 echo -n bad-val
> $dir/bad-val
1124 objectstore_tool
$dir 0 $objname set-attr _
$dir/bad-val ||
return 1
1125 objectstore_tool
$dir 1 $objname rm-attr _ ||
return 1
1130 objectstore_tool
$dir $osd $objname rm-attr _ ||
return 1
1134 objectstore_tool
$dir 0 $objname rm-attr snapset ||
return 1
1135 echo -n bad-val
> $dir/bad-val
1136 objectstore_tool
$dir 1 $objname set-attr snapset
$dir/bad-val ||
return 1
1140 # Deep-scrub only (all replicas are different than the object info)
1141 local payload
=ROBJ17
1142 echo $payload > $dir/new.ROBJ17
1143 objectstore_tool
$dir 0 $objname set-bytes
$dir/new.ROBJ17 ||
return 1
1144 objectstore_tool
$dir 1 $objname set-bytes
$dir/new.ROBJ17 ||
return 1
1148 # Deep-scrub only (all replicas are different than the object info)
1149 local payload
=ROBJ18
1150 echo $payload > $dir/new.ROBJ18
1151 objectstore_tool
$dir 0 $objname set-bytes
$dir/new.ROBJ18 ||
return 1
1152 objectstore_tool
$dir 1 $objname set-bytes
$dir/new.ROBJ18 ||
return 1
1153 # Make one replica have a different object info, so a full repair must happen too
1154 objectstore_tool
$dir $osd $objname corrupt-info ||
return 1
1158 # Set osd-max-object-size smaller than this object's size
1163 local pg
=$
(get_pg
$poolname ROBJ0
)
1165 ceph tell osd.\
* injectargs
-- --osd-max-object-size=1048576
1167 inject_eio rep data
$poolname ROBJ11
$dir 0 ||
return 1 # shard 0 of [1, 0], osd.1
1168 inject_eio rep mdata
$poolname ROBJ12
$dir 1 ||
return 1 # shard 1 of [1, 0], osd.0
1169 inject_eio rep mdata
$poolname ROBJ13
$dir 1 ||
return 1 # shard 1 of [1, 0], osd.0
1170 inject_eio rep data
$poolname ROBJ13
$dir 0 ||
return 1 # shard 0 of [1, 0], osd.1
1175 declare -a s_err_strings
1176 err_strings
[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:30259878:::ROBJ15:head : candidate had a missing info key"
1177 err_strings
[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:33aca486:::ROBJ18:head : object info inconsistent "
1178 err_strings
[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:5c7b2c47:::ROBJ16:head : candidate had a corrupt snapset"
1179 err_strings
[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:5c7b2c47:::ROBJ16:head : candidate had a missing snapset key"
1180 err_strings
[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:5c7b2c47:::ROBJ16:head : failed to pick suitable object info"
1181 err_strings
[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:86586531:::ROBJ8:head : attr value mismatch '_key1-ROBJ8', attr name mismatch '_key3-ROBJ8', attr name mismatch '_key2-ROBJ8'"
1182 err_strings
[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:bc819597:::ROBJ12:head : candidate had a stat error"
1183 err_strings
[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:c0c86b1d:::ROBJ14:head : candidate had a missing info key"
1184 err_strings
[8]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info"
1185 err_strings
[9]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info"
1186 err_strings
[10]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info size 7 mismatch"
1187 err_strings
[11]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0"
1188 err_strings
[12]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:d60617f9:::ROBJ13:head : candidate had a stat error"
1189 err_strings
[13]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 3:f2a5b2a4:::ROBJ3:head : missing"
1190 err_strings
[14]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : candidate size 1 info size 7 mismatch"
1191 err_strings
[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent "
1192 err_strings
[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr"
1193 err_strings
[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr .* no longer understand old encoding version 3 < 97: Malformed input"
1194 err_strings
[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 19/19 objects, 0/0 clones, 18/19 dirty, 18/19 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 1049713/1049720 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes."
1195 err_strings
[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 1 missing, 8 inconsistent objects"
1196 err_strings
[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 18 errors"
1197 err_strings
[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:123a5f55:::ROBJ19:head : size 1049600 > 1048576 is too large"
1199 for err_string
in "${err_strings[@]}"
1201 if ! grep -q "$err_string" $dir/osd.
${primary}.log
1203 echo "Missing log message '$err_string'"
1204 ERRORS
=$
(expr $ERRORS + 1)
1208 rados list-inconsistent-pg
$poolname > $dir/json ||
return 1
1210 test $
(jq
'. | length' $dir/json
) = "1" ||
return 1
1212 test $
(jq
-r '.[0]' $dir/json
) = $pg ||
return 1
1214 rados list-inconsistent-obj
$pg > $dir/json ||
return 1
1215 # Get epoch for repair-get requests
1216 epoch
=$
(jq .epoch
$dir/json
)
1218 jq
"$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson
1241 "prior_version": "21'3",
1242 "last_reqid": "osd.1.0:57",
1256 "data_digest": "0x2ddbf8f5",
1257 "omap_digest": "0xf5fba2c6",
1258 "expected_object_size": 0,
1259 "expected_write_size": 0,
1260 "alloc_hint_flags": 0,
1268 "size_mismatch_info",
1269 "obj_size_info_mismatch"
1275 "selected_object_info": {
1286 "prior_version": "21'3",
1287 "last_reqid": "osd.1.0:57",
1290 "mtime": "2018-04-05 14:33:19.804040",
1291 "local_mtime": "2018-04-05 14:33:19.804839",
1301 "data_digest": "0x2ddbf8f5",
1302 "omap_digest": "0xf5fba2c6",
1303 "expected_object_size": 0,
1304 "expected_write_size": 0,
1305 "alloc_hint_flags": 0,
1311 "union_shard_errors": [
1312 "size_mismatch_info",
1313 "obj_size_info_mismatch"
1342 "selected_object_info": {
1353 "prior_version": "43'36",
1354 "last_reqid": "osd.1.0:55",
1368 "data_digest": "0x2ddbf8f5",
1369 "omap_digest": "0x067f306a",
1370 "expected_object_size": 0,
1371 "expected_write_size": 0,
1372 "alloc_hint_flags": 0,
1378 "union_shard_errors": [
1406 "selected_object_info": {
1417 "prior_version": "45'39",
1418 "last_reqid": "osd.1.0:58",
1432 "data_digest": "0x2ddbf8f5",
1433 "omap_digest": "0x6441854d",
1434 "expected_object_size": 0,
1435 "expected_write_size": 0,
1436 "alloc_hint_flags": 0,
1442 "union_shard_errors": [
1457 "object_info": "bad-val",
1474 "union_shard_errors": [
1501 "prior_version": "49'45",
1502 "last_reqid": "osd.1.0:48",
1505 "mtime": "2018-04-05 14:33:29.498969",
1506 "local_mtime": "2018-04-05 14:33:29.499890",
1516 "data_digest": "0x2ddbf8f5",
1517 "omap_digest": "0x2d2a4d6e",
1518 "expected_object_size": 0,
1519 "expected_write_size": 0,
1520 "alloc_hint_flags": 0,
1540 "selected_object_info": {
1551 "prior_version": "49'45",
1552 "last_reqid": "osd.1.0:48",
1566 "data_digest": "0x2ddbf8f5",
1567 "omap_digest": "0x2d2a4d6e",
1568 "expected_object_size": 0,
1569 "expected_write_size": 0,
1570 "alloc_hint_flags": 0,
1576 "union_shard_errors": [
1612 "snapset": "bad-val",
1616 "union_shard_errors": [
1623 "object_info_inconsistency"
1631 "selected_object_info": {
1632 "alloc_hint_flags": 255,
1633 "data_digest": "0x2ddbf8f5",
1634 "expected_object_size": 0,
1635 "expected_write_size": 0,
1655 "omap_digest": "0xddc3680f",
1666 "alloc_hint_flags": 0,
1667 "data_digest": "0x2ddbf8f5",
1668 "expected_object_size": 0,
1669 "expected_write_size": 0,
1689 "omap_digest": "0xddc3680f",
1703 "alloc_hint_flags": 255,
1704 "data_digest": "0x2ddbf8f5",
1705 "expected_object_size": 0,
1706 "expected_write_size": 0,
1726 "omap_digest": "0xddc3680f",
1738 "union_shard_errors": []
1751 "union_shard_errors": [],
1752 "selected_object_info": {
1763 "prior_version": "63'58",
1764 "last_reqid": "osd.1.0:58",
1767 "mtime": "2019-08-09T23:33:58.340709+0000",
1768 "local_mtime": "2019-08-09T23:33:58.345676+0000",
1778 "data_digest": "0x3dde0ef3",
1779 "omap_digest": "0xbffddd28",
1780 "expected_object_size": 0,
1781 "expected_write_size": 0,
1782 "alloc_hint_flags": 0,
1819 "selected_object_info": {
1830 "prior_version": "25'9",
1831 "last_reqid": "osd.1.0:60",
1845 "data_digest": "0x2ddbf8f5",
1846 "omap_digest": "0x00b35dfd",
1847 "expected_object_size": 0,
1848 "expected_write_size": 0,
1849 "alloc_hint_flags": 0,
1855 "union_shard_errors": [
1874 "name": "key1-ROBJ8"
1878 "value": "val2-ROBJ8",
1879 "name": "key2-ROBJ8"
1891 "value": "val1-ROBJ8",
1892 "name": "key1-ROBJ8"
1896 "value": "val3-ROBJ8",
1897 "name": "key3-ROBJ8"
1906 "selected_object_info": {
1917 "prior_version": "79'65",
1918 "last_reqid": "client.4554.0:1",
1932 "data_digest": "0x2ddbf8f5",
1933 "omap_digest": "0xd6be81dc",
1934 "expected_object_size": 0,
1935 "expected_write_size": 0,
1936 "alloc_hint_flags": 0,
1942 "union_shard_errors": [],
1944 "attr_value_mismatch",
1945 "attr_name_mismatch"
1969 "prior_version": "51'64",
1970 "last_reqid": "client.4649.0:1",
1984 "data_digest": "0x2b63260d",
1985 "omap_digest": "0x2eecc539",
1986 "expected_object_size": 0,
1987 "expected_write_size": 0,
1988 "alloc_hint_flags": 0,
2011 "prior_version": "37'27",
2012 "last_reqid": "osd.1.0:63",
2015 "mtime": "2018-04-05 14:33:25.352485",
2016 "local_mtime": "2018-04-05 14:33:25.353746",
2026 "data_digest": "0x2ddbf8f5",
2027 "omap_digest": "0x2eecc539",
2028 "expected_object_size": 0,
2029 "expected_write_size": 0,
2030 "alloc_hint_flags": 0,
2038 "obj_size_info_mismatch"
2044 "selected_object_info": {
2055 "prior_version": "51'64",
2056 "last_reqid": "client.4649.0:1",
2070 "data_digest": "0x2b63260d",
2071 "omap_digest": "0x2eecc539",
2072 "expected_object_size": 0,
2073 "expected_write_size": 0,
2074 "alloc_hint_flags": 0,
2080 "union_shard_errors": [
2081 "obj_size_info_mismatch"
2084 "object_info_inconsistency"
2099 jq
"$jqfilter" $dir/json | jq
'.inconsistents' | python3
-c "$sortkeys" > $dir/csjson
2100 multidiff
$dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
2101 if test $getjson = "yes"
2103 jq
'.' $dir/json
> save1.json
2106 if test "$LOCALRUN" = "yes" && which jsonschema
> /dev
/null
;
2108 jsonschema
-i $dir/json
$CEPH_ROOT/doc
/rados
/command
/list-inconsistent-obj.json ||
return 1
2112 # Change data and size again because digest was recomputed
2113 echo -n ZZZ
> $dir/change
2114 rados
--pool $poolname put
$objname $dir/change
2115 # Set one to an even older value
2116 objectstore_tool
$dir 0 $objname set-attr _
$dir/robj9-oi
2117 rm $dir/oi
$dir/change
2120 objectstore_tool
$dir 1 $objname get-attr _
> $dir/oi
2121 rados
--pool $poolname setomapval
$objname key2-
$objname val2-
$objname
2122 objectstore_tool
$dir 0 $objname set-attr _
$dir/oi
2123 objectstore_tool
$dir 1 $objname set-attr _
$dir/oi
2126 inject_eio rep data
$poolname ROBJ11
$dir 0 ||
return 1 # shard 0 of [1, 0], osd.1
2127 inject_eio rep mdata
$poolname ROBJ12
$dir 1 ||
return 1 # shard 1 of [1, 0], osd.0
2128 inject_eio rep mdata
$poolname ROBJ13
$dir 1 ||
return 1 # shard 1 of [1, 0], osd.0
2129 inject_eio rep data
$poolname ROBJ13
$dir 0 ||
return 1 # shard 0 of [1, 0], osd.1
2131 # ROBJ19 won't error this time
2132 ceph tell osd.\
* injectargs
-- --osd-max-object-size=134217728
2137 err_strings
[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:30259878:::ROBJ15:head : candidate had a missing info key"
2138 err_strings
[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:33aca486:::ROBJ18:head : data_digest 0xbd89c912 != data_digest 0x2ddbf8f5 from auth oi 3:33aca486:::ROBJ18:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 54 dd 2ddbf8f5 od ddc3680f alloc_hint [[]0 0 255[]][)], object info inconsistent "
2139 err_strings
[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:33aca486:::ROBJ18:head : data_digest 0xbd89c912 != data_digest 0x2ddbf8f5 from auth oi 3:33aca486:::ROBJ18:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 54 dd 2ddbf8f5 od ddc3680f alloc_hint [[]0 0 255[]][)]"
2140 err_strings
[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:33aca486:::ROBJ18:head : failed to pick suitable auth object"
2141 err_strings
[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:5c7b2c47:::ROBJ16:head : candidate had a corrupt snapset"
2142 err_strings
[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:5c7b2c47:::ROBJ16:head : candidate had a missing snapset key"
2143 err_strings
[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:5c7b2c47:::ROBJ16:head : failed to pick suitable object info"
2144 err_strings
[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:86586531:::ROBJ8:head : attr value mismatch '_key1-ROBJ8', attr name mismatch '_key3-ROBJ8', attr name mismatch '_key2-ROBJ8'"
2145 err_strings
[8]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:87abbf36:::ROBJ11:head : candidate had a read error"
2146 err_strings
[9]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:8aa5320e:::ROBJ17:head : data_digest 0x5af0c3ef != data_digest 0x2ddbf8f5 from auth oi 3:8aa5320e:::ROBJ17:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 51 dd 2ddbf8f5 od e9572720 alloc_hint [[]0 0 0[]][)]"
2147 err_strings
[10]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:8aa5320e:::ROBJ17:head : data_digest 0x5af0c3ef != data_digest 0x2ddbf8f5 from auth oi 3:8aa5320e:::ROBJ17:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 51 dd 2ddbf8f5 od e9572720 alloc_hint [[]0 0 0[]][)]"
2148 err_strings
[11]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:8aa5320e:::ROBJ17:head : failed to pick suitable auth object"
2149 err_strings
[12]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:8b55fa4b:::ROBJ7:head : omap_digest 0xefced57a != omap_digest 0x6a73cc07 from shard 1"
2150 err_strings
[13]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:8b55fa4b:::ROBJ7:head : omap_digest 0x6a73cc07 != omap_digest 0xefced57a from auth oi 3:8b55fa4b:::ROBJ7:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a alloc_hint [[]0 0 0[]][)]"
2151 err_strings
[14]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:a53c12e8:::ROBJ6:head : omap_digest 0x689ee887 != omap_digest 0x179c919f from shard 1, omap_digest 0x689ee887 != omap_digest 0x179c919f from auth oi 3:a53c12e8:::ROBJ6:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 179c919f alloc_hint [[]0 0 0[]][)]"
2152 err_strings
[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:b1f19cbd:::ROBJ10:head : omap_digest 0xa8dd5adc != omap_digest 0xc2025a24 from auth oi 3:b1f19cbd:::ROBJ10:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [[]0 0 0[]][)]"
2153 err_strings
[16]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:b1f19cbd:::ROBJ10:head : omap_digest 0xa8dd5adc != omap_digest 0xc2025a24 from auth oi 3:b1f19cbd:::ROBJ10:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [[]0 0 0[]][)]"
2154 err_strings
[17]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:b1f19cbd:::ROBJ10:head : failed to pick suitable auth object"
2155 err_strings
[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:bc819597:::ROBJ12:head : candidate had a stat error"
2156 err_strings
[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:c0c86b1d:::ROBJ14:head : candidate had a missing info key"
2157 err_strings
[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info"
2158 err_strings
[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info"
2159 err_strings
[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info size 7 mismatch"
2160 err_strings
[23]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from shard 0, data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0"
2161 err_strings
[24]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:d60617f9:::ROBJ13:head : candidate had a read error"
2162 err_strings
[25]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:d60617f9:::ROBJ13:head : candidate had a stat error"
2163 err_strings
[26]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:d60617f9:::ROBJ13:head : failed to pick suitable object info"
2164 err_strings
[27]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:e97ce31e:::ROBJ2:head : data_digest 0x578a4830 != data_digest 0x2ddbf8f5 from shard 1, data_digest 0x578a4830 != data_digest 0x2ddbf8f5 from auth oi 3:e97ce31e:::ROBJ2:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od f8e11918 alloc_hint [[]0 0 0[]][)]"
2165 err_strings
[28]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 3:f2a5b2a4:::ROBJ3:head : missing"
2166 err_strings
[29]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:f4981d31:::ROBJ4:head : omap_digest 0xd7178dfe != omap_digest 0xe2d46ea4 from shard 1, omap_digest 0xd7178dfe != omap_digest 0xe2d46ea4 from auth oi 3:f4981d31:::ROBJ4:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od e2d46ea4 alloc_hint [[]0 0 0[]][)]"
2167 err_strings
[30]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:f4bfd4d1:::ROBJ5:head : omap_digest 0x1a862a41 != omap_digest 0x6cac8f6 from shard 1"
2168 err_strings
[31]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:f4bfd4d1:::ROBJ5:head : omap_digest 0x6cac8f6 != omap_digest 0x1a862a41 from auth oi 3:f4bfd4d1:::ROBJ5:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41 alloc_hint [[]0 0 0[]][)]"
2169 err_strings
[32]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : candidate size 3 info size 7 mismatch"
2170 err_strings
[33]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent "
2171 err_strings
[34]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr"
2172 err_strings
[35]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr .* no longer understand old encoding version 3 < 97: Malformed input"
2173 err_strings
[36]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub : stat mismatch, got 19/19 objects, 0/0 clones, 18/19 dirty, 18/19 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 1049715/1049716 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes."
2174 err_strings
[37]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 1 missing, 11 inconsistent objects"
2175 err_strings
[38]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 35 errors"
2177 for err_string
in "${err_strings[@]}"
2179 if ! grep -q "$err_string" $dir/osd.
${primary}.log
2181 echo "Missing log message '$err_string'"
2182 ERRORS
=$
(expr $ERRORS + 1)
2186 rados list-inconsistent-pg
$poolname > $dir/json ||
return 1
2188 test $
(jq
'. | length' $dir/json
) = "1" ||
return 1
2190 test $
(jq
-r '.[0]' $dir/json
) = $pg ||
return 1
2192 rados list-inconsistent-obj
$pg > $dir/json ||
return 1
2193 # Get epoch for repair-get requests
2194 epoch
=$
(jq .epoch
$dir/json
)
2196 jq
"$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson
2202 "data_digest": "0x2ddbf8f5",
2203 "omap_digest": "0xf5fba2c6",
2221 "prior_version": "21'3",
2222 "last_reqid": "osd.1.0:57",
2225 "mtime": "2018-04-05 14:33:19.804040",
2226 "local_mtime": "2018-04-05 14:33:19.804839",
2236 "data_digest": "0x2ddbf8f5",
2237 "omap_digest": "0xf5fba2c6",
2238 "expected_object_size": 0,
2239 "expected_write_size": 0,
2240 "alloc_hint_flags": 0,
2246 "data_digest": "0x2d4a11c2",
2247 "omap_digest": "0xf5fba2c6",
2250 "data_digest_mismatch_info",
2251 "size_mismatch_info",
2252 "obj_size_info_mismatch"
2258 "selected_object_info": {
2269 "prior_version": "21'3",
2270 "last_reqid": "osd.1.0:57",
2273 "mtime": "2018-04-05 14:33:19.804040",
2274 "local_mtime": "2018-04-05 14:33:19.804839",
2284 "data_digest": "0x2ddbf8f5",
2285 "omap_digest": "0xf5fba2c6",
2286 "expected_object_size": 0,
2287 "expected_write_size": 0,
2288 "alloc_hint_flags": 0,
2294 "union_shard_errors": [
2295 "data_digest_mismatch_info",
2296 "size_mismatch_info",
2297 "obj_size_info_mismatch"
2300 "data_digest_mismatch",
2314 "data_digest": "0x2ddbf8f5",
2315 "omap_digest": "0xa8dd5adc",
2318 "omap_digest_mismatch_info"
2324 "data_digest": "0x2ddbf8f5",
2325 "omap_digest": "0xa8dd5adc",
2328 "omap_digest_mismatch_info"
2334 "selected_object_info": {
2335 "alloc_hint_flags": 0,
2336 "data_digest": "0x2ddbf8f5",
2337 "expected_object_size": 0,
2338 "expected_write_size": 0,
2358 "omap_digest": "0xc2025a24",
2365 "union_shard_errors": [
2366 "omap_digest_mismatch_info"
2380 "data_digest": "0x2ddbf8f5",
2381 "omap_digest": "0xa03cef03",
2396 "selected_object_info": {
2407 "prior_version": "41'33",
2408 "last_reqid": "osd.1.0:51",
2411 "mtime": "2018-04-05 14:33:26.761286",
2412 "local_mtime": "2018-04-05 14:33:26.762368",
2422 "data_digest": "0x2ddbf8f5",
2423 "omap_digest": "0xa03cef03",
2424 "expected_object_size": 0,
2425 "expected_write_size": 0,
2426 "alloc_hint_flags": 0,
2432 "union_shard_errors": [
2454 "data_digest": "0x2ddbf8f5",
2455 "omap_digest": "0x067f306a",
2462 "selected_object_info": {
2473 "prior_version": "43'36",
2474 "last_reqid": "osd.1.0:55",
2477 "mtime": "2018-04-05 14:33:27.460958",
2478 "local_mtime": "2018-04-05 14:33:27.462109",
2488 "data_digest": "0x2ddbf8f5",
2489 "omap_digest": "0x067f306a",
2490 "expected_object_size": 0,
2491 "expected_write_size": 0,
2492 "alloc_hint_flags": 0,
2498 "union_shard_errors": [
2528 "union_shard_errors": [
2544 "object_info": "bad-val",
2545 "data_digest": "0x2ddbf8f5",
2546 "omap_digest": "0x4f14f849",
2555 "data_digest": "0x2ddbf8f5",
2556 "omap_digest": "0x4f14f849",
2565 "union_shard_errors": [
2592 "prior_version": "49'45",
2593 "last_reqid": "osd.1.0:48",
2596 "mtime": "2018-04-05 14:33:29.498969",
2597 "local_mtime": "2018-04-05 14:33:29.499890",
2607 "data_digest": "0x2ddbf8f5",
2608 "omap_digest": "0x2d2a4d6e",
2609 "expected_object_size": 0,
2610 "expected_write_size": 0,
2611 "alloc_hint_flags": 0,
2617 "data_digest": "0x2ddbf8f5",
2618 "omap_digest": "0x2d2a4d6e",
2625 "data_digest": "0x2ddbf8f5",
2626 "omap_digest": "0x2d2a4d6e",
2635 "selected_object_info": {
2646 "prior_version": "49'45",
2647 "last_reqid": "osd.1.0:48",
2650 "mtime": "2018-04-05 14:33:29.498969",
2651 "local_mtime": "2018-04-05 14:33:29.499890",
2661 "data_digest": "0x2ddbf8f5",
2662 "omap_digest": "0x2d2a4d6e",
2663 "expected_object_size": 0,
2664 "expected_write_size": 0,
2665 "alloc_hint_flags": 0,
2671 "union_shard_errors": [
2694 "data_digest": "0x2ddbf8f5",
2698 "omap_digest": "0x8b699207",
2704 "snapset": "bad-val",
2705 "data_digest": "0x2ddbf8f5",
2709 "omap_digest": "0x8b699207",
2715 "union_shard_errors": [
2728 "selected_object_info": {
2729 "alloc_hint_flags": 0,
2730 "data_digest": "0x2ddbf8f5",
2731 "expected_object_size": 0,
2732 "expected_write_size": 0,
2752 "omap_digest": "0xe9572720",
2761 "data_digest": "0x5af0c3ef",
2763 "data_digest_mismatch_info"
2765 "omap_digest": "0xe9572720",
2771 "data_digest": "0x5af0c3ef",
2773 "data_digest_mismatch_info"
2775 "omap_digest": "0xe9572720",
2781 "union_shard_errors": [
2782 "data_digest_mismatch_info"
2787 "object_info_inconsistency"
2795 "selected_object_info": {
2796 "alloc_hint_flags": 255,
2797 "data_digest": "0x2ddbf8f5",
2798 "expected_object_size": 0,
2799 "expected_write_size": 0,
2819 "omap_digest": "0xddc3680f",
2828 "data_digest": "0xbd89c912",
2830 "data_digest_mismatch_info"
2833 "alloc_hint_flags": 0,
2834 "data_digest": "0x2ddbf8f5",
2835 "expected_object_size": 0,
2836 "expected_write_size": 0,
2856 "omap_digest": "0xddc3680f",
2863 "omap_digest": "0xddc3680f",
2869 "data_digest": "0xbd89c912",
2871 "data_digest_mismatch_info"
2874 "alloc_hint_flags": 255,
2875 "data_digest": "0x2ddbf8f5",
2876 "expected_object_size": 0,
2877 "expected_write_size": 0,
2897 "omap_digest": "0xddc3680f",
2904 "omap_digest": "0xddc3680f",
2910 "union_shard_errors": [
2911 "data_digest_mismatch_info"
2917 "data_digest": "0x578a4830",
2918 "omap_digest": "0xf8e11918",
2921 "data_digest_mismatch_info"
2927 "data_digest": "0x2ddbf8f5",
2928 "omap_digest": "0xf8e11918",
2935 "selected_object_info": {
2946 "prior_version": "23'6",
2947 "last_reqid": "osd.1.0:59",
2950 "mtime": "2018-04-05 14:33:20.498756",
2951 "local_mtime": "2018-04-05 14:33:20.499704",
2961 "data_digest": "0x2ddbf8f5",
2962 "omap_digest": "0xf8e11918",
2963 "expected_object_size": 0,
2964 "expected_write_size": 0,
2965 "alloc_hint_flags": 0,
2971 "union_shard_errors": [
2972 "data_digest_mismatch_info"
2975 "data_digest_mismatch"
2988 "data_digest": "0x2ddbf8f5",
2989 "omap_digest": "0x00b35dfd",
3003 "selected_object_info": {
3014 "prior_version": "25'9",
3015 "last_reqid": "osd.1.0:60",
3018 "mtime": "2018-04-05 14:33:21.189382",
3019 "local_mtime": "2018-04-05 14:33:21.190446",
3029 "data_digest": "0x2ddbf8f5",
3030 "omap_digest": "0x00b35dfd",
3031 "expected_object_size": 0,
3032 "expected_write_size": 0,
3033 "alloc_hint_flags": 0,
3039 "union_shard_errors": [
3054 "data_digest": "0x2ddbf8f5",
3055 "omap_digest": "0xd7178dfe",
3058 "omap_digest_mismatch_info"
3064 "data_digest": "0x2ddbf8f5",
3065 "omap_digest": "0xe2d46ea4",
3072 "selected_object_info": {
3083 "prior_version": "27'12",
3084 "last_reqid": "osd.1.0:61",
3087 "mtime": "2018-04-05 14:33:21.862313",
3088 "local_mtime": "2018-04-05 14:33:21.863261",
3098 "data_digest": "0x2ddbf8f5",
3099 "omap_digest": "0xe2d46ea4",
3100 "expected_object_size": 0,
3101 "expected_write_size": 0,
3102 "alloc_hint_flags": 0,
3108 "union_shard_errors": [
3109 "omap_digest_mismatch_info"
3112 "omap_digest_mismatch"
3125 "data_digest": "0x2ddbf8f5",
3126 "omap_digest": "0x1a862a41",
3133 "data_digest": "0x2ddbf8f5",
3134 "omap_digest": "0x06cac8f6",
3137 "omap_digest_mismatch_info"
3143 "selected_object_info": {
3154 "prior_version": "29'15",
3155 "last_reqid": "osd.1.0:62",
3158 "mtime": "2018-04-05 14:33:22.589300",
3159 "local_mtime": "2018-04-05 14:33:22.590376",
3169 "data_digest": "0x2ddbf8f5",
3170 "omap_digest": "0x1a862a41",
3171 "expected_object_size": 0,
3172 "expected_write_size": 0,
3173 "alloc_hint_flags": 0,
3179 "union_shard_errors": [
3180 "omap_digest_mismatch_info"
3183 "omap_digest_mismatch"
3196 "data_digest": "0x2ddbf8f5",
3197 "omap_digest": "0x689ee887",
3200 "omap_digest_mismatch_info"
3206 "data_digest": "0x2ddbf8f5",
3207 "omap_digest": "0x179c919f",
3214 "selected_object_info": {
3225 "prior_version": "31'18",
3226 "last_reqid": "osd.1.0:53",
3229 "mtime": "2018-04-05 14:33:23.289188",
3230 "local_mtime": "2018-04-05 14:33:23.290130",
3240 "data_digest": "0x2ddbf8f5",
3241 "omap_digest": "0x179c919f",
3242 "expected_object_size": 0,
3243 "expected_write_size": 0,
3244 "alloc_hint_flags": 0,
3250 "union_shard_errors": [
3251 "omap_digest_mismatch_info"
3254 "omap_digest_mismatch"
3267 "data_digest": "0x2ddbf8f5",
3268 "omap_digest": "0xefced57a",
3275 "data_digest": "0x2ddbf8f5",
3276 "omap_digest": "0x6a73cc07",
3279 "omap_digest_mismatch_info"
3285 "selected_object_info": {
3296 "prior_version": "33'21",
3297 "last_reqid": "osd.1.0:52",
3300 "mtime": "2018-04-05 14:33:23.979658",
3301 "local_mtime": "2018-04-05 14:33:23.980731",
3311 "data_digest": "0x2ddbf8f5",
3312 "omap_digest": "0xefced57a",
3313 "expected_object_size": 0,
3314 "expected_write_size": 0,
3315 "alloc_hint_flags": 0,
3321 "union_shard_errors": [
3322 "omap_digest_mismatch_info"
3325 "omap_digest_mismatch"
3342 "name": "key1-ROBJ8"
3346 "value": "val2-ROBJ8",
3347 "name": "key2-ROBJ8"
3350 "data_digest": "0x2ddbf8f5",
3351 "omap_digest": "0xd6be81dc",
3361 "value": "val1-ROBJ8",
3362 "name": "key1-ROBJ8"
3366 "value": "val3-ROBJ8",
3367 "name": "key3-ROBJ8"
3370 "data_digest": "0x2ddbf8f5",
3371 "omap_digest": "0xd6be81dc",
3378 "selected_object_info": {
3389 "prior_version": "79'65",
3390 "last_reqid": "client.4554.0:1",
3393 "mtime": "2018-04-05 14:34:05.598688",
3394 "local_mtime": "2018-04-05 14:34:05.599698",
3404 "data_digest": "0x2ddbf8f5",
3405 "omap_digest": "0xd6be81dc",
3406 "expected_object_size": 0,
3407 "expected_write_size": 0,
3408 "alloc_hint_flags": 0,
3414 "union_shard_errors": [],
3416 "attr_value_mismatch",
3417 "attr_name_mismatch"
3441 "prior_version": "37'27",
3442 "last_reqid": "osd.1.0:63",
3445 "mtime": "2018-04-05 14:33:25.352485",
3446 "local_mtime": "2018-04-05 14:33:25.353746",
3456 "data_digest": "0x2ddbf8f5",
3457 "omap_digest": "0x2eecc539",
3458 "expected_object_size": 0,
3459 "expected_write_size": 0,
3460 "alloc_hint_flags": 0,
3466 "data_digest": "0x1f26fb26",
3467 "omap_digest": "0x2eecc539",
3470 "obj_size_info_mismatch"
3486 "version": "119'68",
3487 "prior_version": "51'64",
3488 "last_reqid": "client.4834.0:1",
3491 "mtime": "2018-04-05 14:35:01.500659",
3492 "local_mtime": "2018-04-05 14:35:01.502117",
3502 "data_digest": "0x1f26fb26",
3503 "omap_digest": "0x2eecc539",
3504 "expected_object_size": 0,
3505 "expected_write_size": 0,
3506 "alloc_hint_flags": 0,
3512 "data_digest": "0x1f26fb26",
3513 "omap_digest": "0x2eecc539",
3520 "selected_object_info": {
3530 "version": "119'68",
3531 "prior_version": "51'64",
3532 "last_reqid": "client.4834.0:1",
3535 "mtime": "2018-04-05 14:35:01.500659",
3536 "local_mtime": "2018-04-05 14:35:01.502117",
3546 "data_digest": "0x1f26fb26",
3547 "omap_digest": "0x2eecc539",
3548 "expected_object_size": 0,
3549 "expected_write_size": 0,
3550 "alloc_hint_flags": 0,
3556 "union_shard_errors": [
3557 "obj_size_info_mismatch"
3560 "object_info_inconsistency"
3575 jq
"$jqfilter" $dir/json | jq
'.inconsistents' | python3
-c "$sortkeys" > $dir/csjson
3576 multidiff
$dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
3577 if test $getjson = "yes"
3579 jq
'.' $dir/json
> save2.json
3582 if test "$LOCALRUN" = "yes" && which jsonschema
> /dev
/null
;
3584 jsonschema
-i $dir/json
$CEPH_ROOT/doc
/rados
/command
/list-inconsistent-obj.json ||
return 1
3590 # This hangs if the repair doesn't work
3591 timeout
30 rados
-p $poolname get ROBJ17
$dir/robj17.out ||
return 1
3592 timeout
30 rados
-p $poolname get ROBJ18
$dir/robj18.out ||
return 1
3593 # Even though we couldn't repair all of the introduced errors, we can fix ROBJ17
3594 diff -q $dir/new.ROBJ17
$dir/robj17.out ||
return 1
3595 rm -f $dir/new.ROBJ17
$dir/robj17.out ||
return 1
3596 diff -q $dir/new.ROBJ18
$dir/robj18.out ||
return 1
3597 rm -f $dir/new.ROBJ18
$dir/robj18.out ||
return 1
3599 if [ $ERRORS != "0" ];
3601 echo "TEST FAILED WITH $ERRORS ERRORS"
3605 ceph osd pool
rm $poolname $poolname --yes-i-really-really-mean-it
3610 # Test scrub errors for an erasure coded pool
3612 function corrupt_scrub_erasure
() {
3614 local allow_overwrites
=$2
3615 local poolname
=ecpool
3618 run_mon
$dir a ||
return 1
3619 run_mgr
$dir x ||
return 1
3620 for id
in $
(seq 0 2) ; do
3621 if [ "$allow_overwrites" = "true" ]; then
3622 run_osd
$dir $id ||
return 1
3624 run_osd_filestore
$dir $id ||
return 1
3627 create_rbd_pool ||
return 1
3630 create_ec_pool
$poolname $allow_overwrites k
=2 m
=1 stripe_unit
=2K
--force ||
return 1
3631 wait_for_clean ||
return 1
3633 for i
in $
(seq 1 $total_objs) ; do
3635 add_something
$dir $poolname $objname ||
return 1
3637 local osd
=$
(expr $i % 2)
3641 # Size (deep scrub data_digest too)
3642 local payload
=UVWXYZZZ
3643 echo $payload > $dir/CORRUPT
3644 objectstore_tool
$dir $osd $objname set-bytes
$dir/CORRUPT ||
return 1
3649 dd if=/dev
/urandom of
=$dir/CORRUPT bs
=2048 count
=1
3650 objectstore_tool
$dir $osd $objname set-bytes
$dir/CORRUPT ||
return 1
3655 objectstore_tool
$dir $osd $objname remove ||
return 1
3659 rados
--pool $poolname setxattr
$objname key1-
$objname val1-
$objname ||
return 1
3660 rados
--pool $poolname setxattr
$objname key2-
$objname val2-
$objname ||
return 1
3663 echo -n bad-val
> $dir/bad-val
3664 objectstore_tool
$dir $osd $objname set-attr _key1-
$objname $dir/bad-val ||
return 1
3665 objectstore_tool
$dir $osd $objname rm-attr _key2-
$objname ||
return 1
3666 echo -n val3-
$objname > $dir/newval
3667 objectstore_tool
$dir $osd $objname set-attr _key3-
$objname $dir/newval ||
return 1
3668 rm $dir/bad-val
$dir/newval
3673 dd if=/dev
/urandom of
=$dir/CORRUPT bs
=2048 count
=2
3674 objectstore_tool
$dir $osd $objname set-bytes
$dir/CORRUPT ||
return 1
3678 objectstore_tool
$dir 0 $objname rm-attr hinfo_key ||
return 1
3679 echo -n bad-val
> $dir/bad-val
3680 objectstore_tool
$dir 1 $objname set-attr hinfo_key
$dir/bad-val ||
return 1
3684 local payload
=MAKETHISDIFFERENTFROMOTHEROBJECTS
3685 echo $payload > $dir/DIFFERENT
3686 rados
--pool $poolname put
$objname $dir/DIFFERENT ||
return 1
3688 # Get hinfo_key from EOBJ1
3689 objectstore_tool
$dir 0 EOBJ1 get-attr hinfo_key
> $dir/hinfo
3690 objectstore_tool
$dir 0 $objname set-attr hinfo_key
$dir/hinfo ||
return 1
3697 local pg
=$
(get_pg
$poolname EOBJ0
)
3701 rados list-inconsistent-pg
$poolname > $dir/json ||
return 1
3703 test $
(jq
'. | length' $dir/json
) = "1" ||
return 1
3705 test $
(jq
-r '.[0]' $dir/json
) = $pg ||
return 1
3707 rados list-inconsistent-obj
$pg > $dir/json ||
return 1
3708 # Get epoch for repair-get requests
3709 epoch
=$
(jq .epoch
$dir/json
)
3711 jq
"$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson
3735 "prior_version": "0'0",
3736 "last_reqid": "client.4184.0:1",
3748 "data_digest": "0x2ddbf8f5",
3749 "omap_digest": "0xffffffff",
3750 "expected_object_size": 0,
3751 "expected_write_size": 0,
3752 "alloc_hint_flags": 0,
3761 "size_mismatch_info",
3762 "obj_size_info_mismatch"
3775 "selected_object_info": {
3786 "prior_version": "0'0",
3787 "last_reqid": "client.4184.0:1",
3799 "data_digest": "0x2ddbf8f5",
3800 "omap_digest": "0xffffffff",
3801 "expected_object_size": 0,
3802 "expected_write_size": 0,
3803 "alloc_hint_flags": 0,
3809 "union_shard_errors": [
3810 "size_mismatch_info",
3811 "obj_size_info_mismatch"
3849 "selected_object_info": {
3860 "prior_version": "0'0",
3861 "last_reqid": "client.4252.0:1",
3873 "data_digest": "0x2ddbf8f5",
3874 "omap_digest": "0xffffffff",
3875 "expected_object_size": 0,
3876 "expected_write_size": 0,
3877 "alloc_hint_flags": 0,
3883 "union_shard_errors": [
3902 "name": "key1-EOBJ4"
3906 "value": "val2-EOBJ4",
3907 "name": "key2-EOBJ4"
3925 "value": "val1-EOBJ4",
3926 "name": "key1-EOBJ4"
3930 "value": "val2-EOBJ4",
3931 "name": "key2-EOBJ4"
3944 "value": "val1-EOBJ4",
3945 "name": "key1-EOBJ4"
3949 "value": "val3-EOBJ4",
3950 "name": "key3-EOBJ4"
3955 "selected_object_info": {
3966 "prior_version": "45'5",
3967 "last_reqid": "client.4294.0:1",
3979 "data_digest": "0x2ddbf8f5",
3980 "omap_digest": "0xffffffff",
3981 "expected_object_size": 0,
3982 "expected_write_size": 0,
3983 "alloc_hint_flags": 0,
3989 "union_shard_errors": [],
3991 "attr_value_mismatch",
3992 "attr_name_mismatch"
4023 "prior_version": "0'0",
4024 "last_reqid": "client.4382.0:1",
4036 "data_digest": "0x2ddbf8f5",
4037 "omap_digest": "0xffffffff",
4038 "expected_object_size": 0,
4039 "expected_write_size": 0,
4040 "alloc_hint_flags": 0,
4049 "size_mismatch_info",
4050 "obj_size_info_mismatch"
4063 "selected_object_info": {
4074 "prior_version": "0'0",
4075 "last_reqid": "client.4382.0:1",
4087 "data_digest": "0x2ddbf8f5",
4088 "omap_digest": "0xffffffff",
4089 "expected_object_size": 0,
4090 "expected_write_size": 0,
4091 "alloc_hint_flags": 0,
4097 "union_shard_errors": [
4098 "size_mismatch_info",
4099 "obj_size_info_mismatch"
4121 "selected_object_info": {
4132 "prior_version": "0'0",
4133 "last_reqid": "client.4418.0:1",
4145 "data_digest": "0x2ddbf8f5",
4146 "omap_digest": "0xffffffff",
4147 "expected_object_size": 0,
4148 "expected_write_size": 0,
4149 "alloc_hint_flags": 0,
4172 "hashinfo": "bad-val",
4182 "cumulative_shard_hashes": [
4196 "total_chunk_size": 2048
4200 "union_shard_errors": [
4207 "hinfo_inconsistency"
4216 "selected_object_info": {
4227 "prior_version": "75'9",
4228 "last_reqid": "client.4482.0:1",
4240 "data_digest": "0x136e4e27",
4241 "omap_digest": "0xffffffff",
4242 "expected_object_size": 0,
4243 "expected_write_size": 0,
4244 "alloc_hint_flags": 0,
4253 "cumulative_shard_hashes": [
4267 "total_chunk_size": 2048
4277 "cumulative_shard_hashes": [
4291 "total_chunk_size": 2048
4301 "cumulative_shard_hashes": [
4315 "total_chunk_size": 2048
4324 "union_shard_errors": []
4331 jq
"$jqfilter" $dir/json | jq
'.inconsistents' | python3
-c "$sortkeys" > $dir/csjson
4332 multidiff
$dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
4333 if test $getjson = "yes"
4335 jq
'.' $dir/json
> save3.json
4338 if test "$LOCALRUN" = "yes" && which jsonschema
> /dev
/null
;
4340 jsonschema
-i $dir/json
$CEPH_ROOT/doc
/rados
/command
/list-inconsistent-obj.json ||
return 1
4345 rados list-inconsistent-pg
$poolname > $dir/json ||
return 1
4347 test $
(jq
'. | length' $dir/json
) = "1" ||
return 1
4349 test $
(jq
-r '.[0]' $dir/json
) = $pg ||
return 1
4351 rados list-inconsistent-obj
$pg > $dir/json ||
return 1
4352 # Get epoch for repair-get requests
4353 epoch
=$
(jq .epoch
$dir/json
)
4355 if [ "$allow_overwrites" = "true" ]
4357 jq
"$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson
4363 "data_digest": "0x00000000",
4364 "omap_digest": "0xffffffff",
4383 "prior_version": "0'0",
4384 "last_reqid": "client.4184.0:1",
4387 "mtime": "2018-04-05 14:31:33.837147",
4388 "local_mtime": "2018-04-05 14:31:33.840763",
4396 "data_digest": "0x2ddbf8f5",
4397 "omap_digest": "0xffffffff",
4398 "expected_object_size": 0,
4399 "expected_write_size": 0,
4400 "alloc_hint_flags": 0,
4410 "size_mismatch_info",
4411 "obj_size_info_mismatch"
4417 "data_digest": "0x00000000",
4418 "omap_digest": "0xffffffff",
4426 "selected_object_info": {
4437 "prior_version": "0'0",
4438 "last_reqid": "client.4184.0:1",
4441 "mtime": "2018-04-05 14:31:33.837147",
4442 "local_mtime": "2018-04-05 14:31:33.840763",
4450 "data_digest": "0x2ddbf8f5",
4451 "omap_digest": "0xffffffff",
4452 "expected_object_size": 0,
4453 "expected_write_size": 0,
4454 "alloc_hint_flags": 0,
4460 "union_shard_errors": [
4462 "size_mismatch_info",
4463 "obj_size_info_mismatch"
4479 "data_digest": "0x00000000",
4480 "omap_digest": "0xffffffff",
4496 "data_digest": "0x00000000",
4497 "omap_digest": "0xffffffff",
4505 "selected_object_info": {
4516 "prior_version": "0'0",
4517 "last_reqid": "client.4252.0:1",
4520 "mtime": "2018-04-05 14:31:46.841145",
4521 "local_mtime": "2018-04-05 14:31:46.844996",
4529 "data_digest": "0x2ddbf8f5",
4530 "omap_digest": "0xffffffff",
4531 "expected_object_size": 0,
4532 "expected_write_size": 0,
4533 "alloc_hint_flags": 0,
4539 "union_shard_errors": [
4558 "name": "key1-EOBJ4"
4562 "value": "val2-EOBJ4",
4563 "name": "key2-EOBJ4"
4566 "data_digest": "0x00000000",
4567 "omap_digest": "0xffffffff",
4578 "value": "val1-EOBJ4",
4579 "name": "key1-EOBJ4"
4583 "value": "val2-EOBJ4",
4584 "name": "key2-EOBJ4"
4587 "data_digest": "0x00000000",
4588 "omap_digest": "0xffffffff",
4599 "value": "val1-EOBJ4",
4600 "name": "key1-EOBJ4"
4604 "value": "val3-EOBJ4",
4605 "name": "key3-EOBJ4"
4608 "data_digest": "0x00000000",
4609 "omap_digest": "0xffffffff",
4617 "selected_object_info": {
4628 "prior_version": "45'5",
4629 "last_reqid": "client.4294.0:1",
4632 "mtime": "2018-04-05 14:31:54.663622",
4633 "local_mtime": "2018-04-05 14:31:54.664527",
4641 "data_digest": "0x2ddbf8f5",
4642 "omap_digest": "0xffffffff",
4643 "expected_object_size": 0,
4644 "expected_write_size": 0,
4645 "alloc_hint_flags": 0,
4651 "union_shard_errors": [],
4653 "attr_value_mismatch",
4654 "attr_name_mismatch"
4667 "data_digest": "0x00000000",
4668 "omap_digest": "0xffffffff",
4687 "prior_version": "0'0",
4688 "last_reqid": "client.4382.0:1",
4691 "mtime": "2018-04-05 14:32:12.929161",
4692 "local_mtime": "2018-04-05 14:32:12.934707",
4700 "data_digest": "0x2ddbf8f5",
4701 "omap_digest": "0xffffffff",
4702 "expected_object_size": 0,
4703 "expected_write_size": 0,
4704 "alloc_hint_flags": 0,
4713 "size_mismatch_info",
4714 "obj_size_info_mismatch"
4721 "data_digest": "0x00000000",
4722 "omap_digest": "0xffffffff",
4730 "selected_object_info": {
4741 "prior_version": "0'0",
4742 "last_reqid": "client.4382.0:1",
4745 "mtime": "2018-04-05 14:32:12.929161",
4746 "local_mtime": "2018-04-05 14:32:12.934707",
4754 "data_digest": "0x2ddbf8f5",
4755 "omap_digest": "0xffffffff",
4756 "expected_object_size": 0,
4757 "expected_write_size": 0,
4758 "alloc_hint_flags": 0,
4764 "union_shard_errors": [
4766 "size_mismatch_info",
4767 "obj_size_info_mismatch"
4789 "union_shard_errors": [
4794 "selected_object_info": {
4805 "prior_version": "0'0",
4806 "last_reqid": "client.4418.0:1",
4809 "mtime": "2018-04-05 14:32:20.634116",
4810 "local_mtime": "2018-04-05 14:32:20.637999",
4818 "data_digest": "0x2ddbf8f5",
4819 "omap_digest": "0xffffffff",
4820 "expected_object_size": 0,
4821 "expected_write_size": 0,
4822 "alloc_hint_flags": 0,
4848 "hashinfo": "bad-val"
4856 "omap_digest": "0xffffffff",
4857 "data_digest": "0x00000000",
4859 "cumulative_shard_hashes": [
4873 "total_chunk_size": 2048
4887 "hinfo_inconsistency"
4889 "union_shard_errors": [],
4890 "selected_object_info": {
4901 "prior_version": "75'9",
4902 "last_reqid": "client.4482.0:1",
4905 "mtime": "2018-04-05 14:32:33.058782",
4906 "local_mtime": "2018-04-05 14:32:33.059679",
4914 "data_digest": "0x136e4e27",
4915 "omap_digest": "0xffffffff",
4916 "expected_object_size": 0,
4917 "expected_write_size": 0,
4918 "alloc_hint_flags": 0,
4931 "omap_digest": "0xffffffff",
4932 "data_digest": "0x00000000",
4934 "cumulative_shard_hashes": [
4948 "total_chunk_size": 2048
4957 "omap_digest": "0xffffffff",
4958 "data_digest": "0x00000000",
4960 "cumulative_shard_hashes": [
4974 "total_chunk_size": 2048
4983 "omap_digest": "0xffffffff",
4984 "data_digest": "0x00000000",
4986 "cumulative_shard_hashes": [
5000 "total_chunk_size": 2048
5012 jq
"$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson
5018 "data_digest": "0x04cfa72f",
5019 "omap_digest": "0xffffffff",
5038 "prior_version": "0'0",
5039 "last_reqid": "client.4192.0:1",
5042 "mtime": "2018-04-05 14:30:10.688009",
5043 "local_mtime": "2018-04-05 14:30:10.691774",
5051 "data_digest": "0x2ddbf8f5",
5052 "omap_digest": "0xffffffff",
5053 "expected_object_size": 0,
5054 "expected_write_size": 0,
5055 "alloc_hint_flags": 0,
5065 "size_mismatch_info",
5066 "obj_size_info_mismatch"
5072 "data_digest": "0x04cfa72f",
5073 "omap_digest": "0xffffffff",
5081 "selected_object_info": {
5092 "prior_version": "0'0",
5093 "last_reqid": "client.4192.0:1",
5096 "mtime": "2018-04-05 14:30:10.688009",
5097 "local_mtime": "2018-04-05 14:30:10.691774",
5105 "data_digest": "0x2ddbf8f5",
5106 "omap_digest": "0xffffffff",
5107 "expected_object_size": 0,
5108 "expected_write_size": 0,
5109 "alloc_hint_flags": 0,
5115 "union_shard_errors": [
5117 "size_mismatch_info",
5118 "obj_size_info_mismatch"
5143 "data_digest": "0x04cfa72f",
5144 "omap_digest": "0xffffffff",
5152 "data_digest": "0x04cfa72f",
5153 "omap_digest": "0xffffffff",
5161 "selected_object_info": {
5172 "prior_version": "0'0",
5173 "last_reqid": "client.4224.0:1",
5176 "mtime": "2018-04-05 14:30:14.152945",
5177 "local_mtime": "2018-04-05 14:30:14.154014",
5185 "data_digest": "0x2ddbf8f5",
5186 "omap_digest": "0xffffffff",
5187 "expected_object_size": 0,
5188 "expected_write_size": 0,
5189 "alloc_hint_flags": 0,
5195 "union_shard_errors": [
5210 "data_digest": "0x04cfa72f",
5211 "omap_digest": "0xffffffff",
5227 "data_digest": "0x04cfa72f",
5228 "omap_digest": "0xffffffff",
5236 "selected_object_info": {
5247 "prior_version": "0'0",
5248 "last_reqid": "client.4258.0:1",
5251 "mtime": "2018-04-05 14:30:18.875544",
5252 "local_mtime": "2018-04-05 14:30:18.880153",
5260 "data_digest": "0x2ddbf8f5",
5261 "omap_digest": "0xffffffff",
5262 "expected_object_size": 0,
5263 "expected_write_size": 0,
5264 "alloc_hint_flags": 0,
5270 "union_shard_errors": [
5289 "name": "key1-EOBJ4"
5293 "value": "val2-EOBJ4",
5294 "name": "key2-EOBJ4"
5297 "data_digest": "0x04cfa72f",
5298 "omap_digest": "0xffffffff",
5311 "omap_digest": "0xffffffff",
5312 "data_digest": "0x04cfa72f",
5316 "value": "val1-EOBJ4",
5317 "name": "key1-EOBJ4"
5321 "value": "val2-EOBJ4",
5322 "name": "key2-EOBJ4"
5332 "omap_digest": "0xffffffff",
5333 "data_digest": "0x04cfa72f",
5337 "value": "val1-EOBJ4",
5338 "name": "key1-EOBJ4"
5342 "value": "val3-EOBJ4",
5343 "name": "key3-EOBJ4"
5348 "selected_object_info": {
5359 "prior_version": "45'5",
5360 "last_reqid": "client.4296.0:1",
5363 "mtime": "2018-04-05 14:30:22.271983",
5364 "local_mtime": "2018-04-05 14:30:22.272840",
5372 "data_digest": "0x2ddbf8f5",
5373 "omap_digest": "0xffffffff",
5374 "expected_object_size": 0,
5375 "expected_write_size": 0,
5376 "alloc_hint_flags": 0,
5382 "union_shard_errors": [],
5384 "attr_value_mismatch",
5385 "attr_name_mismatch"
5398 "data_digest": "0x04cfa72f",
5399 "omap_digest": "0xffffffff",
5418 "prior_version": "0'0",
5419 "last_reqid": "client.4384.0:1",
5422 "mtime": "2018-04-05 14:30:35.162395",
5423 "local_mtime": "2018-04-05 14:30:35.166390",
5431 "data_digest": "0x2ddbf8f5",
5432 "omap_digest": "0xffffffff",
5433 "expected_object_size": 0,
5434 "expected_write_size": 0,
5435 "alloc_hint_flags": 0,
5445 "size_mismatch_info",
5446 "obj_size_info_mismatch"
5452 "data_digest": "0x04cfa72f",
5453 "omap_digest": "0xffffffff",
5461 "selected_object_info": {
5472 "prior_version": "0'0",
5473 "last_reqid": "client.4384.0:1",
5476 "mtime": "2018-04-05 14:30:35.162395",
5477 "local_mtime": "2018-04-05 14:30:35.166390",
5485 "data_digest": "0x2ddbf8f5",
5486 "omap_digest": "0xffffffff",
5487 "expected_object_size": 0,
5488 "expected_write_size": 0,
5489 "alloc_hint_flags": 0,
5495 "union_shard_errors": [
5497 "size_mismatch_info",
5498 "obj_size_info_mismatch"
5520 "union_shard_errors": [
5525 "selected_object_info": {
5536 "prior_version": "0'0",
5537 "last_reqid": "client.4420.0:1",
5540 "mtime": "2018-04-05 14:30:40.914673",
5541 "local_mtime": "2018-04-05 14:30:40.917705",
5549 "data_digest": "0x2ddbf8f5",
5550 "omap_digest": "0xffffffff",
5551 "expected_object_size": 0,
5552 "expected_write_size": 0,
5553 "alloc_hint_flags": 0,
5579 "hashinfo": "bad-val"
5587 "omap_digest": "0xffffffff",
5588 "data_digest": "0x04cfa72f",
5590 "cumulative_shard_hashes": [
5604 "total_chunk_size": 2048
5618 "hinfo_inconsistency"
5620 "union_shard_errors": [
5623 "selected_object_info": {
5634 "prior_version": "75'9",
5635 "last_reqid": "client.4486.0:1",
5638 "mtime": "2018-04-05 14:30:50.995009",
5639 "local_mtime": "2018-04-05 14:30:50.996112",
5647 "data_digest": "0x136e4e27",
5648 "omap_digest": "0xffffffff",
5649 "expected_object_size": 0,
5650 "expected_write_size": 0,
5651 "alloc_hint_flags": 0,
5667 "cumulative_shard_hashes": [
5681 "total_chunk_size": 2048
5690 "omap_digest": "0xffffffff",
5691 "data_digest": "0x5b7455a8",
5693 "cumulative_shard_hashes": [
5707 "total_chunk_size": 2048
5716 "omap_digest": "0xffffffff",
5717 "data_digest": "0x5b7455a8",
5719 "cumulative_shard_hashes": [
5733 "total_chunk_size": 2048
5745 jq
"$jqfilter" $dir/json | jq
'.inconsistents' | python3
-c "$sortkeys" > $dir/csjson
5746 multidiff
$dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
5747 if test $getjson = "yes"
5749 if [ "$allow_overwrites" = "true" ]
5755 jq
'.' $dir/json
> save
${num}.json
5758 if test "$LOCALRUN" = "yes" && which jsonschema
> /dev
/null
;
5760 jsonschema
-i $dir/json
$CEPH_ROOT/doc
/rados
/command
/list-inconsistent-obj.json ||
return 1
5763 ceph osd pool
rm $poolname $poolname --yes-i-really-really-mean-it
# Run the erasure-coded scrub-corruption test against an append-only
# (no-overwrites) EC pool.
# Arguments: $1 - test directory
function TEST_corrupt_scrub_erasure_appends() {
    # "$1" quoted so a test directory containing spaces is passed intact.
    corrupt_scrub_erasure "$1" false
}
# Run the erasure-coded scrub-corruption test against an overwrites-enabled
# EC pool.  Skipped unless $use_ec_overwrite is "true" — the file header sets
# it to false on FreeBSD, where EC overwrites on filestore are unsafe.
# Arguments: $1 - test directory
function TEST_corrupt_scrub_erasure_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        # "$1" quoted so a test directory containing spaces is passed intact.
        corrupt_scrub_erasure "$1" true
    fi
}
5777 # Test to make sure that a periodic scrub won't cause deep-scrub info to be lost
5779 function TEST_periodic_scrub_replicated
() {
5781 local poolname
=psr_pool
5784 run_mon
$dir a
--osd_pool_default_size=2 ||
return 1
5785 run_mgr
$dir x ||
return 1
5786 local ceph_osd_args
="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
5787 ceph_osd_args
+="--osd_scrub_backoff_ratio=0"
5788 run_osd
$dir 0 $ceph_osd_args ||
return 1
5789 run_osd
$dir 1 $ceph_osd_args ||
return 1
5790 create_rbd_pool ||
return 1
5791 wait_for_clean ||
return 1
5793 create_pool
$poolname 1 1 ||
return 1
5794 wait_for_clean ||
return 1
5797 add_something
$dir $poolname $objname scrub ||
return 1
5798 local primary
=$
(get_primary
$poolname $objname)
5799 local pg
=$
(get_pg
$poolname $objname)
5801 # Add deep-scrub only error
5802 local payload
=UVWXYZ
5803 echo $payload > $dir/CORRUPT
5804 # Uses $ceph_osd_args for osd restart
5805 objectstore_tool
$dir $osd $objname set-bytes
$dir/CORRUPT ||
return 1
5807 # No scrub information available, so expect failure
5809 ! rados list-inconsistent-obj
$pg | jq
'.' ||
return 1
5812 pg_deep_scrub
$pg ||
return 1
5814 # Make sure bad object found
5815 rados list-inconsistent-obj
$pg | jq
'.' |
grep -q $objname ||
return 1
5818 local last_scrub
=$
(get_last_scrub_stamp
$pg)
5819 # Fake a schedule scrub
5820 ceph tell
$pg scrub ||
return 1
5821 # Wait for schedule regular scrub
5822 wait_for_scrub
$pg "$last_scrub"
5824 # It needed to be upgraded
5825 grep -q "Deep scrub errors, upgrading scrub to deep-scrub" $dir/osd.
${primary}.log ||
return 1
5827 # Bad object still known
5828 rados list-inconsistent-obj
$pg | jq
'.' |
grep -q $objname ||
return 1
5830 # Can't upgrade with this set
5831 ceph osd
set nodeep-scrub
5832 # Let map change propagate to OSDs
5833 ceph tell osd
.0 get_latest_osdmap
5837 # Fake a schedule scrub
5838 ceph tell
$pg scrub ||
return 1
5839 # Wait for schedule regular scrub
5840 # to notice scrub and skip it
5842 for i
in $
(seq 14 -1 0)
5845 ! grep -q "Regular scrub skipped due to deep-scrub errors and nodeep-scrub set" $dir/osd.
${primary}.log ||
{ found
=true
; break; }
5846 echo Time left
: $i seconds
5848 test $found = "true" ||
return 1
5850 # Bad object still known
5851 rados list-inconsistent-obj
$pg | jq
'.' |
grep -q $objname ||
return 1
5854 # Request a regular scrub and it will be done
5856 grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.
${primary}.log ||
return 1
5858 # deep-scrub error is no longer present
5859 rados list-inconsistent-obj
$pg | jq
'.' |
grep -qv $objname ||
return 1
5862 function TEST_scrub_warning
() {
5864 local poolname
=psr_pool
5869 local i7_days
=$
(calc
$i1_day \
* 7)
5870 local i14_days
=$
(calc
$i1_day \
* 14)
5872 local conf_overdue_seconds
=$
(calc
$i7_days + $i1_day + \
( $i7_days \
* $overdue \
) )
5873 local pool_overdue_seconds
=$
(calc
$i14_days + $i1_day + \
( $i14_days \
* $overdue \
) )
5875 run_mon
$dir a
--osd_pool_default_size=1 --mon_allow_pool_size_one=true ||
return 1
5876 run_mgr
$dir x
--mon_warn_pg_not_scrubbed_ratio=${overdue} --mon_warn_pg_not_deep_scrubbed_ratio=${overdue} ||
return 1
5877 run_osd
$dir 0 $ceph_osd_args --osd_scrub_backoff_ratio=0 ||
return 1
5879 for i
in $
(seq 1 $
(expr $scrubs + $deep_scrubs))
5881 create_pool
$poolname-$i 1 1 ||
return 1
5882 wait_for_clean ||
return 1
5885 ceph osd pool
set $poolname-$i scrub_max_interval
$i14_days
5887 if [ $i = $
(expr $scrubs + 1) ];
5889 ceph osd pool
set $poolname-$i deep_scrub_interval
$i14_days
5896 ceph osd
set noscrub ||
return 1
5897 ceph osd
set nodeep-scrub ||
return 1
5898 ceph config
set global osd_scrub_interval_randomize_ratio
0
5899 ceph config
set global osd_deep_scrub_randomize_ratio
0
5900 ceph config
set global osd_scrub_max_interval
${i7_days}
5901 ceph config
set global osd_deep_scrub_interval
${i7_days}
5903 # Fake schedule scrubs
5904 for i
in $
(seq 1 $scrubs)
5908 overdue_seconds
=$pool_overdue_seconds
5910 overdue_seconds
=$conf_overdue_seconds
5912 ceph tell
${i}.0 scrub $(expr ${overdue_seconds} + ${i}00) ||
return 1
5914 # Fake schedule deep scrubs
5915 for i
in $
(seq $
(expr $scrubs + 1) $
(expr $scrubs + $deep_scrubs))
5917 if [ $i = "$(expr $scrubs + 1)" ];
5919 overdue_seconds
=$pool_overdue_seconds
5921 overdue_seconds
=$conf_overdue_seconds
5923 ceph tell
${i}.0 deep_scrub $(expr ${overdue_seconds} + ${i}00) ||
return 1
5929 ceph health |
grep -q " pgs not deep-scrubbed in time" ||
return 1
5930 ceph health |
grep -q " pgs not scrubbed in time" ||
return 1
5932 # note that the 'ceph tell pg deep_scrub' command now also sets the regular scrub
5933 # time-stamp. I.e. - all 'late for deep scrubbing' pgs are also late for
5934 # regular scrubbing. For now, we'll allow both responses.
5935 COUNT
=$
(ceph health detail |
grep "not scrubbed since" |
wc -l)
5937 if (( $COUNT != $scrubs && $COUNT != $
(expr $scrubs+$deep_scrubs) )); then
5938 ceph health detail |
grep "not scrubbed since"
5941 COUNT
=$
(ceph health detail |
grep "not deep-scrubbed since" |
wc -l)
5942 if [ "$COUNT" != $deep_scrubs ]; then
5943 ceph health detail |
grep "not deep-scrubbed since"
5949 # Corrupt snapset in replicated pool
5951 function TEST_corrupt_snapset_scrub_rep
() {
5953 local poolname
=csr_pool
5956 run_mon
$dir a
--osd_pool_default_size=2 ||
return 1
5957 run_mgr
$dir x ||
return 1
5958 run_osd
$dir 0 ||
return 1
5959 run_osd
$dir 1 ||
return 1
5960 create_rbd_pool ||
return 1
5961 wait_for_clean ||
return 1
5963 create_pool foo
1 ||
return 1
5964 create_pool
$poolname 1 1 ||
return 1
5965 wait_for_clean ||
return 1
5967 for i
in $
(seq 1 $total_objs) ; do
5969 add_something
$dir $poolname $objname ||
return 1
5971 rados
--pool $poolname setomapheader
$objname hdr-
$objname ||
return 1
5972 rados
--pool $poolname setomapval
$objname key-
$objname val-
$objname ||
return 1
5975 local pg
=$
(get_pg
$poolname ROBJ0
)
5976 local primary
=$
(get_primary
$poolname ROBJ0
)
5978 rados
-p $poolname mksnap snap1
5979 echo -n head_of_snapshot_data
> $dir/change
5981 for i
in $
(seq 1 $total_objs) ; do
5984 # Alternate corruption between osd.0 and osd.1
5985 local osd
=$
(expr $i % 2)
5989 rados
--pool $poolname put
$objname $dir/change
5990 objectstore_tool
$dir $osd --head $objname clear-snapset corrupt ||
return 1
5994 rados
--pool $poolname put
$objname $dir/change
5995 objectstore_tool
$dir $osd --head $objname clear-snapset corrupt ||
return 1
6004 rados list-inconsistent-pg
$poolname > $dir/json ||
return 1
6006 test $
(jq
'. | length' $dir/json
) = "1" ||
return 1
6008 test $
(jq
-r '.[0]' $dir/json
) = $pg ||
return 1
6010 rados list-inconsistent-obj
$pg > $dir/json ||
return 1
6012 jq
"$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson
6025 "snapset_inconsistency"
6027 "union_shard_errors": [],
6028 "selected_object_info": {
6039 "prior_version": "21'3",
6040 "last_reqid": "client.4195.0:1",
6043 "mtime": "2018-04-05 14:35:43.286117",
6044 "local_mtime": "2018-04-05 14:35:43.288990",
6053 "data_digest": "0x53acb008",
6054 "omap_digest": "0xffffffff",
6055 "expected_object_size": 0,
6056 "expected_write_size": 0,
6057 "alloc_hint_flags": 0,
6104 "snapset_inconsistency"
6106 "union_shard_errors": [],
6107 "selected_object_info": {
6118 "prior_version": "23'6",
6119 "last_reqid": "client.4223.0:1",
6122 "mtime": "2018-04-05 14:35:48.326856",
6123 "local_mtime": "2018-04-05 14:35:48.328097",
6132 "data_digest": "0x53acb008",
6133 "omap_digest": "0xffffffff",
6134 "expected_object_size": 0,
6135 "expected_write_size": 0,
6136 "alloc_hint_flags": 0,
6178 jq
"$jqfilter" $dir/json | jq
'.inconsistents' | python3
-c "$sortkeys" > $dir/csjson
6179 multidiff
$dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
6180 if test $getjson = "yes"
6182 jq
'.' $dir/json
> save6.json
6185 if test "$LOCALRUN" = "yes" && which jsonschema
> /dev
/null
;
6187 jsonschema
-i $dir/json
$CEPH_ROOT/doc
/rados
/command
/list-inconsistent-obj.json ||
return 1
6191 declare -a err_strings
6192 err_strings
[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid [0-9]*:.*:::ROBJ1:head : snapset inconsistent"
6193 err_strings
[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid [0-9]*:.*:::ROBJ2:head : snapset inconsistent"
6194 err_strings
[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*:.*:::ROBJ1:1 : is an unexpected clone"
6195 err_strings
[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 3/4 objects, 1/2 clones, 3/4 dirty, 3/4 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 49/56 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes."
6196 err_strings
[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 0 missing, 2 inconsistent objects"
6197 err_strings
[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 4 errors"
6199 for err_string
in "${err_strings[@]}"
6201 if ! grep -q "$err_string" $dir/osd.
${primary}.log
6203 echo "Missing log message '$err_string'"
6204 ERRORS
=$
(expr $ERRORS + 1)
6208 if [ $ERRORS != "0" ];
6210 echo "TEST FAILED WITH $ERRORS ERRORS"
6214 ceph osd pool
rm $poolname $poolname --yes-i-really-really-mean-it
6217 function TEST_request_scrub_priority
() {
6219 local poolname
=psr_pool
6224 run_mon
$dir a
--osd_pool_default_size=1 --mon_allow_pool_size_one=true ||
return 1
6225 run_mgr
$dir x ||
return 1
6226 local ceph_osd_args
="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
6227 ceph_osd_args
+="--osd_scrub_backoff_ratio=0"
6228 run_osd
$dir 0 $ceph_osd_args ||
return 1
6230 create_pool
$poolname $PGS $PGS ||
return 1
6231 wait_for_clean ||
return 1
6234 add_something
$dir $poolname $objname noscrub ||
return 1
6235 local primary
=$
(get_primary
$poolname $objname)
6236 local pg
=$
(get_pg
$poolname $objname)
6237 poolid
=$
(ceph osd dump |
grep "^pool.*[']${poolname}[']" |
awk '{ print $2 }')
6240 for i
in $
(seq 0 $
(expr $PGS - 1))
6242 opg
="${poolid}.${i}"
6243 if [ "$opg" = "$pg" ]; then
6246 otherpgs
="${otherpgs}${opg} "
6247 local other_last_scrub
=$
(get_last_scrub_stamp
$pg)
6248 # Fake a schedule scrub
6249 ceph tell
$opg scrub
$opg ||
return 1
6255 # Request a regular scrub and it will be done
6256 local last_scrub
=$
(get_last_scrub_stamp
$pg)
6259 ceph osd
unset noscrub ||
return 1
6260 ceph osd
unset nodeep-scrub ||
return 1
6262 wait_for_scrub
$pg "$last_scrub"
6264 for opg
in $otherpgs $pg
6266 wait_for_scrub
$opg "$other_last_scrub"
6269 # Verify that the requested scrub ran first
6270 grep "log_channel.*scrub ok" $dir/osd.
${primary}.log |
grep -v purged_snaps |
head -1 |
sed 's/.*[[]DBG[]]//' |
grep -q $pg ||
return 1
# Entry point: main() (provided by the sourced ceph-helpers.sh) discovers and
# runs every TEST_* function in this file, each in its own scratch directory
# named after "osd-scrub-repair".
main osd-scrub-repair "$@"
6277 # compile-command: "cd build ; make -j4 && \
6278 # ../qa/run-standalone.sh osd-scrub-repair.sh"