3 # Copyright (C) 2014 Red Hat <contact@redhat.com>
5 # Author: Loic Dachary <loic@dachary.org>
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

# NOTE(review): this file was recovered from a whitespace-mangled extraction;
# missing lines were reconstructed — verify against upstream osd-scrub-repair.sh.
if [ `uname` = FreeBSD ]; then
    # erasure coding overwrites are only tested on Bluestore
    # erasure coding on filestore is unsafe
    # http://docs.ceph.com/docs/master/rados/operations/erasure-code/#erasure-coding-with-overwrites
    use_ec_overwrite=false
else
    use_ec_overwrite=true
fi

# Test development and debugging
# Set to "yes" in order to ignore diff errors and save results to update test
getjson="no"

# Filter out mtime and local_mtime dates, version, prior_version and last_reqid (client) from any object_info.
jqfilter='def walk(f):
  . as $in
  | if type == "object" then
      reduce keys[] as $key
        ( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f
  elif type == "array" then map( walk(f) ) | f
  else f
  end;
walk(if type == "object" then del(.mtime) else . end)
| walk(if type == "object" then del(.local_mtime) else . end)
| walk(if type == "object" then del(.last_reqid) else . end)
| walk(if type == "object" then del(.version) else . end)
| walk(if type == "object" then del(.prior_version) else . end)'

# Python one-liner used to normalize JSON for diffing (python2 print statement).
sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print json.dumps(ud, sort_keys=True, indent=2)'
# Entry point: configure a local test monitor address and CEPH_ARGS, then run
# every TEST_* function (or only those named on the command line) against $dir.
# NOTE(review): the 'function run() {' header and local declarations were lost
# in the garbled extraction and are reconstructed here — verify against upstream.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7107" # git grep '\<7107\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd-skip-data-digest=false "
    CEPH_ARGS+="--osd-objectstore=filestore "

    export -n CEPH_CLI_TEST_DUP_COMMAND
    # Default to every TEST_* function defined in this file.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        $func $dir || return 1
    done
}
# Write a small object into $poolname, saving the payload to $dir/ORIGINAL so
# later checks can diff against it.
#   $1 - test dir   $2 - pool name
#   $3 - object name (default SOMETHING)
#   $4 - "noscrub" (default) to set the noscrub/nodeep-scrub flags, anything
#        else to unset them
# NOTE(review): missing then/else/fi lines reconstructed — verify upstream.
function add_something() {
    local dir=$1
    local poolname=$2
    local obj=${3:-SOMETHING}
    local scrub=${4:-noscrub}

    if [ "$scrub" = "noscrub" ];
    then
        ceph osd set noscrub || return 1
        ceph osd set nodeep-scrub || return 1
    else
        ceph osd unset noscrub || return 1
        ceph osd unset nodeep-scrub || return 1
    fi

    local payload=ABCDEF
    echo $payload > $dir/ORIGINAL
    rados --pool $poolname put $obj $dir/ORIGINAL || return 1
}
#
# Corrupt one copy of a replicated pool
#
function TEST_corrupt_and_repair_replicated() {
    local dir=$1
    local poolname=rbd

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=2 || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    run_osd $dir 1 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1

    add_something $dir $poolname || return 1
    # Corrupt a non-primary copy first, then the primary copy.
    corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1
    # Reproduces http://tracker.ceph.com/issues/8914
    corrupt_and_repair_one $dir $poolname $(get_primary $poolname SOMETHING) || return 1

    teardown $dir || return 1
}
# Remove one object's file from two OSDs in parallel, repair the PG, and
# verify both copies come back and match the original payload.
#   $1 - test dir   $2 - pool name   $3 - first OSD id   $4 - second OSD id
# NOTE(review): the wait_background/repair steps were missing from the garbled
# extraction and are reconstructed — verify against upstream.
function corrupt_and_repair_two() {
    local dir=$1
    local poolname=$2
    local first=$3
    local second=$4

    #
    # 1) remove the corresponding file from the OSDs
    #
    pids=""
    run_in_background pids objectstore_tool $dir $first SOMETHING remove
    run_in_background pids objectstore_tool $dir $second SOMETHING remove
    wait_background pids
    return_code=$?
    if [ $return_code -ne 0 ]; then return $return_code; fi

    #
    # 2) repair the PG
    #
    local pg=$(get_pg $poolname SOMETHING)
    repair $pg
    #
    # 3) The files must be back
    #
    pids=""
    run_in_background pids objectstore_tool $dir $first SOMETHING list-attrs
    run_in_background pids objectstore_tool $dir $second SOMETHING list-attrs
    wait_background pids
    return_code=$?
    if [ $return_code -ne 0 ]; then return $return_code; fi

    rados --pool $poolname get SOMETHING $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1
}
#
# 1) add an object
# 2) remove the corresponding file from a designated OSD
# 3) repair the PG
# 4) check that the file has been restored in the designated OSD
#
# NOTE(review): the 'repair $pg' step was missing from the garbled extraction
# and is reconstructed — verify against upstream.
function corrupt_and_repair_one() {
    local dir=$1
    local poolname=$2
    local osd=$3

    #
    # 1) remove the corresponding file from the OSD
    #
    objectstore_tool $dir $osd SOMETHING remove || return 1
    #
    # 2) repair the PG
    #
    local pg=$(get_pg $poolname SOMETHING)
    repair $pg
    #
    # 3) The file must be back
    #
    objectstore_tool $dir $osd SOMETHING list-attrs || return 1
    rados --pool $poolname get SOMETHING $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1
}
# Exercise repair on an EC pool: corrupt/repair single shards (primary and
# non-primary) and pairs of shards.
#   $1 - test dir   $2 - EC pool name
function corrupt_and_repair_erasure_coded() {
    local dir=$1
    local poolname=$2

    add_something $dir $poolname || return 1

    local primary=$(get_primary $poolname SOMETHING)
    # All acting OSDs except the primary (sed strips the primary's id).
    local -a osds=($(get_osds $poolname SOMETHING | sed -e "s/$primary//"))
    local not_primary_first=${osds[0]}
    local not_primary_second=${osds[1]}

    # Reproduces http://tracker.ceph.com/issues/10017
    corrupt_and_repair_one $dir $poolname $primary || return 1
    # Reproduces http://tracker.ceph.com/issues/10409
    corrupt_and_repair_one $dir $poolname $not_primary_first || return 1
    corrupt_and_repair_two $dir $poolname $not_primary_first $not_primary_second || return 1
    corrupt_and_repair_two $dir $poolname $primary $not_primary_first || return 1
}
# Create an erasure-coded pool with profile "myprofile"; extra "$@" args
# (e.g. k=2 m=1 plugin=...) are appended to the profile.
#   $1 - pool name   $2 - "true" to enable EC overwrites
# NOTE(review): the body references the caller-scoped $poolname rather than
# the local $pool_name parameter — preserved as-is since callers rely on
# dynamic scoping; verify against upstream before changing.
function create_ec_pool() {
    local pool_name=$1
    shift
    local allow_overwrites=$1
    shift

    ceph osd erasure-code-profile set myprofile crush-failure-domain=osd "$@" || return 1

    create_pool "$poolname" 1 1 erasure myprofile || return 1

    if [ "$allow_overwrites" = "true" ]; then
        ceph osd pool set "$poolname" allow_ec_overwrites true || return 1
    fi

    wait_for_clean || return 1
    return 0
}
# Verify that scrub auto-repair restores a shard that was physically removed
# from an EC pool.
#   $1 - test dir   $2 - "true" to test EC overwrites (bluestore OSDs)
# NOTE(review): missing else/fi/done/} lines reconstructed — verify upstream.
function auto_repair_erasure_coded() {
    local dir=$1
    local allow_overwrites=$2
    local poolname=ecpool

    # Launch a cluster with 5 seconds scrub interval
    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    local ceph_osd_args="--osd-scrub-auto-repair=true \
            --osd-deep-scrub-interval=5 \
            --osd-scrub-max-interval=5 \
            --osd-scrub-min-interval=5 \
            --osd-scrub-interval-randomize-ratio=0"
    for id in $(seq 0 2) ; do
        if [ "$allow_overwrites" = "true" ]; then
            run_osd_bluestore $dir $id $ceph_osd_args || return 1
        else
            run_osd $dir $id $ceph_osd_args || return 1
        fi
    done
    create_rbd_pool || return 1
    wait_for_clean || return 1

    # Create the EC pool under test
    create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1

    # Put an object
    local payload=ABCDEF
    echo $payload > $dir/ORIGINAL
    rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1

    # Remove the object from one shard physically
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1
    # Wait for auto repair
    local pgid=$(get_pg $poolname SOMETHING)
    wait_for_scrub $pgid "$(get_last_scrub_stamp $pgid)"
    wait_for_clean || return 1
    # Verify - the file should be back
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
    rados --pool $poolname get SOMETHING $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1

    # Tear down
    teardown $dir || return 1
}
# Auto-repair on an append-only (no overwrites) EC pool.
function TEST_auto_repair_erasure_coded_appends() {
    auto_repair_erasure_coded $1 false
}
# Auto-repair with EC overwrites; skipped unless enabled at the top of the file.
function TEST_auto_repair_erasure_coded_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        auto_repair_erasure_coded $1 true
    fi
}
# Auto-repair on a replicated bluestore pool: remove an object from one
# replica, trigger a deep scrub, and verify auto-repair restores it.
#   $1 - test dir
# NOTE(review): missing done/} and payload lines reconstructed — verify upstream.
function TEST_auto_repair_bluestore_basic() {
    local dir=$1
    local poolname=testpool

    # Launch a cluster with 5 seconds scrub interval
    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    local ceph_osd_args="--osd-scrub-auto-repair=true \
            --osd_deep_scrub_randomize_ratio=0 \
            --osd-scrub-interval-randomize-ratio=0"
    for id in $(seq 0 2) ; do
        run_osd_bluestore $dir $id $ceph_osd_args || return 1
    done

    create_pool $poolname 1 1 || return 1
    ceph osd pool set $poolname size 2
    wait_for_clean || return 1

    # Put an object
    local payload=ABCDEF
    echo $payload > $dir/ORIGINAL
    rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1

    # Remove the object from one shard physically
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1

    local pgid=$(get_pg $poolname SOMETHING)
    local primary=$(get_primary $poolname SOMETHING)
    local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_deep_scrub $pgid
    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid

    # Wait for auto repair
    wait_for_scrub $pgid "$last_scrub_stamp" || return 1
    wait_for_clean || return 1
    # Verify - the file should be back
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1
    grep scrub_finish $dir/osd.${primary}.log

    # Tear down
    teardown $dir || return 1
}
# Like TEST_auto_repair_bluestore_basic, but triggered via a plain scrub; also
# checks num_objects_repaired accounting in the PG stats.
#   $1 - test dir
# NOTE(review): missing done/} lines reconstructed — verify upstream.
function TEST_auto_repair_bluestore_scrub() {
    local dir=$1
    local poolname=testpool

    # Launch a cluster with 5 seconds scrub interval
    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    local ceph_osd_args="--osd-scrub-auto-repair=true \
            --osd_deep_scrub_randomize_ratio=0 \
            --osd-scrub-interval-randomize-ratio=0"
    for id in $(seq 0 2) ; do
        run_osd_bluestore $dir $id $ceph_osd_args || return 1
    done

    create_pool $poolname 1 1 || return 1
    ceph osd pool set $poolname size 2
    wait_for_clean || return 1

    # Put an object
    local payload=ABCDEF
    echo $payload > $dir/ORIGINAL
    rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1

    # Remove the object from one shard physically
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1

    local pgid=$(get_pg $poolname SOMETHING)
    local primary=$(get_primary $poolname SOMETHING)
    local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid

    # Wait for scrub -> auto repair
    wait_for_scrub $pgid "$last_scrub_stamp" || return 1
    # Actually this causes 2 scrubs, so we better wait a little longer
    sleep 5
    wait_for_clean || return 1

    # Verify - the file should be back
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
    rados --pool $poolname get SOMETHING $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1
    grep scrub_finish $dir/osd.${primary}.log

    # This should have caused 1 object to be repaired
    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
    test "$COUNT" = "1" || return 1

    # Tear down
    teardown $dir || return 1
}
# Auto-repair with one repairable object (obj1) and one unrepairable object
# (obj2, missing shard AND missing '_' attr on the surviving copy): the PG
# must end up failed_repair, obj1 must be restored; after making obj2
# repairable the PG must return to active+clean.
#   $1 - test dir
# FIX(review): the original greps used "^$(pgid)" — command substitution of a
# nonexistent 'pgid' command — corrected to "^${pgid}" (variable expansion).
# NOTE(review): missing loop/brace lines reconstructed — verify upstream.
function TEST_auto_repair_bluestore_failed() {
    local dir=$1
    local poolname=testpool

    # Launch a cluster with 5 seconds scrub interval
    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    local ceph_osd_args="--osd-scrub-auto-repair=true \
            --osd_deep_scrub_randomize_ratio=0 \
            --osd-scrub-interval-randomize-ratio=0"
    for id in $(seq 0 2) ; do
        run_osd_bluestore $dir $id $ceph_osd_args || return 1
    done

    create_pool $poolname 1 1 || return 1
    ceph osd pool set $poolname size 2
    wait_for_clean || return 1

    # Put some objects
    local payload=ABCDEF
    echo $payload > $dir/ORIGINAL
    for i in $(seq 1 10)
    do
        rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
    done

    # Remove the object from one shard physically
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj1 remove || return 1
    # obj2 can't be repaired
    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1
    objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1

    local pgid=$(get_pg $poolname obj1)
    local primary=$(get_primary $poolname obj1)
    local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_deep_scrub $pgid
    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid

    # Wait for auto repair
    wait_for_scrub $pgid "$last_scrub_stamp" || return 1
    wait_for_clean || return 1
    grep scrub_finish $dir/osd.${primary}.log
    grep -q "scrub_finish.*still present after re-scrub" $dir/osd.${primary}.log || return 1
    ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1

    # Verify - obj1 should be back
    # Restarted osd get $ceph_osd_args passed
    objectstore_tool $dir $(get_not_primary $poolname obj1) obj1 list-attrs || return 1
    rados --pool $poolname get obj1 $dir/COPY || return 1
    diff $dir/ORIGINAL $dir/COPY || return 1
    grep scrub_finish $dir/osd.${primary}.log

    # Make the unrepairable object repairable by removing its bad primary copy
    objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 remove || return 1
    repair $pgid
    sleep 2

    ceph pg dump pgs | grep -q "^${pgid}.* active+clean " || return 1
    grep scrub_finish $dir/osd.${primary}.log

    # Tear down
    teardown $dir || return 1
}
# Auto-repair where NO object is repairable (both obj1 and obj2 have a missing
# shard plus a missing '_' attr on the surviving copy): scrub must report
# "present with no repair possible" and the PG must be marked failed_repair.
#   $1 - test dir
# FIX(review): "^$(pgid)" corrected to "^${pgid}" (the original would run a
# nonexistent 'pgid' command instead of expanding the variable).
# NOTE(review): missing loop/brace lines reconstructed — verify upstream.
function TEST_auto_repair_bluestore_failed_norecov() {
    local dir=$1
    local poolname=testpool

    # Launch a cluster with 5 seconds scrub interval
    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    local ceph_osd_args="--osd-scrub-auto-repair=true \
            --osd_deep_scrub_randomize_ratio=0 \
            --osd-scrub-interval-randomize-ratio=0"
    for id in $(seq 0 2) ; do
        run_osd_bluestore $dir $id $ceph_osd_args || return 1
    done

    create_pool $poolname 1 1 || return 1
    ceph osd pool set $poolname size 2
    wait_for_clean || return 1

    # Put some objects
    local payload=ABCDEF
    echo $payload > $dir/ORIGINAL
    for i in $(seq 1 10)
    do
        rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
    done

    # Remove the object from one shard physically
    # Restarted osd get $ceph_osd_args passed
    # obj1 can't be repaired
    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj1 remove || return 1
    objectstore_tool $dir $(get_primary $poolname SOMETHING) obj1 rm-attr _ || return 1
    # obj2 can't be repaired
    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1
    objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1

    local pgid=$(get_pg $poolname obj1)
    local primary=$(get_primary $poolname obj1)
    local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_deep_scrub $pgid
    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid

    # Wait for auto repair
    wait_for_scrub $pgid "$last_scrub_stamp" || return 1
    wait_for_clean || return 1
    grep -q "scrub_finish.*present with no repair possible" $dir/osd.${primary}.log || return 1
    ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1

    # Tear down
    teardown $dir || return 1
}
# Verify repair statistics accounting on a replicated pool: remove $REPAIRS
# object copies split evenly across the two OSDs (while they are down), repair,
# then check num_objects_repaired and per-OSD/summed num_shards_repaired.
#   $1 - test dir
# NOTE(review): OSDS/OBJS/REPAIRS values and the per-iteration OSD selection
# were missing from the garbled extraction and are reconstructed — verify
# against upstream osd-scrub-repair.sh.
function TEST_repair_stats() {
    local dir=$1
    local poolname=testpool
    local OSDS=2
    local OBJS=30
    # This need to be an even number
    local REPAIRS=20

    # Launch a cluster with 5 seconds scrub interval
    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
            --osd-scrub-interval-randomize-ratio=0"
    for id in $(seq 0 $(expr $OSDS - 1)) ; do
        run_osd_bluestore $dir $id $ceph_osd_args || return 1
    done

    create_pool $poolname 1 1 || return 1
    ceph osd pool set $poolname size 2
    wait_for_clean || return 1

    # Put some objects
    local payload=ABCDEF
    echo $payload > $dir/ORIGINAL
    for i in $(seq 1 $OBJS)
    do
        rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
    done

    # Remove the object from one shard physically
    # Restarted osd get $ceph_osd_args passed
    local other=$(get_not_primary $poolname obj1)
    local pgid=$(get_pg $poolname obj1)
    local primary=$(get_primary $poolname obj1)

    kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
    kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1
    for i in $(seq 1 $REPAIRS)
    do
        # Remove from both osd.0 and osd.1
        OSD=$(expr $i % 2)
        _objectstore_tool_nodown $dir $OSD obj$i remove || return 1
    done
    run_osd_bluestore $dir $primary $ceph_osd_args || return 1
    run_osd_bluestore $dir $other $ceph_osd_args || return 1
    wait_for_clean || return 1

    repair $pgid
    wait_for_clean || return 1

    # This should have caused 1 object to be repaired
    ceph pg $pgid query | jq '.info.stats.stat_sum'
    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
    test "$COUNT" = "$REPAIRS" || return 1

    ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary )"
    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary ).num_shards_repaired")
    test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1

    ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other )"
    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other ).num_shards_repaired")
    test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1

    ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
    test "$COUNT" = "$REPAIRS" || return 1

    # Tear down
    teardown $dir || return 1
}
# Same repair-statistics check as TEST_repair_stats but on an EC (k=2 m=1)
# pool across three OSDs; only the two OSDs that lost shards should report
# repairs.
#   $1 - test dir
# NOTE(review): OSDS/OBJS/REPAIRS values, the per-iteration OSD selection and
# the else branch (repair="0") were missing from the garbled extraction and
# are reconstructed — verify against upstream osd-scrub-repair.sh.
function TEST_repair_stats_ec() {
    local dir=$1
    local poolname=testpool
    local OSDS=3
    local OBJS=30
    # This need to be an even number
    local REPAIRS=26
    local allow_overwrites=false

    # Launch a cluster with 5 seconds scrub interval
    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
            --osd-scrub-interval-randomize-ratio=0"
    for id in $(seq 0 $(expr $OSDS - 1)) ; do
        run_osd_bluestore $dir $id $ceph_osd_args || return 1
    done

    # Create an EC pool
    create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1

    # Put some objects
    local payload=ABCDEF
    echo $payload > $dir/ORIGINAL
    for i in $(seq 1 $OBJS)
    do
        rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
    done

    # Remove the object from one shard physically
    # Restarted osd get $ceph_osd_args passed
    local other=$(get_not_primary $poolname obj1)
    local pgid=$(get_pg $poolname obj1)
    local primary=$(get_primary $poolname obj1)

    kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
    kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1
    for i in $(seq 1 $REPAIRS)
    do
        # Remove from both osd.0 and osd.1
        OSD=$(expr $i % 2)
        _objectstore_tool_nodown $dir $OSD obj$i remove || return 1
    done
    run_osd_bluestore $dir $primary $ceph_osd_args || return 1
    run_osd_bluestore $dir $other $ceph_osd_args || return 1
    wait_for_clean || return 1

    repair $pgid
    wait_for_clean || return 1

    # This should have caused 1 object to be repaired
    ceph pg $pgid query | jq '.info.stats.stat_sum'
    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
    test "$COUNT" = "$REPAIRS" || return 1

    for osd in $(seq 0 $(expr $OSDS - 1)) ; do
        if [ $osd = $other -o $osd = $primary ]; then
            repair=$(expr $REPAIRS / 2)
        else
            repair="0"
        fi

        ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd )"
        COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd ).num_shards_repaired")
        test "$COUNT" = "$repair" || return 1
    done

    ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
    test "$COUNT" = "$REPAIRS" || return 1

    # Tear down
    teardown $dir || return 1
}
# Corrupt-and-repair on a jerasure (k=2 m=2) EC pool with 4 OSDs.
#   $1 - test dir   $2 - "true" to test with EC overwrites (bluestore OSDs)
function corrupt_and_repair_jerasure() {
    local dir=$1
    local allow_overwrites=$2
    local poolname=ecpool

    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for id in $(seq 0 3) ; do
        if [ "$allow_overwrites" = "true" ]; then
            run_osd_bluestore $dir $id || return 1
        else
            run_osd $dir $id || return 1
        fi
    done
    create_rbd_pool || return 1
    wait_for_clean || return 1

    create_ec_pool $poolname $allow_overwrites k=2 m=2 || return 1
    corrupt_and_repair_erasure_coded $dir $poolname || return 1

    teardown $dir || return 1
}
# jerasure repair on an append-only EC pool.
function TEST_corrupt_and_repair_jerasure_appends() {
    corrupt_and_repair_jerasure $1 false
}
# jerasure repair with EC overwrites; skipped unless enabled at the top of the file.
function TEST_corrupt_and_repair_jerasure_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        corrupt_and_repair_jerasure $1 true
    fi
}
# Corrupt-and-repair on an LRC (k=4 m=2 l=3) EC pool with 10 OSDs.
#   $1 - test dir   $2 - "true" to test with EC overwrites (bluestore OSDs)
function corrupt_and_repair_lrc() {
    local dir=$1
    local allow_overwrites=$2
    local poolname=ecpool

    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for id in $(seq 0 9) ; do
        if [ "$allow_overwrites" = "true" ]; then
            run_osd_bluestore $dir $id || return 1
        else
            run_osd $dir $id || return 1
        fi
    done
    create_rbd_pool || return 1
    wait_for_clean || return 1

    create_ec_pool $poolname $allow_overwrites k=4 m=2 l=3 plugin=lrc || return 1
    corrupt_and_repair_erasure_coded $dir $poolname || return 1

    teardown $dir || return 1
}
# LRC repair on an append-only EC pool.
function TEST_corrupt_and_repair_lrc_appends() {
    corrupt_and_repair_lrc $1 false
}
# LRC repair with EC overwrites; skipped unless enabled at the top of the file.
function TEST_corrupt_and_repair_lrc_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        corrupt_and_repair_lrc $1 true
    fi
}
# Remove 3 of 4 shards of an object in a k=2 m=2 EC pool (more than m losses),
# repair, and verify the cluster reports the object as unfound while all 4
# OSDs stay up and in.
#   $1 - test dir   $2 - "true" to test with EC overwrites (bluestore OSDs)
# NOTE(review): wait_background/repair/sleep lines were missing from the
# garbled extraction and are reconstructed — verify against upstream.
function unfound_erasure_coded() {
    local dir=$1
    local allow_overwrites=$2
    local poolname=ecpool

    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for id in $(seq 0 3) ; do
        if [ "$allow_overwrites" = "true" ]; then
            run_osd_bluestore $dir $id || return 1
        else
            run_osd $dir $id || return 1
        fi
    done

    create_ec_pool $poolname $allow_overwrites k=2 m=2 || return 1

    add_something $dir $poolname || return 1

    local primary=$(get_primary $poolname SOMETHING)
    local -a osds=($(get_osds $poolname SOMETHING | sed -e "s/$primary//"))
    local not_primary_first=${osds[0]}
    local not_primary_second=${osds[1]}
    local not_primary_third=${osds[2]}

    #
    # 1) remove the corresponding file from the OSDs
    #
    pids=""
    run_in_background pids objectstore_tool $dir $not_primary_first SOMETHING remove
    run_in_background pids objectstore_tool $dir $not_primary_second SOMETHING remove
    run_in_background pids objectstore_tool $dir $not_primary_third SOMETHING remove
    wait_background pids
    return_code=$?
    if [ $return_code -ne 0 ]; then return $return_code; fi

    #
    # 2) repair the PG
    #
    local pg=$(get_pg $poolname SOMETHING)
    repair $pg
    #
    # 3) check pg state
    #
    # it may take a bit to appear due to mon/mgr asynchrony
    for f in `seq 1 60`; do
        ceph -s | grep "1/1 objects unfound" && break
        sleep 1
    done
    ceph -s|grep "4 up" || return 1
    ceph -s|grep "4 in" || return 1
    ceph -s|grep "1/1 objects unfound" || return 1

    teardown $dir || return 1
}
# Unfound-object detection on an append-only EC pool.
function TEST_unfound_erasure_coded_appends() {
    unfound_erasure_coded $1 false
}
# Unfound-object detection with EC overwrites; skipped unless enabled at the top of the file.
function TEST_unfound_erasure_coded_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        unfound_erasure_coded $1 true
    fi
}
#
# list_missing for EC pool
#
# NOTE(review): the shard-selection assignments (id=${osds0[0]} etc.) and the
# 'repair'/'sleep' steps were missing from the garbled extraction and are
# reconstructed — verify against upstream osd-scrub-repair.sh.
function list_missing_erasure_coded() {
    local dir=$1
    local allow_overwrites=$2
    local poolname=ecpool

    setup $dir || return 1
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for id in $(seq 0 2) ; do
        if [ "$allow_overwrites" = "true" ]; then
            run_osd_bluestore $dir $id || return 1
        else
            run_osd $dir $id || return 1
        fi
    done
    create_rbd_pool || return 1
    wait_for_clean || return 1

    create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1

    # Put an object and remove the two shards (including primary)
    add_something $dir $poolname MOBJ0 || return 1
    local -a osds0=($(get_osds $poolname MOBJ0))

    # Put another object and remove two shards (excluding primary)
    add_something $dir $poolname MOBJ1 || return 1
    local -a osds1=($(get_osds $poolname MOBJ1))

    # Stop all osd daemons
    for id in $(seq 0 2) ; do
        kill_daemons $dir TERM osd.$id >&2 < /dev/null || return 1
    done

    id=${osds0[0]}
    ceph-objectstore-tool --data-path $dir/$id \
        MOBJ0 remove || return 1
    id=${osds0[1]}
    ceph-objectstore-tool --data-path $dir/$id \
        MOBJ0 remove || return 1

    id=${osds1[1]}
    ceph-objectstore-tool --data-path $dir/$id \
        MOBJ1 remove || return 1
    id=${osds1[2]}
    ceph-objectstore-tool --data-path $dir/$id \
        MOBJ1 remove || return 1

    for id in $(seq 0 2) ; do
        activate_osd $dir $id >&2 || return 1
    done
    create_rbd_pool || return 1
    wait_for_clean || return 1

    # Get get - both objects should in the same PG
    local pg=$(get_pg $poolname MOBJ0)

    # Repair the PG, which triggers the recovering,
    # and should mark the object as unfound
    repair $pg

    for i in $(seq 0 120) ; do
        [ $i -lt 60 ] || return 1
        matches=$(ceph pg $pg list_unfound | egrep "MOBJ0|MOBJ1" | wc -l)
        [ $matches -eq 2 ] && break
        sleep 1
    done

    teardown $dir || return 1
}
# list_missing on an append-only EC pool.
function TEST_list_missing_erasure_coded_appends() {
    list_missing_erasure_coded $1 false
}
# list_missing with EC overwrites; skipped unless enabled at the top of the file.
function TEST_list_missing_erasure_coded_overwrites() {
    if [ "$use_ec_overwrite" = "true" ]; then
        list_missing_erasure_coded $1 true
    fi
}
867 # Corrupt one copy of a replicated pool
869 function TEST_corrupt_scrub_replicated
() {
871 local poolname
=csr_pool
874 setup
$dir ||
return 1
875 run_mon
$dir a
--osd_pool_default_size=2 ||
return 1
876 run_mgr
$dir x ||
return 1
877 run_osd
$dir 0 ||
return 1
878 run_osd
$dir 1 ||
return 1
879 create_rbd_pool ||
return 1
880 wait_for_clean ||
return 1
882 create_pool foo
1 ||
return 1
883 create_pool
$poolname 1 1 ||
return 1
884 wait_for_clean ||
return 1
886 for i
in $
(seq 1 $total_objs) ; do
888 add_something
$dir $poolname $objname ||
return 1
890 rados
--pool $poolname setomapheader
$objname hdr-
$objname ||
return 1
891 rados
--pool $poolname setomapval
$objname key-
$objname val-
$objname ||
return 1
894 local pg
=$
(get_pg
$poolname ROBJ0
)
895 local primary
=$
(get_primary
$poolname ROBJ0
)
897 # Compute an old omap digest and save oi
898 CEPH_ARGS
='' ceph daemon $
(get_asok_path osd
.0) \
899 config
set osd_deep_scrub_update_digest_min_age
0
900 CEPH_ARGS
='' ceph daemon $
(get_asok_path osd
.1) \
901 config
set osd_deep_scrub_update_digest_min_age
0
904 for i
in $
(seq 1 $total_objs) ; do
907 # Alternate corruption between osd.0 and osd.1
908 local osd
=$
(expr $i % 2)
912 # Size (deep scrub data_digest too)
913 local payload
=UVWXYZZZ
914 echo $payload > $dir/CORRUPT
915 objectstore_tool
$dir $osd $objname set-bytes
$dir/CORRUPT ||
return 1
919 # digest (deep scrub only)
921 echo $payload > $dir/CORRUPT
922 objectstore_tool
$dir $osd $objname set-bytes
$dir/CORRUPT ||
return 1
927 objectstore_tool
$dir $osd $objname remove ||
return 1
931 # Modify omap value (deep scrub only)
932 objectstore_tool
$dir $osd $objname set-omap key-
$objname $dir/CORRUPT ||
return 1
936 # Delete omap key (deep scrub only)
937 objectstore_tool
$dir $osd $objname rm-omap key-
$objname ||
return 1
941 # Add extra omap key (deep scrub only)
942 echo extra
> $dir/extra-val
943 objectstore_tool
$dir $osd $objname set-omap key2-
$objname $dir/extra-val ||
return 1
948 # Modify omap header (deep scrub only)
949 echo -n newheader
> $dir/hdr
950 objectstore_tool
$dir $osd $objname set-omaphdr
$dir/hdr ||
return 1
955 rados
--pool $poolname setxattr
$objname key1-
$objname val1-
$objname ||
return 1
956 rados
--pool $poolname setxattr
$objname key2-
$objname val2-
$objname ||
return 1
959 echo -n bad-val
> $dir/bad-val
960 objectstore_tool
$dir $osd $objname set-attr _key1-
$objname $dir/bad-val ||
return 1
961 objectstore_tool
$dir $osd $objname rm-attr _key2-
$objname ||
return 1
962 echo -n val3-
$objname > $dir/newval
963 objectstore_tool
$dir $osd $objname set-attr _key3-
$objname $dir/newval ||
return 1
964 rm $dir/bad-val
$dir/newval
968 objectstore_tool
$dir $osd $objname get-attr _
> $dir/robj9-oi
969 echo -n D
> $dir/change
970 rados
--pool $poolname put
$objname $dir/change
971 objectstore_tool
$dir $osd $objname set-attr _
$dir/robj9-oi
972 rm $dir/oi
$dir/change
975 # ROBJ10 must be handled after digests are re-computed by a deep scrub below
976 # ROBJ11 must be handled with config change before deep scrub
977 # ROBJ12 must be handled with config change before scrubs
978 # ROBJ13 must be handled before scrubs
981 echo -n bad-val
> $dir/bad-val
982 objectstore_tool
$dir 0 $objname set-attr _
$dir/bad-val ||
return 1
983 objectstore_tool
$dir 1 $objname rm-attr _ ||
return 1
988 objectstore_tool
$dir $osd $objname rm-attr _ ||
return 1
992 objectstore_tool
$dir 0 $objname rm-attr snapset ||
return 1
993 echo -n bad-val
> $dir/bad-val
994 objectstore_tool
$dir 1 $objname set-attr snapset
$dir/bad-val ||
return 1
998 # Deep-scrub only (all replicas are diffent than the object info
1000 echo $payload > $dir/new.ROBJ17
1001 objectstore_tool
$dir 0 $objname set-bytes
$dir/new.ROBJ17 ||
return 1
1002 objectstore_tool
$dir 1 $objname set-bytes
$dir/new.ROBJ17 ||
return 1
1006 # Deep-scrub only (all replicas are diffent than the object info
1007 local payload
=ROBJ18
1008 echo $payload > $dir/new.ROBJ18
1009 objectstore_tool
$dir 0 $objname set-bytes
$dir/new.ROBJ18 ||
return 1
1010 objectstore_tool
$dir 1 $objname set-bytes
$dir/new.ROBJ18 ||
return 1
1011 # Make one replica have a different object info, so a full repair must happen too
1012 objectstore_tool
$dir $osd $objname corrupt-info ||
return 1
1017 local pg
=$
(get_pg
$poolname ROBJ0
)
1019 inject_eio rep data
$poolname ROBJ11
$dir 0 ||
return 1 # shard 0 of [1, 0], osd.1
1020 inject_eio rep mdata
$poolname ROBJ12
$dir 1 ||
return 1 # shard 1 of [1, 0], osd.0
1021 inject_eio rep mdata
$poolname ROBJ13
$dir 1 ||
return 1 # shard 1 of [1, 0], osd.0
1022 inject_eio rep data
$poolname ROBJ13
$dir 0 ||
return 1 # shard 0 of [1, 0], osd.1
1027 declare -a s_err_strings
1028 err_strings
[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:30259878:::ROBJ15:head : candidate had a missing info key"
1029 err_strings
[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:33aca486:::ROBJ18:head : object info inconsistent "
1030 err_strings
[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:5c7b2c47:::ROBJ16:head : candidate had a corrupt snapset"
1031 err_strings
[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:5c7b2c47:::ROBJ16:head : candidate had a missing snapset key"
1032 err_strings
[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:5c7b2c47:::ROBJ16:head : failed to pick suitable object info"
1033 err_strings
[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:86586531:::ROBJ8:head : attr value mismatch '_key1-ROBJ8', attr name mismatch '_key3-ROBJ8', attr name mismatch '_key2-ROBJ8'"
1034 err_strings
[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:bc819597:::ROBJ12:head : candidate had a stat error"
1035 err_strings
[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:c0c86b1d:::ROBJ14:head : candidate had a missing info key"
1036 err_strings
[8]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info"
1037 err_strings
[9]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info"
1038 err_strings
[10]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info size 7 mismatch"
1039 err_strings
[11]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0"
1040 err_strings
[12]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:d60617f9:::ROBJ13:head : candidate had a stat error"
1041 err_strings
[13]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 3:f2a5b2a4:::ROBJ3:head : missing"
1042 err_strings
[14]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : candidate size 1 info size 7 mismatch"
1043 err_strings
[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent "
1044 err_strings
[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr"
1045 err_strings
[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr buffer::malformed_input: .* no longer understand old encoding version 3 < 97"
1046 err_strings
[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 18/18 objects, 0/0 clones, 17/18 dirty, 17/18 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 113/120 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes."
1047 err_strings
[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 1 missing, 7 inconsistent objects"
1048 err_strings
[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 17 errors"
1050 for err_string
in "${err_strings[@]}"
1052 if ! grep -q "$err_string" $dir/osd.
${primary}.log
1054 echo "Missing log message '$err_string'"
1055 ERRORS
=$
(expr $ERRORS + 1)
1059 rados list-inconsistent-pg
$poolname > $dir/json ||
return 1
1061 test $
(jq
'. | length' $dir/json
) = "1" ||
return 1
1063 test $
(jq
-r '.[0]' $dir/json
) = $pg ||
return 1
1065 rados list-inconsistent-obj
$pg > $dir/json ||
return 1
1066 # Get epoch for repair-get requests
1067 epoch
=$
(jq .epoch
$dir/json
)
1069 jq
"$jqfilter" << EOF | jq '.inconsistents' | python -c "$sortkeys" > $dir/checkcsjson
1092 "prior_version": "21'3",
1093 "last_reqid": "osd.1.0:57",
1107 "data_digest": "0x2ddbf8f5",
1108 "omap_digest": "0xf5fba2c6",
1109 "expected_object_size": 0,
1110 "expected_write_size": 0,
1111 "alloc_hint_flags": 0,
1119 "size_mismatch_info",
1120 "obj_size_info_mismatch"
1126 "selected_object_info": {
1137 "prior_version": "21'3",
1138 "last_reqid": "osd.1.0:57",
1141 "mtime": "2018-04-05 14:33:19.804040",
1142 "local_mtime": "2018-04-05 14:33:19.804839",
1152 "data_digest": "0x2ddbf8f5",
1153 "omap_digest": "0xf5fba2c6",
1154 "expected_object_size": 0,
1155 "expected_write_size": 0,
1156 "alloc_hint_flags": 0,
1162 "union_shard_errors": [
1163 "size_mismatch_info",
1164 "obj_size_info_mismatch"
1193 "selected_object_info": {
1204 "prior_version": "43'36",
1205 "last_reqid": "osd.1.0:55",
1219 "data_digest": "0x2ddbf8f5",
1220 "omap_digest": "0x067f306a",
1221 "expected_object_size": 0,
1222 "expected_write_size": 0,
1223 "alloc_hint_flags": 0,
1229 "union_shard_errors": [
1257 "selected_object_info": {
1268 "prior_version": "45'39",
1269 "last_reqid": "osd.1.0:58",
1283 "data_digest": "0x2ddbf8f5",
1284 "omap_digest": "0x6441854d",
1285 "expected_object_size": 0,
1286 "expected_write_size": 0,
1287 "alloc_hint_flags": 0,
1293 "union_shard_errors": [
1308 "object_info": "bad-val",
1325 "union_shard_errors": [
1352 "prior_version": "49'45",
1353 "last_reqid": "osd.1.0:48",
1356 "mtime": "2018-04-05 14:33:29.498969",
1357 "local_mtime": "2018-04-05 14:33:29.499890",
1367 "data_digest": "0x2ddbf8f5",
1368 "omap_digest": "0x2d2a4d6e",
1369 "expected_object_size": 0,
1370 "expected_write_size": 0,
1371 "alloc_hint_flags": 0,
1391 "selected_object_info": {
1402 "prior_version": "49'45",
1403 "last_reqid": "osd.1.0:48",
1417 "data_digest": "0x2ddbf8f5",
1418 "omap_digest": "0x2d2a4d6e",
1419 "expected_object_size": 0,
1420 "expected_write_size": 0,
1421 "alloc_hint_flags": 0,
1427 "union_shard_errors": [
1463 "snapset": "bad-val",
1467 "union_shard_errors": [
1474 "object_info_inconsistency"
1482 "selected_object_info": {
1483 "alloc_hint_flags": 255,
1484 "data_digest": "0x2ddbf8f5",
1485 "expected_object_size": 0,
1486 "expected_write_size": 0,
1506 "omap_digest": "0xddc3680f",
1517 "alloc_hint_flags": 0,
1518 "data_digest": "0x2ddbf8f5",
1519 "expected_object_size": 0,
1520 "expected_write_size": 0,
1540 "omap_digest": "0xddc3680f",
1554 "alloc_hint_flags": 255,
1555 "data_digest": "0x2ddbf8f5",
1556 "expected_object_size": 0,
1557 "expected_write_size": 0,
1577 "omap_digest": "0xddc3680f",
1589 "union_shard_errors": []
1607 "selected_object_info": {
1618 "prior_version": "25'9",
1619 "last_reqid": "osd.1.0:60",
1633 "data_digest": "0x2ddbf8f5",
1634 "omap_digest": "0x00b35dfd",
1635 "expected_object_size": 0,
1636 "expected_write_size": 0,
1637 "alloc_hint_flags": 0,
1643 "union_shard_errors": [
1662 "name": "key1-ROBJ8"
1666 "value": "val2-ROBJ8",
1667 "name": "key2-ROBJ8"
1679 "value": "val1-ROBJ8",
1680 "name": "key1-ROBJ8"
1684 "value": "val3-ROBJ8",
1685 "name": "key3-ROBJ8"
1694 "selected_object_info": {
1705 "prior_version": "79'65",
1706 "last_reqid": "client.4554.0:1",
1720 "data_digest": "0x2ddbf8f5",
1721 "omap_digest": "0xd6be81dc",
1722 "expected_object_size": 0,
1723 "expected_write_size": 0,
1724 "alloc_hint_flags": 0,
1730 "union_shard_errors": [],
1732 "attr_value_mismatch",
1733 "attr_name_mismatch"
1757 "prior_version": "51'64",
1758 "last_reqid": "client.4649.0:1",
1772 "data_digest": "0x2b63260d",
1773 "omap_digest": "0x2eecc539",
1774 "expected_object_size": 0,
1775 "expected_write_size": 0,
1776 "alloc_hint_flags": 0,
1799 "prior_version": "37'27",
1800 "last_reqid": "osd.1.0:63",
1803 "mtime": "2018-04-05 14:33:25.352485",
1804 "local_mtime": "2018-04-05 14:33:25.353746",
1814 "data_digest": "0x2ddbf8f5",
1815 "omap_digest": "0x2eecc539",
1816 "expected_object_size": 0,
1817 "expected_write_size": 0,
1818 "alloc_hint_flags": 0,
1826 "obj_size_info_mismatch"
1832 "selected_object_info": {
1843 "prior_version": "51'64",
1844 "last_reqid": "client.4649.0:1",
1858 "data_digest": "0x2b63260d",
1859 "omap_digest": "0x2eecc539",
1860 "expected_object_size": 0,
1861 "expected_write_size": 0,
1862 "alloc_hint_flags": 0,
1868 "union_shard_errors": [
1869 "obj_size_info_mismatch"
1872 "object_info_inconsistency"
1887 jq
"$jqfilter" $dir/json | jq
'.inconsistents' | python
-c "$sortkeys" > $dir/csjson
1888 multidiff
$dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
1889 if test $getjson = "yes"
1891 jq
'.' $dir/json
> save1.json
1894 if test "$LOCALRUN" = "yes" && which jsonschema
> /dev
/null
;
1896 jsonschema
-i $dir/json
$CEPH_ROOT/doc
/rados
/command
/list-inconsistent-obj.json ||
return 1
1900 # Change data and size again because digest was recomputed
1901 echo -n ZZZ
> $dir/change
1902 rados
--pool $poolname put
$objname $dir/change
1903 # Set one to an even older value
1904 objectstore_tool
$dir 0 $objname set-attr _
$dir/robj9-oi
1905 rm $dir/oi
$dir/change
1908 objectstore_tool
$dir 1 $objname get-attr _
> $dir/oi
1909 rados
--pool $poolname setomapval
$objname key2-
$objname val2-
$objname
1910 objectstore_tool
$dir 0 $objname set-attr _
$dir/oi
1911 objectstore_tool
$dir 1 $objname set-attr _
$dir/oi
1914 inject_eio rep data
$poolname ROBJ11
$dir 0 ||
return 1 # shard 0 of [1, 0], osd.1
1915 inject_eio rep mdata
$poolname ROBJ12
$dir 1 ||
return 1 # shard 1 of [1, 0], osd.0
1916 inject_eio rep mdata
$poolname ROBJ13
$dir 1 ||
return 1 # shard 1 of [1, 0], osd.0
1917 inject_eio rep data
$poolname ROBJ13
$dir 0 ||
return 1 # shard 0 of [1, 0], osd.1
1921 err_strings
[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:30259878:::ROBJ15:head : candidate had a missing info key"
1922 err_strings
[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:33aca486:::ROBJ18:head : data_digest 0xbd89c912 != data_digest 0x2ddbf8f5 from auth oi 3:33aca486:::ROBJ18:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 54 dd 2ddbf8f5 od ddc3680f alloc_hint [[]0 0 255[]][)], object info inconsistent "
1923 err_strings
[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:33aca486:::ROBJ18:head : data_digest 0xbd89c912 != data_digest 0x2ddbf8f5 from auth oi 3:33aca486:::ROBJ18:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 54 dd 2ddbf8f5 od ddc3680f alloc_hint [[]0 0 255[]][)]"
1924 err_strings
[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:33aca486:::ROBJ18:head : failed to pick suitable auth object"
1925 err_strings
[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:5c7b2c47:::ROBJ16:head : candidate had a corrupt snapset"
1926 err_strings
[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:5c7b2c47:::ROBJ16:head : candidate had a missing snapset key"
1927 err_strings
[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:5c7b2c47:::ROBJ16:head : failed to pick suitable object info"
1928 err_strings
[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:86586531:::ROBJ8:head : attr value mismatch '_key1-ROBJ8', attr name mismatch '_key3-ROBJ8', attr name mismatch '_key2-ROBJ8'"
1929 err_strings
[8]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:87abbf36:::ROBJ11:head : candidate had a read error"
1930 err_strings
[9]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:8aa5320e:::ROBJ17:head : data_digest 0x5af0c3ef != data_digest 0x2ddbf8f5 from auth oi 3:8aa5320e:::ROBJ17:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 51 dd 2ddbf8f5 od e9572720 alloc_hint [[]0 0 0[]][)]"
1931 err_strings
[10]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:8aa5320e:::ROBJ17:head : data_digest 0x5af0c3ef != data_digest 0x2ddbf8f5 from auth oi 3:8aa5320e:::ROBJ17:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 51 dd 2ddbf8f5 od e9572720 alloc_hint [[]0 0 0[]][)]"
1932 err_strings
[11]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:8aa5320e:::ROBJ17:head : failed to pick suitable auth object"
1933 err_strings
[12]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:8b55fa4b:::ROBJ7:head : omap_digest 0xefced57a != omap_digest 0x6a73cc07 from shard 1"
1934 err_strings
[13]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:8b55fa4b:::ROBJ7:head : omap_digest 0x6a73cc07 != omap_digest 0xefced57a from auth oi 3:8b55fa4b:::ROBJ7:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a alloc_hint [[]0 0 0[]][)]"
1935 err_strings
[14]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:a53c12e8:::ROBJ6:head : omap_digest 0x689ee887 != omap_digest 0x179c919f from shard 1, omap_digest 0x689ee887 != omap_digest 0x179c919f from auth oi 3:a53c12e8:::ROBJ6:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 179c919f alloc_hint [[]0 0 0[]][)]"
1936 err_strings
[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:b1f19cbd:::ROBJ10:head : omap_digest 0xa8dd5adc != omap_digest 0xc2025a24 from auth oi 3:b1f19cbd:::ROBJ10:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [[]0 0 0[]][)]"
1937 err_strings
[16]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:b1f19cbd:::ROBJ10:head : omap_digest 0xa8dd5adc != omap_digest 0xc2025a24 from auth oi 3:b1f19cbd:::ROBJ10:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [[]0 0 0[]][)]"
1938 err_strings
[17]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:b1f19cbd:::ROBJ10:head : failed to pick suitable auth object"
1939 err_strings
[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:bc819597:::ROBJ12:head : candidate had a stat error"
1940 err_strings
[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:c0c86b1d:::ROBJ14:head : candidate had a missing info key"
1941 err_strings
[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info"
1942 err_strings
[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info"
1943 err_strings
[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info size 7 mismatch"
1944 err_strings
[23]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from shard 0, data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:65 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0"
1945 err_strings
[24]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:d60617f9:::ROBJ13:head : candidate had a read error"
1946 err_strings
[25]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:d60617f9:::ROBJ13:head : candidate had a stat error"
1947 err_strings
[26]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:d60617f9:::ROBJ13:head : failed to pick suitable object info"
1948 err_strings
[27]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:e97ce31e:::ROBJ2:head : data_digest 0x578a4830 != data_digest 0x2ddbf8f5 from shard 1, data_digest 0x578a4830 != data_digest 0x2ddbf8f5 from auth oi 3:e97ce31e:::ROBJ2:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od f8e11918 alloc_hint [[]0 0 0[]][)]"
1949 err_strings
[28]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 3:f2a5b2a4:::ROBJ3:head : missing"
1950 err_strings
[29]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:f4981d31:::ROBJ4:head : omap_digest 0xd7178dfe != omap_digest 0xe2d46ea4 from shard 1, omap_digest 0xd7178dfe != omap_digest 0xe2d46ea4 from auth oi 3:f4981d31:::ROBJ4:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od e2d46ea4 alloc_hint [[]0 0 0[]][)]"
1951 err_strings
[30]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:f4bfd4d1:::ROBJ5:head : omap_digest 0x1a862a41 != omap_digest 0x6cac8f6 from shard 1"
1952 err_strings
[31]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:f4bfd4d1:::ROBJ5:head : omap_digest 0x6cac8f6 != omap_digest 0x1a862a41 from auth oi 3:f4bfd4d1:::ROBJ5:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41 alloc_hint [[]0 0 0[]][)]"
1953 err_strings
[32]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : candidate size 3 info size 7 mismatch"
1954 err_strings
[33]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent "
1955 err_strings
[34]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr"
1956 err_strings
[35]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr buffer::malformed_input: .* no longer understand old encoding version 3 < 97"
1957 err_strings
[36]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub : stat mismatch, got 18/18 objects, 0/0 clones, 17/18 dirty, 17/18 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 115/116 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes."
1958 err_strings
[37]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 1 missing, 11 inconsistent objects"
1959 err_strings
[38]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 35 errors"
1961 for err_string
in "${err_strings[@]}"
1963 if ! grep -q "$err_string" $dir/osd.
${primary}.log
1965 echo "Missing log message '$err_string'"
1966 ERRORS
=$
(expr $ERRORS + 1)
1970 rados list-inconsistent-pg
$poolname > $dir/json ||
return 1
1972 test $
(jq
'. | length' $dir/json
) = "1" ||
return 1
1974 test $
(jq
-r '.[0]' $dir/json
) = $pg ||
return 1
1976 rados list-inconsistent-obj
$pg > $dir/json ||
return 1
1977 # Get epoch for repair-get requests
1978 epoch
=$
(jq .epoch
$dir/json
)
1980 jq
"$jqfilter" << EOF | jq '.inconsistents' | python -c "$sortkeys" > $dir/checkcsjson
1986 "data_digest": "0x2ddbf8f5",
1987 "omap_digest": "0xf5fba2c6",
2005 "prior_version": "21'3",
2006 "last_reqid": "osd.1.0:57",
2009 "mtime": "2018-04-05 14:33:19.804040",
2010 "local_mtime": "2018-04-05 14:33:19.804839",
2020 "data_digest": "0x2ddbf8f5",
2021 "omap_digest": "0xf5fba2c6",
2022 "expected_object_size": 0,
2023 "expected_write_size": 0,
2024 "alloc_hint_flags": 0,
2030 "data_digest": "0x2d4a11c2",
2031 "omap_digest": "0xf5fba2c6",
2034 "data_digest_mismatch_info",
2035 "size_mismatch_info",
2036 "obj_size_info_mismatch"
2042 "selected_object_info": {
2053 "prior_version": "21'3",
2054 "last_reqid": "osd.1.0:57",
2057 "mtime": "2018-04-05 14:33:19.804040",
2058 "local_mtime": "2018-04-05 14:33:19.804839",
2068 "data_digest": "0x2ddbf8f5",
2069 "omap_digest": "0xf5fba2c6",
2070 "expected_object_size": 0,
2071 "expected_write_size": 0,
2072 "alloc_hint_flags": 0,
2078 "union_shard_errors": [
2079 "data_digest_mismatch_info",
2080 "size_mismatch_info",
2081 "obj_size_info_mismatch"
2084 "data_digest_mismatch",
2098 "data_digest": "0x2ddbf8f5",
2099 "omap_digest": "0xa8dd5adc",
2102 "omap_digest_mismatch_info"
2108 "data_digest": "0x2ddbf8f5",
2109 "omap_digest": "0xa8dd5adc",
2112 "omap_digest_mismatch_info"
2118 "selected_object_info": {
2119 "alloc_hint_flags": 0,
2120 "data_digest": "0x2ddbf8f5",
2121 "expected_object_size": 0,
2122 "expected_write_size": 0,
2142 "omap_digest": "0xc2025a24",
2149 "union_shard_errors": [
2150 "omap_digest_mismatch_info"
2164 "data_digest": "0x2ddbf8f5",
2165 "omap_digest": "0xa03cef03",
2180 "selected_object_info": {
2191 "prior_version": "41'33",
2192 "last_reqid": "osd.1.0:51",
2195 "mtime": "2018-04-05 14:33:26.761286",
2196 "local_mtime": "2018-04-05 14:33:26.762368",
2206 "data_digest": "0x2ddbf8f5",
2207 "omap_digest": "0xa03cef03",
2208 "expected_object_size": 0,
2209 "expected_write_size": 0,
2210 "alloc_hint_flags": 0,
2216 "union_shard_errors": [
2238 "data_digest": "0x2ddbf8f5",
2239 "omap_digest": "0x067f306a",
2246 "selected_object_info": {
2257 "prior_version": "43'36",
2258 "last_reqid": "osd.1.0:55",
2261 "mtime": "2018-04-05 14:33:27.460958",
2262 "local_mtime": "2018-04-05 14:33:27.462109",
2272 "data_digest": "0x2ddbf8f5",
2273 "omap_digest": "0x067f306a",
2274 "expected_object_size": 0,
2275 "expected_write_size": 0,
2276 "alloc_hint_flags": 0,
2282 "union_shard_errors": [
2312 "union_shard_errors": [
2328 "object_info": "bad-val",
2329 "data_digest": "0x2ddbf8f5",
2330 "omap_digest": "0x4f14f849",
2339 "data_digest": "0x2ddbf8f5",
2340 "omap_digest": "0x4f14f849",
2349 "union_shard_errors": [
2376 "prior_version": "49'45",
2377 "last_reqid": "osd.1.0:48",
2380 "mtime": "2018-04-05 14:33:29.498969",
2381 "local_mtime": "2018-04-05 14:33:29.499890",
2391 "data_digest": "0x2ddbf8f5",
2392 "omap_digest": "0x2d2a4d6e",
2393 "expected_object_size": 0,
2394 "expected_write_size": 0,
2395 "alloc_hint_flags": 0,
2401 "data_digest": "0x2ddbf8f5",
2402 "omap_digest": "0x2d2a4d6e",
2409 "data_digest": "0x2ddbf8f5",
2410 "omap_digest": "0x2d2a4d6e",
2419 "selected_object_info": {
2430 "prior_version": "49'45",
2431 "last_reqid": "osd.1.0:48",
2434 "mtime": "2018-04-05 14:33:29.498969",
2435 "local_mtime": "2018-04-05 14:33:29.499890",
2445 "data_digest": "0x2ddbf8f5",
2446 "omap_digest": "0x2d2a4d6e",
2447 "expected_object_size": 0,
2448 "expected_write_size": 0,
2449 "alloc_hint_flags": 0,
2455 "union_shard_errors": [
2478 "data_digest": "0x2ddbf8f5",
2482 "omap_digest": "0x8b699207",
2488 "snapset": "bad-val",
2489 "data_digest": "0x2ddbf8f5",
2493 "omap_digest": "0x8b699207",
2499 "union_shard_errors": [
2512 "selected_object_info": {
2513 "alloc_hint_flags": 0,
2514 "data_digest": "0x2ddbf8f5",
2515 "expected_object_size": 0,
2516 "expected_write_size": 0,
2536 "omap_digest": "0xe9572720",
2545 "data_digest": "0x5af0c3ef",
2547 "data_digest_mismatch_info"
2549 "omap_digest": "0xe9572720",
2555 "data_digest": "0x5af0c3ef",
2557 "data_digest_mismatch_info"
2559 "omap_digest": "0xe9572720",
2565 "union_shard_errors": [
2566 "data_digest_mismatch_info"
2571 "object_info_inconsistency"
2579 "selected_object_info": {
2580 "alloc_hint_flags": 255,
2581 "data_digest": "0x2ddbf8f5",
2582 "expected_object_size": 0,
2583 "expected_write_size": 0,
2603 "omap_digest": "0xddc3680f",
2612 "data_digest": "0xbd89c912",
2614 "data_digest_mismatch_info"
2617 "alloc_hint_flags": 0,
2618 "data_digest": "0x2ddbf8f5",
2619 "expected_object_size": 0,
2620 "expected_write_size": 0,
2640 "omap_digest": "0xddc3680f",
2647 "omap_digest": "0xddc3680f",
2653 "data_digest": "0xbd89c912",
2655 "data_digest_mismatch_info"
2658 "alloc_hint_flags": 255,
2659 "data_digest": "0x2ddbf8f5",
2660 "expected_object_size": 0,
2661 "expected_write_size": 0,
2681 "omap_digest": "0xddc3680f",
2688 "omap_digest": "0xddc3680f",
2694 "union_shard_errors": [
2695 "data_digest_mismatch_info"
2701 "data_digest": "0x578a4830",
2702 "omap_digest": "0xf8e11918",
2705 "data_digest_mismatch_info"
2711 "data_digest": "0x2ddbf8f5",
2712 "omap_digest": "0xf8e11918",
2719 "selected_object_info": {
2730 "prior_version": "23'6",
2731 "last_reqid": "osd.1.0:59",
2734 "mtime": "2018-04-05 14:33:20.498756",
2735 "local_mtime": "2018-04-05 14:33:20.499704",
2745 "data_digest": "0x2ddbf8f5",
2746 "omap_digest": "0xf8e11918",
2747 "expected_object_size": 0,
2748 "expected_write_size": 0,
2749 "alloc_hint_flags": 0,
2755 "union_shard_errors": [
2756 "data_digest_mismatch_info"
2759 "data_digest_mismatch"
2772 "data_digest": "0x2ddbf8f5",
2773 "omap_digest": "0x00b35dfd",
2787 "selected_object_info": {
2798 "prior_version": "25'9",
2799 "last_reqid": "osd.1.0:60",
2802 "mtime": "2018-04-05 14:33:21.189382",
2803 "local_mtime": "2018-04-05 14:33:21.190446",
2813 "data_digest": "0x2ddbf8f5",
2814 "omap_digest": "0x00b35dfd",
2815 "expected_object_size": 0,
2816 "expected_write_size": 0,
2817 "alloc_hint_flags": 0,
2823 "union_shard_errors": [
2838 "data_digest": "0x2ddbf8f5",
2839 "omap_digest": "0xd7178dfe",
2842 "omap_digest_mismatch_info"
2848 "data_digest": "0x2ddbf8f5",
2849 "omap_digest": "0xe2d46ea4",
2856 "selected_object_info": {
2867 "prior_version": "27'12",
2868 "last_reqid": "osd.1.0:61",
2871 "mtime": "2018-04-05 14:33:21.862313",
2872 "local_mtime": "2018-04-05 14:33:21.863261",
2882 "data_digest": "0x2ddbf8f5",
2883 "omap_digest": "0xe2d46ea4",
2884 "expected_object_size": 0,
2885 "expected_write_size": 0,
2886 "alloc_hint_flags": 0,
2892 "union_shard_errors": [
2893 "omap_digest_mismatch_info"
2896 "omap_digest_mismatch"
2909 "data_digest": "0x2ddbf8f5",
2910 "omap_digest": "0x1a862a41",
2917 "data_digest": "0x2ddbf8f5",
2918 "omap_digest": "0x06cac8f6",
2921 "omap_digest_mismatch_info"
2927 "selected_object_info": {
2938 "prior_version": "29'15",
2939 "last_reqid": "osd.1.0:62",
2942 "mtime": "2018-04-05 14:33:22.589300",
2943 "local_mtime": "2018-04-05 14:33:22.590376",
2953 "data_digest": "0x2ddbf8f5",
2954 "omap_digest": "0x1a862a41",
2955 "expected_object_size": 0,
2956 "expected_write_size": 0,
2957 "alloc_hint_flags": 0,
2963 "union_shard_errors": [
2964 "omap_digest_mismatch_info"
2967 "omap_digest_mismatch"
2980 "data_digest": "0x2ddbf8f5",
2981 "omap_digest": "0x689ee887",
2984 "omap_digest_mismatch_info"
2990 "data_digest": "0x2ddbf8f5",
2991 "omap_digest": "0x179c919f",
2998 "selected_object_info": {
3009 "prior_version": "31'18",
3010 "last_reqid": "osd.1.0:53",
3013 "mtime": "2018-04-05 14:33:23.289188",
3014 "local_mtime": "2018-04-05 14:33:23.290130",
3024 "data_digest": "0x2ddbf8f5",
3025 "omap_digest": "0x179c919f",
3026 "expected_object_size": 0,
3027 "expected_write_size": 0,
3028 "alloc_hint_flags": 0,
3034 "union_shard_errors": [
3035 "omap_digest_mismatch_info"
3038 "omap_digest_mismatch"
3051 "data_digest": "0x2ddbf8f5",
3052 "omap_digest": "0xefced57a",
3059 "data_digest": "0x2ddbf8f5",
3060 "omap_digest": "0x6a73cc07",
3063 "omap_digest_mismatch_info"
3069 "selected_object_info": {
3080 "prior_version": "33'21",
3081 "last_reqid": "osd.1.0:52",
3084 "mtime": "2018-04-05 14:33:23.979658",
3085 "local_mtime": "2018-04-05 14:33:23.980731",
3095 "data_digest": "0x2ddbf8f5",
3096 "omap_digest": "0xefced57a",
3097 "expected_object_size": 0,
3098 "expected_write_size": 0,
3099 "alloc_hint_flags": 0,
3105 "union_shard_errors": [
3106 "omap_digest_mismatch_info"
3109 "omap_digest_mismatch"
3126 "name": "key1-ROBJ8"
3130 "value": "val2-ROBJ8",
3131 "name": "key2-ROBJ8"
3134 "data_digest": "0x2ddbf8f5",
3135 "omap_digest": "0xd6be81dc",
3145 "value": "val1-ROBJ8",
3146 "name": "key1-ROBJ8"
3150 "value": "val3-ROBJ8",
3151 "name": "key3-ROBJ8"
3154 "data_digest": "0x2ddbf8f5",
3155 "omap_digest": "0xd6be81dc",
3162 "selected_object_info": {
3173 "prior_version": "79'65",
3174 "last_reqid": "client.4554.0:1",
3177 "mtime": "2018-04-05 14:34:05.598688",
3178 "local_mtime": "2018-04-05 14:34:05.599698",
3188 "data_digest": "0x2ddbf8f5",
3189 "omap_digest": "0xd6be81dc",
3190 "expected_object_size": 0,
3191 "expected_write_size": 0,
3192 "alloc_hint_flags": 0,
3198 "union_shard_errors": [],
3200 "attr_value_mismatch",
3201 "attr_name_mismatch"
3225 "prior_version": "37'27",
3226 "last_reqid": "osd.1.0:63",
3229 "mtime": "2018-04-05 14:33:25.352485",
3230 "local_mtime": "2018-04-05 14:33:25.353746",
3240 "data_digest": "0x2ddbf8f5",
3241 "omap_digest": "0x2eecc539",
3242 "expected_object_size": 0,
3243 "expected_write_size": 0,
3244 "alloc_hint_flags": 0,
3250 "data_digest": "0x1f26fb26",
3251 "omap_digest": "0x2eecc539",
3254 "obj_size_info_mismatch"
3270 "version": "119'68",
3271 "prior_version": "51'64",
3272 "last_reqid": "client.4834.0:1",
3275 "mtime": "2018-04-05 14:35:01.500659",
3276 "local_mtime": "2018-04-05 14:35:01.502117",
3286 "data_digest": "0x1f26fb26",
3287 "omap_digest": "0x2eecc539",
3288 "expected_object_size": 0,
3289 "expected_write_size": 0,
3290 "alloc_hint_flags": 0,
3296 "data_digest": "0x1f26fb26",
3297 "omap_digest": "0x2eecc539",
3304 "selected_object_info": {
3314 "version": "119'68",
3315 "prior_version": "51'64",
3316 "last_reqid": "client.4834.0:1",
3319 "mtime": "2018-04-05 14:35:01.500659",
3320 "local_mtime": "2018-04-05 14:35:01.502117",
3330 "data_digest": "0x1f26fb26",
3331 "omap_digest": "0x2eecc539",
3332 "expected_object_size": 0,
3333 "expected_write_size": 0,
3334 "alloc_hint_flags": 0,
3340 "union_shard_errors": [
3341 "obj_size_info_mismatch"
3344 "object_info_inconsistency"
3359 jq
"$jqfilter" $dir/json | jq
'.inconsistents' | python
-c "$sortkeys" > $dir/csjson
3360 multidiff
$dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
3361 if test $getjson = "yes"
3363 jq
'.' $dir/json
> save2.json
3366 if test "$LOCALRUN" = "yes" && which jsonschema
> /dev
/null
;
3368 jsonschema
-i $dir/json
$CEPH_ROOT/doc
/rados
/command
/list-inconsistent-obj.json ||
return 1
3374 # This hangs if the repair doesn't work
3375 timeout
30 rados
-p $poolname get ROBJ17
$dir/robj17.out ||
return 1
3376 timeout
30 rados
-p $poolname get ROBJ18
$dir/robj18.out ||
return 1
3377 # Even though we couldn't repair all of the introduced errors, we can fix ROBJ17
3378 diff -q $dir/new.ROBJ17
$dir/robj17.out ||
return 1
3379 rm -f $dir/new.ROBJ17
$dir/robj17.out ||
return 1
3380 diff -q $dir/new.ROBJ18
$dir/robj18.out ||
return 1
3381 rm -f $dir/new.ROBJ18
$dir/robj18.out ||
return 1
3383 if [ $ERRORS != "0" ];
3385 echo "TEST FAILED WITH $ERRORS ERRORS"
3389 ceph osd pool
rm $poolname $poolname --yes-i-really-really-mean-it
3390 teardown
$dir ||
return 1
3395 # Test scrub errors for an erasure coded pool
3397 function corrupt_scrub_erasure
() {
3399 local allow_overwrites
=$2
3400 local poolname
=ecpool
3403 setup
$dir ||
return 1
3404 run_mon
$dir a ||
return 1
3405 run_mgr
$dir x ||
return 1
3406 for id
in $
(seq 0 2) ; do
3407 if [ "$allow_overwrites" = "true" ]; then
3408 run_osd_bluestore
$dir $id ||
return 1
3410 run_osd
$dir $id ||
return 1
3413 create_rbd_pool ||
return 1
3416 create_ec_pool
$poolname $allow_overwrites k
=2 m
=1 stripe_unit
=2K
--force ||
return 1
3417 wait_for_clean ||
return 1
3419 for i
in $
(seq 1 $total_objs) ; do
3421 add_something
$dir $poolname $objname ||
return 1
3423 local osd
=$
(expr $i % 2)
3427 # Size (deep scrub data_digest too)
3428 local payload
=UVWXYZZZ
3429 echo $payload > $dir/CORRUPT
3430 objectstore_tool
$dir $osd $objname set-bytes
$dir/CORRUPT ||
return 1
3435 dd if=/dev
/urandom of
=$dir/CORRUPT bs
=2048 count
=1
3436 objectstore_tool
$dir $osd $objname set-bytes
$dir/CORRUPT ||
return 1
3441 objectstore_tool
$dir $osd $objname remove ||
return 1
3445 rados
--pool $poolname setxattr
$objname key1-
$objname val1-
$objname ||
return 1
3446 rados
--pool $poolname setxattr
$objname key2-
$objname val2-
$objname ||
return 1
3449 echo -n bad-val
> $dir/bad-val
3450 objectstore_tool
$dir $osd $objname set-attr _key1-
$objname $dir/bad-val ||
return 1
3451 objectstore_tool
$dir $osd $objname rm-attr _key2-
$objname ||
return 1
3452 echo -n val3-
$objname > $dir/newval
3453 objectstore_tool
$dir $osd $objname set-attr _key3-
$objname $dir/newval ||
return 1
3454 rm $dir/bad-val
$dir/newval
3459 dd if=/dev
/urandom of
=$dir/CORRUPT bs
=2048 count
=2
3460 objectstore_tool
$dir $osd $objname set-bytes
$dir/CORRUPT ||
return 1
3464 objectstore_tool
$dir 0 $objname rm-attr hinfo_key ||
return 1
3465 echo -n bad-val
> $dir/bad-val
3466 objectstore_tool
$dir 1 $objname set-attr hinfo_key
$dir/bad-val ||
return 1
3470 local payload
=MAKETHISDIFFERENTFROMOTHEROBJECTS
3471 echo $payload > $dir/DIFFERENT
3472 rados
--pool $poolname put
$objname $dir/DIFFERENT ||
return 1
3474 # Get hinfo_key from EOBJ1
3475 objectstore_tool
$dir 0 EOBJ1 get-attr hinfo_key
> $dir/hinfo
3476 objectstore_tool
$dir 0 $objname set-attr hinfo_key
$dir/hinfo ||
return 1
3483 local pg
=$
(get_pg
$poolname EOBJ0
)
3487 rados list-inconsistent-pg
$poolname > $dir/json ||
return 1
3489 test $
(jq
'. | length' $dir/json
) = "1" ||
return 1
3491 test $
(jq
-r '.[0]' $dir/json
) = $pg ||
return 1
3493 rados list-inconsistent-obj
$pg > $dir/json ||
return 1
3494 # Get epoch for repair-get requests
3495 epoch
=$
(jq .epoch
$dir/json
)
3497 jq
"$jqfilter" << EOF | jq '.inconsistents' | python -c "$sortkeys" > $dir/checkcsjson
3521 "prior_version": "0'0",
3522 "last_reqid": "client.4184.0:1",
3534 "data_digest": "0x2ddbf8f5",
3535 "omap_digest": "0xffffffff",
3536 "expected_object_size": 0,
3537 "expected_write_size": 0,
3538 "alloc_hint_flags": 0,
3547 "size_mismatch_info",
3548 "obj_size_info_mismatch"
3561 "selected_object_info": {
3572 "prior_version": "0'0",
3573 "last_reqid": "client.4184.0:1",
3585 "data_digest": "0x2ddbf8f5",
3586 "omap_digest": "0xffffffff",
3587 "expected_object_size": 0,
3588 "expected_write_size": 0,
3589 "alloc_hint_flags": 0,
3595 "union_shard_errors": [
3596 "size_mismatch_info",
3597 "obj_size_info_mismatch"
3635 "selected_object_info": {
3646 "prior_version": "0'0",
3647 "last_reqid": "client.4252.0:1",
3659 "data_digest": "0x2ddbf8f5",
3660 "omap_digest": "0xffffffff",
3661 "expected_object_size": 0,
3662 "expected_write_size": 0,
3663 "alloc_hint_flags": 0,
3669 "union_shard_errors": [
3688 "name": "key1-EOBJ4"
3692 "value": "val2-EOBJ4",
3693 "name": "key2-EOBJ4"
3711 "value": "val1-EOBJ4",
3712 "name": "key1-EOBJ4"
3716 "value": "val2-EOBJ4",
3717 "name": "key2-EOBJ4"
3730 "value": "val1-EOBJ4",
3731 "name": "key1-EOBJ4"
3735 "value": "val3-EOBJ4",
3736 "name": "key3-EOBJ4"
3741 "selected_object_info": {
3752 "prior_version": "45'5",
3753 "last_reqid": "client.4294.0:1",
3765 "data_digest": "0x2ddbf8f5",
3766 "omap_digest": "0xffffffff",
3767 "expected_object_size": 0,
3768 "expected_write_size": 0,
3769 "alloc_hint_flags": 0,
3775 "union_shard_errors": [],
3777 "attr_value_mismatch",
3778 "attr_name_mismatch"
3809 "prior_version": "0'0",
3810 "last_reqid": "client.4382.0:1",
3822 "data_digest": "0x2ddbf8f5",
3823 "omap_digest": "0xffffffff",
3824 "expected_object_size": 0,
3825 "expected_write_size": 0,
3826 "alloc_hint_flags": 0,
3835 "size_mismatch_info",
3836 "obj_size_info_mismatch"
3849 "selected_object_info": {
3860 "prior_version": "0'0",
3861 "last_reqid": "client.4382.0:1",
3873 "data_digest": "0x2ddbf8f5",
3874 "omap_digest": "0xffffffff",
3875 "expected_object_size": 0,
3876 "expected_write_size": 0,
3877 "alloc_hint_flags": 0,
3883 "union_shard_errors": [
3884 "size_mismatch_info",
3885 "obj_size_info_mismatch"
3907 "selected_object_info": {
3918 "prior_version": "0'0",
3919 "last_reqid": "client.4418.0:1",
3931 "data_digest": "0x2ddbf8f5",
3932 "omap_digest": "0xffffffff",
3933 "expected_object_size": 0,
3934 "expected_write_size": 0,
3935 "alloc_hint_flags": 0,
3958 "hashinfo": "bad-val",
3968 "cumulative_shard_hashes": [
3982 "total_chunk_size": 2048
3986 "union_shard_errors": [
3993 "hinfo_inconsistency"
4002 "selected_object_info": {
4013 "prior_version": "75'9",
4014 "last_reqid": "client.4482.0:1",
4026 "data_digest": "0x136e4e27",
4027 "omap_digest": "0xffffffff",
4028 "expected_object_size": 0,
4029 "expected_write_size": 0,
4030 "alloc_hint_flags": 0,
4039 "cumulative_shard_hashes": [
4053 "total_chunk_size": 2048
4063 "cumulative_shard_hashes": [
4077 "total_chunk_size": 2048
4087 "cumulative_shard_hashes": [
4101 "total_chunk_size": 2048
4110 "union_shard_errors": []
4117 jq
"$jqfilter" $dir/json | jq
'.inconsistents' | python
-c "$sortkeys" > $dir/csjson
4118 multidiff
$dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
4119 if test $getjson = "yes"
4121 jq
'.' $dir/json
> save3.json
4124 if test "$LOCALRUN" = "yes" && which jsonschema
> /dev
/null
;
4126 jsonschema
-i $dir/json
$CEPH_ROOT/doc
/rados
/command
/list-inconsistent-obj.json ||
return 1
4131 rados list-inconsistent-pg
$poolname > $dir/json ||
return 1
4133 test $
(jq
'. | length' $dir/json
) = "1" ||
return 1
4135 test $
(jq
-r '.[0]' $dir/json
) = $pg ||
return 1
4137 rados list-inconsistent-obj
$pg > $dir/json ||
return 1
4138 # Get epoch for repair-get requests
4139 epoch
=$
(jq .epoch
$dir/json
)
4141 if [ "$allow_overwrites" = "true" ]
4143 jq
"$jqfilter" << EOF | jq '.inconsistents' | python -c "$sortkeys" > $dir/checkcsjson
4149 "data_digest": "0x00000000",
4150 "omap_digest": "0xffffffff",
4169 "prior_version": "0'0",
4170 "last_reqid": "client.4184.0:1",
4173 "mtime": "2018-04-05 14:31:33.837147",
4174 "local_mtime": "2018-04-05 14:31:33.840763",
4182 "data_digest": "0x2ddbf8f5",
4183 "omap_digest": "0xffffffff",
4184 "expected_object_size": 0,
4185 "expected_write_size": 0,
4186 "alloc_hint_flags": 0,
4196 "size_mismatch_info",
4197 "obj_size_info_mismatch"
4203 "data_digest": "0x00000000",
4204 "omap_digest": "0xffffffff",
4212 "selected_object_info": {
4223 "prior_version": "0'0",
4224 "last_reqid": "client.4184.0:1",
4227 "mtime": "2018-04-05 14:31:33.837147",
4228 "local_mtime": "2018-04-05 14:31:33.840763",
4236 "data_digest": "0x2ddbf8f5",
4237 "omap_digest": "0xffffffff",
4238 "expected_object_size": 0,
4239 "expected_write_size": 0,
4240 "alloc_hint_flags": 0,
4246 "union_shard_errors": [
4248 "size_mismatch_info",
4249 "obj_size_info_mismatch"
4265 "data_digest": "0x00000000",
4266 "omap_digest": "0xffffffff",
4282 "data_digest": "0x00000000",
4283 "omap_digest": "0xffffffff",
4291 "selected_object_info": {
4302 "prior_version": "0'0",
4303 "last_reqid": "client.4252.0:1",
4306 "mtime": "2018-04-05 14:31:46.841145",
4307 "local_mtime": "2018-04-05 14:31:46.844996",
4315 "data_digest": "0x2ddbf8f5",
4316 "omap_digest": "0xffffffff",
4317 "expected_object_size": 0,
4318 "expected_write_size": 0,
4319 "alloc_hint_flags": 0,
4325 "union_shard_errors": [
4344 "name": "key1-EOBJ4"
4348 "value": "val2-EOBJ4",
4349 "name": "key2-EOBJ4"
4352 "data_digest": "0x00000000",
4353 "omap_digest": "0xffffffff",
4364 "value": "val1-EOBJ4",
4365 "name": "key1-EOBJ4"
4369 "value": "val2-EOBJ4",
4370 "name": "key2-EOBJ4"
4373 "data_digest": "0x00000000",
4374 "omap_digest": "0xffffffff",
4385 "value": "val1-EOBJ4",
4386 "name": "key1-EOBJ4"
4390 "value": "val3-EOBJ4",
4391 "name": "key3-EOBJ4"
4394 "data_digest": "0x00000000",
4395 "omap_digest": "0xffffffff",
4403 "selected_object_info": {
4414 "prior_version": "45'5",
4415 "last_reqid": "client.4294.0:1",
4418 "mtime": "2018-04-05 14:31:54.663622",
4419 "local_mtime": "2018-04-05 14:31:54.664527",
4427 "data_digest": "0x2ddbf8f5",
4428 "omap_digest": "0xffffffff",
4429 "expected_object_size": 0,
4430 "expected_write_size": 0,
4431 "alloc_hint_flags": 0,
4437 "union_shard_errors": [],
4439 "attr_value_mismatch",
4440 "attr_name_mismatch"
4453 "data_digest": "0x00000000",
4454 "omap_digest": "0xffffffff",
4462 "data_digest": "0x00000000",
4463 "omap_digest": "0xffffffff",
4475 "prior_version": "0'0",
4476 "last_reqid": "client.4382.0:1",
4479 "mtime": "2018-04-05 14:32:12.929161",
4480 "local_mtime": "2018-04-05 14:32:12.934707",
4488 "data_digest": "0x2ddbf8f5",
4489 "omap_digest": "0xffffffff",
4490 "expected_object_size": 0,
4491 "expected_write_size": 0,
4492 "alloc_hint_flags": 0,
4500 "size_mismatch_info",
4501 "obj_size_info_mismatch"
4508 "data_digest": "0x00000000",
4509 "omap_digest": "0xffffffff",
4517 "selected_object_info": {
4528 "prior_version": "0'0",
4529 "last_reqid": "client.4382.0:1",
4532 "mtime": "2018-04-05 14:32:12.929161",
4533 "local_mtime": "2018-04-05 14:32:12.934707",
4541 "data_digest": "0x2ddbf8f5",
4542 "omap_digest": "0xffffffff",
4543 "expected_object_size": 0,
4544 "expected_write_size": 0,
4545 "alloc_hint_flags": 0,
4551 "union_shard_errors": [
4552 "size_mismatch_info",
4553 "obj_size_info_mismatch"
4575 "union_shard_errors": [
4580 "selected_object_info": {
4591 "prior_version": "0'0",
4592 "last_reqid": "client.4418.0:1",
4595 "mtime": "2018-04-05 14:32:20.634116",
4596 "local_mtime": "2018-04-05 14:32:20.637999",
4604 "data_digest": "0x2ddbf8f5",
4605 "omap_digest": "0xffffffff",
4606 "expected_object_size": 0,
4607 "expected_write_size": 0,
4608 "alloc_hint_flags": 0,
4634 "hashinfo": "bad-val"
4642 "omap_digest": "0xffffffff",
4643 "data_digest": "0x00000000",
4645 "cumulative_shard_hashes": [
4659 "total_chunk_size": 2048
4673 "hinfo_inconsistency"
4675 "union_shard_errors": [],
4676 "selected_object_info": {
4687 "prior_version": "75'9",
4688 "last_reqid": "client.4482.0:1",
4691 "mtime": "2018-04-05 14:32:33.058782",
4692 "local_mtime": "2018-04-05 14:32:33.059679",
4700 "data_digest": "0x136e4e27",
4701 "omap_digest": "0xffffffff",
4702 "expected_object_size": 0,
4703 "expected_write_size": 0,
4704 "alloc_hint_flags": 0,
4717 "omap_digest": "0xffffffff",
4718 "data_digest": "0x00000000",
4720 "cumulative_shard_hashes": [
4734 "total_chunk_size": 2048
4743 "omap_digest": "0xffffffff",
4744 "data_digest": "0x00000000",
4746 "cumulative_shard_hashes": [
4760 "total_chunk_size": 2048
4769 "omap_digest": "0xffffffff",
4770 "data_digest": "0x00000000",
4772 "cumulative_shard_hashes": [
4786 "total_chunk_size": 2048
4798 jq
"$jqfilter" << EOF | jq '.inconsistents' | python -c "$sortkeys" > $dir/checkcsjson
4804 "data_digest": "0x04cfa72f",
4805 "omap_digest": "0xffffffff",
4824 "prior_version": "0'0",
4825 "last_reqid": "client.4192.0:1",
4828 "mtime": "2018-04-05 14:30:10.688009",
4829 "local_mtime": "2018-04-05 14:30:10.691774",
4837 "data_digest": "0x2ddbf8f5",
4838 "omap_digest": "0xffffffff",
4839 "expected_object_size": 0,
4840 "expected_write_size": 0,
4841 "alloc_hint_flags": 0,
4851 "size_mismatch_info",
4852 "obj_size_info_mismatch"
4858 "data_digest": "0x04cfa72f",
4859 "omap_digest": "0xffffffff",
4867 "selected_object_info": {
4878 "prior_version": "0'0",
4879 "last_reqid": "client.4192.0:1",
4882 "mtime": "2018-04-05 14:30:10.688009",
4883 "local_mtime": "2018-04-05 14:30:10.691774",
4891 "data_digest": "0x2ddbf8f5",
4892 "omap_digest": "0xffffffff",
4893 "expected_object_size": 0,
4894 "expected_write_size": 0,
4895 "alloc_hint_flags": 0,
4901 "union_shard_errors": [
4903 "size_mismatch_info",
4904 "obj_size_info_mismatch"
4929 "data_digest": "0x04cfa72f",
4930 "omap_digest": "0xffffffff",
4938 "data_digest": "0x04cfa72f",
4939 "omap_digest": "0xffffffff",
4947 "selected_object_info": {
4958 "prior_version": "0'0",
4959 "last_reqid": "client.4224.0:1",
4962 "mtime": "2018-04-05 14:30:14.152945",
4963 "local_mtime": "2018-04-05 14:30:14.154014",
4971 "data_digest": "0x2ddbf8f5",
4972 "omap_digest": "0xffffffff",
4973 "expected_object_size": 0,
4974 "expected_write_size": 0,
4975 "alloc_hint_flags": 0,
4981 "union_shard_errors": [
4996 "data_digest": "0x04cfa72f",
4997 "omap_digest": "0xffffffff",
5013 "data_digest": "0x04cfa72f",
5014 "omap_digest": "0xffffffff",
5022 "selected_object_info": {
5033 "prior_version": "0'0",
5034 "last_reqid": "client.4258.0:1",
5037 "mtime": "2018-04-05 14:30:18.875544",
5038 "local_mtime": "2018-04-05 14:30:18.880153",
5046 "data_digest": "0x2ddbf8f5",
5047 "omap_digest": "0xffffffff",
5048 "expected_object_size": 0,
5049 "expected_write_size": 0,
5050 "alloc_hint_flags": 0,
5056 "union_shard_errors": [
5075 "name": "key1-EOBJ4"
5079 "value": "val2-EOBJ4",
5080 "name": "key2-EOBJ4"
5083 "data_digest": "0x04cfa72f",
5084 "omap_digest": "0xffffffff",
5097 "omap_digest": "0xffffffff",
5098 "data_digest": "0x04cfa72f",
5102 "value": "val1-EOBJ4",
5103 "name": "key1-EOBJ4"
5107 "value": "val2-EOBJ4",
5108 "name": "key2-EOBJ4"
5118 "omap_digest": "0xffffffff",
5119 "data_digest": "0x04cfa72f",
5123 "value": "val1-EOBJ4",
5124 "name": "key1-EOBJ4"
5128 "value": "val3-EOBJ4",
5129 "name": "key3-EOBJ4"
5134 "selected_object_info": {
5145 "prior_version": "45'5",
5146 "last_reqid": "client.4296.0:1",
5149 "mtime": "2018-04-05 14:30:22.271983",
5150 "local_mtime": "2018-04-05 14:30:22.272840",
5158 "data_digest": "0x2ddbf8f5",
5159 "omap_digest": "0xffffffff",
5160 "expected_object_size": 0,
5161 "expected_write_size": 0,
5162 "alloc_hint_flags": 0,
5168 "union_shard_errors": [],
5170 "attr_value_mismatch",
5171 "attr_name_mismatch"
5184 "data_digest": "0x04cfa72f",
5185 "omap_digest": "0xffffffff",
5204 "prior_version": "0'0",
5205 "last_reqid": "client.4384.0:1",
5208 "mtime": "2018-04-05 14:30:35.162395",
5209 "local_mtime": "2018-04-05 14:30:35.166390",
5217 "data_digest": "0x2ddbf8f5",
5218 "omap_digest": "0xffffffff",
5219 "expected_object_size": 0,
5220 "expected_write_size": 0,
5221 "alloc_hint_flags": 0,
5230 "size_mismatch_info",
5232 "obj_size_info_mismatch"
5238 "data_digest": "0x04cfa72f",
5239 "omap_digest": "0xffffffff",
5247 "selected_object_info": {
5258 "prior_version": "0'0",
5259 "last_reqid": "client.4384.0:1",
5262 "mtime": "2018-04-05 14:30:35.162395",
5263 "local_mtime": "2018-04-05 14:30:35.166390",
5271 "data_digest": "0x2ddbf8f5",
5272 "omap_digest": "0xffffffff",
5273 "expected_object_size": 0,
5274 "expected_write_size": 0,
5275 "alloc_hint_flags": 0,
5281 "union_shard_errors": [
5282 "size_mismatch_info",
5284 "obj_size_info_mismatch"
5306 "union_shard_errors": [
5311 "selected_object_info": {
5322 "prior_version": "0'0",
5323 "last_reqid": "client.4420.0:1",
5326 "mtime": "2018-04-05 14:30:40.914673",
5327 "local_mtime": "2018-04-05 14:30:40.917705",
5335 "data_digest": "0x2ddbf8f5",
5336 "omap_digest": "0xffffffff",
5337 "expected_object_size": 0,
5338 "expected_write_size": 0,
5339 "alloc_hint_flags": 0,
5365 "hashinfo": "bad-val"
5373 "omap_digest": "0xffffffff",
5374 "data_digest": "0x04cfa72f",
5376 "cumulative_shard_hashes": [
5390 "total_chunk_size": 2048
5404 "hinfo_inconsistency"
5406 "union_shard_errors": [
5409 "selected_object_info": {
5420 "prior_version": "75'9",
5421 "last_reqid": "client.4486.0:1",
5424 "mtime": "2018-04-05 14:30:50.995009",
5425 "local_mtime": "2018-04-05 14:30:50.996112",
5433 "data_digest": "0x136e4e27",
5434 "omap_digest": "0xffffffff",
5435 "expected_object_size": 0,
5436 "expected_write_size": 0,
5437 "alloc_hint_flags": 0,
5453 "cumulative_shard_hashes": [
5467 "total_chunk_size": 2048
5476 "omap_digest": "0xffffffff",
5477 "data_digest": "0x5b7455a8",
5479 "cumulative_shard_hashes": [
5493 "total_chunk_size": 2048
5502 "omap_digest": "0xffffffff",
5503 "data_digest": "0x5b7455a8",
5505 "cumulative_shard_hashes": [
5519 "total_chunk_size": 2048
5531 jq
"$jqfilter" $dir/json | jq
'.inconsistents' | python
-c "$sortkeys" > $dir/csjson
5532 multidiff
$dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
5533 if test $getjson = "yes"
5535 if [ "$allow_overwrites" = "true" ]
5541 jq
'.' $dir/json
> save
${num}.json
5544 if test "$LOCALRUN" = "yes" && which jsonschema
> /dev
/null
;
5546 jsonschema
-i $dir/json
$CEPH_ROOT/doc
/rados
/command
/list-inconsistent-obj.json ||
return 1
5549 ceph osd pool
rm $poolname $poolname --yes-i-really-really-mean-it
5550 teardown
$dir ||
return 1
5553 function TEST_corrupt_scrub_erasure_appends
() {
5554 corrupt_scrub_erasure
$1 false
5557 function TEST_corrupt_scrub_erasure_overwrites
() {
5558 if [ "$use_ec_overwrite" = "true" ]; then
5559 corrupt_scrub_erasure
$1 true
5564 # Test to make sure that a periodic scrub won't cause deep-scrub info to be lost
5566 function TEST_periodic_scrub_replicated
() {
5568 local poolname
=psr_pool
5571 setup
$dir ||
return 1
5572 run_mon
$dir a
--osd_pool_default_size=2 ||
return 1
5573 run_mgr
$dir x ||
return 1
5574 local ceph_osd_args
="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
5575 ceph_osd_args
+="--osd_scrub_backoff_ratio=0"
5576 run_osd
$dir 0 $ceph_osd_args ||
return 1
5577 run_osd
$dir 1 $ceph_osd_args ||
return 1
5578 create_rbd_pool ||
return 1
5579 wait_for_clean ||
return 1
5581 create_pool
$poolname 1 1 ||
return 1
5582 wait_for_clean ||
return 1
5585 add_something
$dir $poolname $objname scrub ||
return 1
5586 local primary
=$
(get_primary
$poolname $objname)
5587 local pg
=$
(get_pg
$poolname $objname)
5589 # Add deep-scrub only error
5590 local payload
=UVWXYZ
5591 echo $payload > $dir/CORRUPT
5592 # Uses $ceph_osd_args for osd restart
5593 objectstore_tool
$dir $osd $objname set-bytes
$dir/CORRUPT ||
return 1
5595 # No scrub information available, so expect failure
5597 ! rados list-inconsistent-obj
$pg | jq
'.' ||
return 1
5600 pg_deep_scrub
$pg ||
return 1
5602 # Make sure bad object found
5603 rados list-inconsistent-obj
$pg | jq
'.' |
grep -q $objname ||
return 1
5606 local last_scrub
=$
(get_last_scrub_stamp
$pg)
5607 # Fake a schedule scrub
5608 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${primary}) \
5609 trigger_scrub
$pg ||
return 1
5610 # Wait for schedule regular scrub
5611 wait_for_scrub
$pg "$last_scrub"
5613 # It needed to be upgraded
5614 grep -q "Deep scrub errors, upgrading scrub to deep-scrub" $dir/osd.
${primary}.log ||
return 1
5616 # Bad object still known
5617 rados list-inconsistent-obj
$pg | jq
'.' |
grep -q $objname ||
return 1
5619 # Can't upgrade with this set
5620 ceph osd
set nodeep-scrub
5621 # Let map change propagate to OSDs
5625 # Fake a schedule scrub
5626 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${primary}) \
5627 trigger_scrub
$pg ||
return 1
5628 # Wait for schedule regular scrub
5629 # to notice scrub and skip it
5631 for i
in $
(seq 14 -1 0)
5634 ! grep -q "Regular scrub skipped due to deep-scrub errors and nodeep-scrub set" $dir/osd.
${primary}.log ||
{ found
=true
; break; }
5635 echo Time left
: $i seconds
5637 test $found = "true" ||
return 1
5639 # Bad object still known
5640 rados list-inconsistent-obj
$pg | jq
'.' |
grep -q $objname ||
return 1
5643 # Request a regular scrub and it will be done
5645 grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.
${primary}.log ||
return 1
5647 # deep-scrub error is no longer present
5648 rados list-inconsistent-obj
$pg | jq
'.' |
grep -qv $objname ||
return 1
5651 function TEST_scrub_warning
() {
5653 local poolname
=psr_pool
5658 local i7_days
=$
(calc
$i1_day \
* 7)
5659 local i14_days
=$
(calc
$i1_day \
* 14)
5661 local conf_overdue_seconds
=$
(calc
$i7_days + $i1_day + \
( $i7_days \
* $overdue \
) )
5662 local pool_overdue_seconds
=$
(calc
$i14_days + $i1_day + \
( $i14_days \
* $overdue \
) )
5664 setup
$dir ||
return 1
5665 run_mon
$dir a
--osd_pool_default_size=1 ||
return 1
5666 run_mgr
$dir x
--mon_warn_pg_not_scrubbed_ratio=${overdue} --mon_warn_pg_not_deep_scrubbed_ratio=${overdue} ||
return 1
5667 run_osd
$dir 0 $ceph_osd_args --osd_scrub_backoff_ratio=0 ||
return 1
5669 for i
in $
(seq 1 $
(expr $scrubs + $deep_scrubs))
5671 create_pool
$poolname-$i 1 1 ||
return 1
5672 wait_for_clean ||
return 1
5675 ceph osd pool
set $poolname-$i scrub_max_interval
$i14_days
5677 if [ $i = $
(expr $scrubs + 1) ];
5679 ceph osd pool
set $poolname-$i deep_scrub_interval
$i14_days
5686 ceph osd
set noscrub ||
return 1
5687 ceph osd
set nodeep-scrub ||
return 1
5688 ceph config
set global osd_scrub_interval_randomize_ratio
0
5689 ceph config
set global osd_deep_scrub_randomize_ratio
0
5690 ceph config
set global osd_scrub_max_interval
${i7_days}
5691 ceph config
set global osd_deep_scrub_interval
${i7_days}
5693 # Fake schedule scrubs
5694 for i
in $
(seq 1 $scrubs)
5698 overdue_seconds
=$pool_overdue_seconds
5700 overdue_seconds
=$conf_overdue_seconds
5702 CEPH_ARGS
='' ceph daemon $
(get_asok_path osd.
${primary}) \
5703 trigger_scrub
${i}.0 $(expr ${overdue_seconds} + ${i}00) ||
return 1
5705 # Fake schedule deep scrubs
5706 for i
in $
(seq $
(expr $scrubs + 1) $
(expr $scrubs + $deep_scrubs))
5708 if [ $i = "$(expr $scrubs + 1)" ];
5710 overdue_seconds
=$pool_overdue_seconds
5712 overdue_seconds
=$conf_overdue_seconds
5714 CEPH_ARGS
='' ceph daemon $
(get_asok_path osd.
${primary}) \
5715 trigger_deep_scrub
${i}.0 $(expr ${overdue_seconds} + ${i}00) ||
return 1
5721 ceph health |
grep -q "$deep_scrubs pgs not deep-scrubbed in time" ||
return 1
5722 ceph health |
grep -q "$scrubs pgs not scrubbed in time" ||
return 1
5723 COUNT
=$
(ceph health detail |
grep "not scrubbed since" |
wc -l)
5724 if [ "$COUNT" != $scrubs ]; then
5725 ceph health detail |
grep "not scrubbed since"
5728 COUNT
=$
(ceph health detail |
grep "not deep-scrubbed since" |
wc -l)
5729 if [ "$COUNT" != $deep_scrubs ]; then
5730 ceph health detail |
grep "not deep-scrubbed since"
5737 # Corrupt snapset in replicated pool
5739 function TEST_corrupt_snapset_scrub_rep
() {
5741 local poolname
=csr_pool
5744 setup
$dir ||
return 1
5745 run_mon
$dir a
--osd_pool_default_size=2 ||
return 1
5746 run_mgr
$dir x ||
return 1
5747 run_osd
$dir 0 ||
return 1
5748 run_osd
$dir 1 ||
return 1
5749 create_rbd_pool ||
return 1
5750 wait_for_clean ||
return 1
5752 create_pool foo
1 ||
return 1
5753 create_pool
$poolname 1 1 ||
return 1
5754 wait_for_clean ||
return 1
5756 for i
in $
(seq 1 $total_objs) ; do
5758 add_something
$dir $poolname $objname ||
return 1
5760 rados
--pool $poolname setomapheader
$objname hdr-
$objname ||
return 1
5761 rados
--pool $poolname setomapval
$objname key-
$objname val-
$objname ||
return 1
5764 local pg
=$
(get_pg
$poolname ROBJ0
)
5765 local primary
=$
(get_primary
$poolname ROBJ0
)
5767 rados
-p $poolname mksnap snap1
5768 echo -n head_of_snapshot_data
> $dir/change
5770 for i
in $
(seq 1 $total_objs) ; do
5773 # Alternate corruption between osd.0 and osd.1
5774 local osd
=$
(expr $i % 2)
5778 rados
--pool $poolname put
$objname $dir/change
5779 objectstore_tool
$dir $osd --head $objname clear-snapset corrupt ||
return 1
5783 rados
--pool $poolname put
$objname $dir/change
5784 objectstore_tool
$dir $osd --head $objname clear-snapset corrupt ||
return 1
5793 rados list-inconsistent-pg
$poolname > $dir/json ||
return 1
5795 test $
(jq
'. | length' $dir/json
) = "1" ||
return 1
5797 test $
(jq
-r '.[0]' $dir/json
) = $pg ||
return 1
5799 rados list-inconsistent-obj
$pg > $dir/json ||
return 1
5801 jq
"$jqfilter" << EOF | jq '.inconsistents' | python -c "$sortkeys" > $dir/checkcsjson
5814 "snapset_inconsistency"
5816 "union_shard_errors": [],
5817 "selected_object_info": {
5828 "prior_version": "21'3",
5829 "last_reqid": "client.4195.0:1",
5832 "mtime": "2018-04-05 14:35:43.286117",
5833 "local_mtime": "2018-04-05 14:35:43.288990",
5842 "data_digest": "0x53acb008",
5843 "omap_digest": "0xffffffff",
5844 "expected_object_size": 0,
5845 "expected_write_size": 0,
5846 "alloc_hint_flags": 0,
5901 "snapset_inconsistency"
5903 "union_shard_errors": [],
5904 "selected_object_info": {
5915 "prior_version": "23'6",
5916 "last_reqid": "client.4223.0:1",
5919 "mtime": "2018-04-05 14:35:48.326856",
5920 "local_mtime": "2018-04-05 14:35:48.328097",
5929 "data_digest": "0x53acb008",
5930 "omap_digest": "0xffffffff",
5931 "expected_object_size": 0,
5932 "expected_write_size": 0,
5933 "alloc_hint_flags": 0,
5983 jq
"$jqfilter" $dir/json | jq
'.inconsistents' | python
-c "$sortkeys" > $dir/csjson
5984 multidiff
$dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
5985 if test $getjson = "yes"
5987 jq
'.' $dir/json
> save6.json
5990 if test "$LOCALRUN" = "yes" && which jsonschema
> /dev
/null
;
5992 jsonschema
-i $dir/json
$CEPH_ROOT/doc
/rados
/command
/list-inconsistent-obj.json ||
return 1
5996 declare -a err_strings
5997 err_strings
[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid [0-9]*:.*:::ROBJ1:head : snapset inconsistent"
5998 err_strings
[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid [0-9]*:.*:::ROBJ2:head : snapset inconsistent"
5999 err_strings
[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*:.*:::ROBJ1:1 : is an unexpected clone"
6000 err_strings
[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 3/4 objects, 1/2 clones, 3/4 dirty, 3/4 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 49/56 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes."
6001 err_strings
[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 0 missing, 2 inconsistent objects"
6002 err_strings
[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 4 errors"
6004 for err_string
in "${err_strings[@]}"
6006 if ! grep -q "$err_string" $dir/osd.
${primary}.log
6008 echo "Missing log message '$err_string'"
6009 ERRORS
=$
(expr $ERRORS + 1)
6013 if [ $ERRORS != "0" ];
6015 echo "TEST FAILED WITH $ERRORS ERRORS"
6019 ceph osd pool
rm $poolname $poolname --yes-i-really-really-mean-it
6020 teardown
$dir ||
return 1
6023 function TEST_request_scrub_priority
() {
6025 local poolname
=psr_pool
6030 setup
$dir ||
return 1
6031 run_mon
$dir a
--osd_pool_default_size=1 ||
return 1
6032 run_mgr
$dir x ||
return 1
6033 local ceph_osd_args
="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
6034 ceph_osd_args
+="--osd_scrub_backoff_ratio=0"
6035 run_osd
$dir 0 $ceph_osd_args ||
return 1
6037 create_pool
$poolname $PGS $PGS ||
return 1
6038 wait_for_clean ||
return 1
6041 add_something
$dir $poolname $objname noscrub ||
return 1
6042 local primary
=$
(get_primary
$poolname $objname)
6043 local pg
=$
(get_pg
$poolname $objname)
6044 poolid
=$
(ceph osd dump |
grep "^pool.*[']${poolname}[']" |
awk '{ print $2 }')
6047 for i
in $
(seq 0 $
(expr $PGS - 1))
6049 opg
="${poolid}.${i}"
6050 if [ "$opg" = "$pg" ]; then
6053 otherpgs
="${otherpgs}${opg} "
6054 local other_last_scrub
=$
(get_last_scrub_stamp
$pg)
6055 # Fake a schedule scrub
6056 CEPH_ARGS
='' ceph
--admin-daemon $
(get_asok_path osd.
${primary}) \
6057 trigger_scrub
$opg ||
return 1
6063 # Request a regular scrub and it will be done
6064 local last_scrub
=$
(get_last_scrub_stamp
$pg)
6067 ceph osd
unset noscrub ||
return 1
6068 ceph osd
unset nodeep-scrub ||
return 1
6070 wait_for_scrub
$pg "$last_scrub"
6072 for opg
in $otherpgs $pg
6074 wait_for_scrub
$opg "$other_last_scrub"
6077 # Verify that the requested scrub ran first
6078 grep "log_channel.*scrub ok" $dir/osd.
${primary}.log |
head -1 |
sed 's/.*[[]DBG[]]//' |
grep -q $pg ||
return 1
6084 main osd-scrub-repair
"$@"
6087 # compile-command: "cd build ; make -j4 && \
6088 # ../qa/run-standalone.sh osd-scrub-repair.sh"