# Copyright (C) 2015 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
# Pull in the standalone-test helper functions (setup, teardown, run_mon,
# run_osd, pg_scrub, kill_daemons, ...) used throughout this script.
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
# Test development and debugging
# Set to "yes" in order to ignore diff errors and save results to update test
# jq filter used to extract just the inconsistency records from the
# list-inconsistent-* JSON output before diffing against expected results.
jqfilter='.inconsistents'
# Python one-liner that re-serializes JSON with sorted keys so that the
# expected and actual output diff deterministically.
# NOTE(review): this is Python 2 syntax (bare `print` statement) — it must
# be run with a python2 interpreter; confirm what `python -c` resolves to.
sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print json.dumps(ud, sort_keys=True, indent=2)'
30 export CEPH_MON
="127.0.0.1:7121" # git grep '\<7121\>' : there must be only one
32 CEPH_ARGS
+="--fsid=$(uuidgen) --auth-supported=none "
33 CEPH_ARGS
+="--mon-host=$CEPH_MON "
35 local funcs
=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
36 for func
in $funcs ; do
37 setup
$dir ||
return 1
38 $func $dir ||
return 1
39 teardown
$dir ||
return 1
43 function create_scenario
() {
50 rados
-p $poolname mksnap snap
${SNAP}
51 dd if=/dev
/urandom of
=$TESTDATA bs
=256 count
=${SNAP}
52 rados
-p $poolname put obj1
$TESTDATA
53 rados
-p $poolname put obj5
$TESTDATA
54 rados
-p $poolname put obj3
$TESTDATA
56 do rados
-p $poolname put obj
${i} $TESTDATA
60 rados
-p $poolname mksnap snap
${SNAP}
61 dd if=/dev
/urandom of
=$TESTDATA bs
=256 count
=${SNAP}
62 rados
-p $poolname put obj5
$TESTDATA
65 rados
-p $poolname mksnap snap
${SNAP}
66 dd if=/dev
/urandom of
=$TESTDATA bs
=256 count
=${SNAP}
67 rados
-p $poolname put obj3
$TESTDATA
70 rados
-p $poolname mksnap snap
${SNAP}
71 dd if=/dev
/urandom of
=$TESTDATA bs
=256 count
=${SNAP}
72 rados
-p $poolname put obj5
$TESTDATA
73 rados
-p $poolname put obj2
$TESTDATA
76 rados
-p $poolname mksnap snap
${SNAP}
78 rados
-p $poolname mksnap snap
${SNAP}
79 dd if=/dev
/urandom of
=$TESTDATA bs
=256 count
=${SNAP}
80 rados
-p $poolname put obj5
$TESTDATA
83 rados
-p $poolname mksnap snap
${SNAP}
85 rados
-p $poolname rm obj4
86 rados
-p $poolname rm obj2
88 kill_daemons
$dir TERM osd ||
return 1
90 # Don't need to use ceph_objectstore_tool() function because osd stopped
92 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj1)"
93 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" --force remove
95 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":2)"
96 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" remove
98 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":1)"
100 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" remove
102 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":4)"
103 dd if=/dev
/urandom of
=$TESTDATA bs
=256 count
=18
104 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" set-bytes
$TESTDATA
106 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj3)"
107 dd if=/dev
/urandom of
=$TESTDATA bs
=256 count
=15
108 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" set-bytes
$TESTDATA
110 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj4 | grep \"snapid\":7)"
111 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" remove
113 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj2)"
114 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" rm-attr snapset
116 # Create a clone which isn't in snapset and doesn't have object info
117 JSON
="$(echo "$OBJ5SAVE" | sed s/snapid\":1/snapid\":7/)"
118 dd if=/dev
/urandom of
=$TESTDATA bs
=256 count
=7
119 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" set-bytes
$TESTDATA
121 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj6)"
122 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" clear-snapset
123 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj7)"
124 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" clear-snapset corrupt
125 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj8)"
126 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" clear-snapset
seq
127 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj9)"
128 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" clear-snapset clone_size
129 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj10)"
130 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" clear-snapset clone_overlap
131 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj11)"
132 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" clear-snapset clones
133 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj12)"
134 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" clear-snapset
head
135 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj13)"
136 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" clear-snapset snaps
137 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj14)"
138 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" clear-snapset size
140 echo "garbage" > $dir/bad
141 JSON
="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj15)"
142 ceph-objectstore-tool
--data-path $dir/${osd} "$JSON" set-attr snapset
$dir/bad
146 function TEST_scrub_snaps
() {
152 TESTDATA
="testdata.$$"
154 run_mon
$dir a
--osd_pool_default_size=$OSDS ||
return 1
155 run_mgr
$dir x ||
return 1
156 for osd
in $
(seq 0 $
(expr $OSDS - 1))
158 run_osd
$dir $osd ||
return 1
161 # Create a pool with a single pg
162 create_pool
$poolname 1 1
163 wait_for_clean ||
return 1
164 poolid
=$
(ceph osd dump |
grep "^pool.*[']test[']" |
awk '{ print $2 }')
166 dd if=/dev
/urandom of
=$TESTDATA bs
=1032 count
=1
167 for i
in `seq 1 $OBJS`
169 rados
-p $poolname put obj
${i} $TESTDATA
172 local primary
=$
(get_primary
$poolname obj1
)
174 create_scenario
$dir $poolname $TESTDATA $primary
178 for osd
in $
(seq 0 $
(expr $OSDS - 1))
180 run_osd
$dir $osd ||
return 1
183 local pgid
="${poolid}.0"
184 if ! pg_scrub
"$pgid" ; then
189 test "$(grep "_scan_snaps start
" $dir/osd.${primary}.log | wc -l)" = "2" ||
return 1
191 rados list-inconsistent-pg
$poolname > $dir/json ||
return 1
193 test $
(jq
'. | length' $dir/json
) = "1" ||
return 1
195 test $
(jq
-r '.[0]' $dir/json
) = $pgid ||
return 1
197 rados list-inconsistent-obj
$pgid > $dir/json ||
return 1
199 # The injected snapshot errors with a single copy pool doesn't
200 # see object errors because all the issues are detected by
202 jq
"$jqfilter" << EOF | python -c "$sortkeys" > $dir/checkcsjson
209 jq
"$jqfilter" $dir/json | python
-c "$sortkeys" > $dir/csjson
210 diff ${DIFFCOLOPTS} $dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
212 rados list-inconsistent-snapset
$pgid > $dir/json ||
return 1
214 jq
"$jqfilter" << EOF | python -c "$sortkeys" > $dir/checkcsjson
693 jq
"$jqfilter" $dir/json | python
-c "$sortkeys" > $dir/csjson
694 diff ${DIFFCOLOPTS} $dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
695 if test $getjson = "yes"
697 jq
'.' $dir/json
> save1.json
700 if test "$LOCALRUN" = "yes" && which jsonschema
> /dev
/null
;
702 jsonschema
-i $dir/json
$CEPH_ROOT/doc
/rados
/command
/list-inconsistent-snap.json ||
return 1
705 pidfiles
=$
(find $dir 2>/dev
/null |
grep 'osd[^/]*\.pid')
707 for pidfile
in ${pidfiles}
709 pids
+="$(cat $pidfile) "
714 rados
-p $poolname rmsnap snap
$i
723 echo "OSD Crash occurred"
724 ERRORS
=$
(expr $ERRORS + 1)
728 kill_daemons
$dir ||
return 1
730 declare -a err_strings
731 err_strings
[0]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj10:.* is missing in clone_overlap"
732 err_strings
[1]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 no '_' attr"
733 err_strings
[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 is an unexpected clone"
734 err_strings
[3]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:4 on disk size [(]4608[)] does not match object info size [(]512[)] adjusted for ondisk to [(]512[)]"
735 err_strings
[4]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head expected clone .*:::obj5:2"
736 err_strings
[5]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head expected clone .*:::obj5:1"
737 err_strings
[6]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj5:head 2 missing clone[(]s[)]"
738 err_strings
[7]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj12:head snapset.head_exists=false, but head exists"
739 err_strings
[8]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj8:head snaps.seq not set"
740 err_strings
[9]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj7:head snapset.head_exists=false, but head exists"
741 err_strings
[10]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj7:1 is an unexpected clone"
742 err_strings
[11]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj3:head on disk size [(]3840[)] does not match object info size [(]768[)] adjusted for ondisk to [(]768[)]"
743 err_strings
[12]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj6:1 is an unexpected clone"
744 err_strings
[13]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:head no 'snapset' attr"
745 err_strings
[14]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:7 clone ignored due to missing snapset"
746 err_strings
[15]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:4 clone ignored due to missing snapset"
747 err_strings
[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj4:head expected clone .*:::obj4:7"
748 err_strings
[17]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj4:head 1 missing clone[(]s[)]"
749 err_strings
[18]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj1:1 is an unexpected clone"
750 err_strings
[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 is missing in clone_size"
751 err_strings
[20]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 is an unexpected clone"
752 err_strings
[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 size 1032 != clone_size 1033"
753 err_strings
[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 22 errors"
754 err_strings
[23]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj15:head can't decode 'snapset' attr buffer"
756 for err_string
in "${err_strings[@]}"
758 if ! grep "$err_string" $dir/osd.
${primary}.log
> /dev
/null
;
760 echo "Missing log message '$err_string'"
761 ERRORS
=$
(expr $ERRORS + 1)
765 if [ $ERRORS != "0" ];
767 echo "TEST FAILED WITH $ERRORS ERRORS"
775 function _scrub_snaps_multi
() {
782 TESTDATA
="testdata.$$"
784 run_mon
$dir a
--osd_pool_default_size=$OSDS ||
return 1
785 run_mgr
$dir x ||
return 1
786 for osd
in $
(seq 0 $
(expr $OSDS - 1))
788 run_osd
$dir $osd ||
return 1
791 # Create a pool with a single pg
792 create_pool
$poolname 1 1
793 wait_for_clean ||
return 1
794 poolid
=$
(ceph osd dump |
grep "^pool.*[']test[']" |
awk '{ print $2 }')
796 dd if=/dev
/urandom of
=$TESTDATA bs
=1032 count
=1
797 for i
in `seq 1 $OBJS`
799 rados
-p $poolname put obj
${i} $TESTDATA
802 local primary
=$
(get_primary
$poolname obj1
)
803 local replica
=$
(get_not_primary
$poolname obj1
)
805 eval create_scenario
$dir $poolname $TESTDATA \$
$which
809 for osd
in $
(seq 0 $
(expr $OSDS - 1))
811 run_osd
$dir $osd ||
return 1
814 local pgid
="${poolid}.0"
815 if ! pg_scrub
"$pgid" ; then
820 test "$(grep "_scan_snaps start
" $dir/osd.${primary}.log | wc -l)" -gt "3" ||
return 1
821 test "$(grep "_scan_snaps start
" $dir/osd.${replica}.log | wc -l)" -gt "3" ||
return 1
823 rados list-inconsistent-pg
$poolname > $dir/json ||
return 1
825 test $
(jq
'. | length' $dir/json
) = "1" ||
return 1
827 test $
(jq
-r '.[0]' $dir/json
) = $pgid ||
return 1
829 rados list-inconsistent-obj
$pgid --format=json-pretty
831 rados list-inconsistent-snapset
$pgid > $dir/json ||
return 1
833 # Since all of the snapshots on the primary is consistent there are no errors here
834 if [ $which = "replica" ];
837 jq
"$jqfilter" << EOF | python -c "$sortkeys" > $dir/checkcsjson
846 jq
"$jqfilter" << EOF | python -c "$sortkeys" > $dir/checkcsjson
1180 jq
"$jqfilter" $dir/json | python
-c "$sortkeys" > $dir/csjson
1181 diff ${DIFFCOLOPTS} $dir/checkcsjson
$dir/csjson ||
test $getjson = "yes" ||
return 1
1182 if test $getjson = "yes"
1184 jq
'.' $dir/json
> save1.json
1187 if test "$LOCALRUN" = "yes" && which jsonschema
> /dev
/null
;
1189 jsonschema
-i $dir/json
$CEPH_ROOT/doc
/rados
/command
/list-inconsistent-snap.json ||
return 1
1192 pidfiles
=$
(find $dir 2>/dev
/null |
grep 'osd[^/]*\.pid')
1194 for pidfile
in ${pidfiles}
1196 pids
+="$(cat $pidfile) "
1199 # When removing snapshots with a corrupt replica, it crashes.
1200 # See http://tracker.ceph.com/issues/23875
1201 if [ $which = "primary" ];
1205 rados
-p $poolname rmsnap snap
$i
1215 echo "OSD Crash occurred"
1216 ERRORS
=$
(expr $ERRORS + 1)
1220 kill_daemons
$dir ||
return 1
1222 declare -a err_strings
1223 err_strings
[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] missing .*:::obj4:7"
1224 err_strings
[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1]: soid .*:::obj3:head size 3840 != size 768 from auth oi"
1225 err_strings
[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] missing .*:::obj5:1"
1226 err_strings
[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] missing .*:::obj5:2"
1227 err_strings
[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1]: soid .*:::obj5:4 size 4608 != size 512 from auth oi"
1228 err_strings
[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid .*:::obj5:7: failed to pick suitable object info"
1229 err_strings
[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] missing .*:::obj1:head"
1230 err_strings
[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub ${scruberrors} errors"
1232 for err_string
in "${err_strings[@]}"
1234 if ! grep "$err_string" $dir/osd.
${primary}.log
> /dev
/null
;
1236 echo "Missing log message '$err_string'"
1237 ERRORS
=$
(expr $ERRORS + 1)
1241 if [ $ERRORS != "0" ];
1243 echo "TEST FAILED WITH $ERRORS ERRORS"
# Run the multi-OSD scrub-snaps scenario corrupting the replica shard.
# Uses small scrub chunks so the scrub crosses chunk boundaries.
# Arguments: $1 - test directory
function TEST_scrub_snaps_replica() {
    local dir=$1
    # Save and extend CEPH_ARGS; restored below so later tests are unaffected.
    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=3"
    _scrub_snaps_multi $dir replica
    # Capture the test status before restoring CEPH_ARGS, otherwise the
    # function would always return the (successful) assignment status.
    # NOTE(review): status capture reconstructed from elided lines — confirm
    # against the original file.
    err=$?
    CEPH_ARGS=$ORIG_ARGS
    return $err
}
# Run the multi-OSD scrub-snaps scenario corrupting the primary shard.
# Uses small scrub chunks so the scrub crosses chunk boundaries.
# Arguments: $1 - test directory
function TEST_scrub_snaps_primary() {
    local dir=$1
    # Save and extend CEPH_ARGS; restored below so later tests are unaffected.
    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=3"
    _scrub_snaps_multi $dir primary
    # Capture the test status before restoring CEPH_ARGS, otherwise the
    # function would always return the (successful) assignment status.
    # NOTE(review): status capture reconstructed from elided lines — confirm
    # against the original file.
    err=$?
    CEPH_ARGS=$ORIG_ARGS
    return $err
}
# Entry point: `main` (from ceph-helpers.sh) runs every TEST_* function
# in a fresh test directory named after the script.
main osd-scrub-snaps "$@"
# compile-command: "cd build ; make -j4 && \
#    ../qa/run-standalone.sh osd-scrub-snaps.sh"