# Copyright (C) 2015 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
# Test development and debugging
# Set to "yes" in order to ignore diff errors and save results to update test
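# Assumed default for normal runs (the checks below compare $getjson to "yes")
getjson="no"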
jqfilter='.inconsistents'
sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print json.dumps(ud, sort_keys=True, indent=2)'
export CEPH_MON="127.0.0.1:7121" # git grep '\<7121\>' : there must be only one
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
export -n CEPH_CLI_TEST_DUP_COMMAND
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
    setup $dir || return 1
    $func $dir || return 1
    teardown $dir || return 1
function create_scenario() {
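# Populate the pool with snapshotted objects, stop the OSD, then use
# ceph-objectstore-tool and ceph-kvstore-tool to inject the snapset, size,
# object-info and SnapMapper corruptions that the scrub checks below expect.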
rados -p $poolname mksnap snap${SNAP}
dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
rados -p $poolname put obj1 $TESTDATA
rados -p $poolname put obj5 $TESTDATA
rados -p $poolname put obj3 $TESTDATA
do rados -p $poolname put obj${i} $TESTDATA
rados -p $poolname mksnap snap${SNAP}
dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
rados -p $poolname put obj5 $TESTDATA
rados -p $poolname mksnap snap${SNAP}
dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
rados -p $poolname put obj3 $TESTDATA
rados -p $poolname mksnap snap${SNAP}
dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
rados -p $poolname put obj5 $TESTDATA
rados -p $poolname put obj2 $TESTDATA
rados -p $poolname mksnap snap${SNAP}
rados -p $poolname mksnap snap${SNAP}
dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
rados -p $poolname put obj5 $TESTDATA
rados -p $poolname mksnap snap${SNAP}
rados -p $poolname rm obj4
rados -p $poolname rm obj16
rados -p $poolname rm obj2
kill_daemons $dir TERM osd || return 1
# No need to use the ceph_objectstore_tool() helper because the OSD is stopped
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj1)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" --force remove || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":2)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":1)"
OBJ5SAVE="$JSON"
# Starts with a snapmap
ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log
grep "^M.*MAP_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1
ceph-objectstore-tool --data-path $dir/${osd} --rmtype nosnapmap "$JSON" remove || return 1
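# "--rmtype nosnapmap" removes the clone object while leaving its SnapMapper
# key behind, as the recheck below confirms.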
# Check that snapmap is still there
ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log
grep "^M.*MAP_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":4)"
dd if=/dev/urandom of=$TESTDATA bs=256 count=18
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj3)"
dd if=/dev/urandom of=$TESTDATA bs=256 count=15
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj4 | grep \"snapid\":7)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1
# Starts with a snapmap
ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log
grep "^M.*MAP_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj16 | grep \"snapid\":7)"
ceph-objectstore-tool --data-path $dir/${osd} --rmtype snapmap "$JSON" remove || return 1
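# "--rmtype snapmap" removes only the SnapMapper entry and leaves the clone in
# place, so scrub later reports obj16's snaps as missing in the mapper
# (see the snap mapper entry in err_strings below).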
# Check that snapmap is now removed
ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log
! grep "^M.*MAP_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj2)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" rm-attr snapset || return 1
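# With its snapset attribute removed, obj2's clones should be reported by
# scrub as "clone ignored due to missing snapset".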
# Create a clone which isn't in snapset and doesn't have object info
JSON="$(echo "$OBJ5SAVE" | sed s/snapid\":1/snapid\":7/)"
dd if=/dev/urandom of=$TESTDATA bs=256 count=7
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1
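# Corrupt a different part of the snapset on each of obj6..obj14 (no argument,
# corrupt, seq, clone_size, clone_overlap, clones, head, snaps, size) so that
# scrub reports one distinct snapset error per object.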
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj6)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj7)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset corrupt || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj8)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset seq || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj9)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clone_size || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj10)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clone_overlap || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj11)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clones || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj12)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset head || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj13)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset snaps || return 1
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj14)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset size || return 1
echo "garbage" > $dir/bad
JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj15)"
ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-attr snapset $dir/bad || return 1
function TEST_scrub_snaps() {
TESTDATA="testdata.$$"
run_mon $dir a --osd_pool_default_size=$OSDS || return 1
run_mgr $dir x || return 1
for osd in $(seq 0 $(expr $OSDS - 1))
run_osd $dir $osd || return 1
# All scrubs done manually. Don't want any unexpected scheduled scrubs.
ceph osd set noscrub || return 1
ceph osd set nodeep-scrub || return 1
# Create a pool with a single pg
create_pool $poolname 1 1
wait_for_clean || return 1
poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')
dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
for i in `seq 1 $OBJS`
rados -p $poolname put obj${i} $TESTDATA
local primary=$(get_primary $poolname obj1)
create_scenario $dir $poolname $TESTDATA $primary || return 1
for osd in $(seq 0 $(expr $OSDS - 1))
run_osd $dir $osd || return 1
local pgid="${poolid}.0"
if ! pg_scrub "$pgid" ; then
test "$(grep "_scan_snaps start" $dir/osd.${primary}.log | wc -l)" = "2" || return 1
rados list-inconsistent-pg $poolname > $dir/json || return 1
test $(jq '. | length' $dir/json) = "1" || return 1
test $(jq -r '.[0]' $dir/json) = $pgid || return 1
rados list-inconsistent-obj $pgid > $dir/json || return 1
# The injected snapshot errors with a single copy pool don't
# show up as object errors because all the issues are detected by
jq "$jqfilter" << EOF | python -c "$sortkeys" > $dir/checkcsjson
jq "$jqfilter" $dir/json | python -c "$sortkeys" > $dir/csjson
multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
rados list-inconsistent-snapset $pgid > $dir/json || return 1
jq "$jqfilter" << EOF | python -c "$sortkeys" > $dir/checkcsjson
jq "$jqfilter" $dir/json | python -c "$sortkeys" > $dir/csjson
multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
if test $getjson = "yes"
jq '.' $dir/json > save1.json
if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null ;
jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json || return 1
pidfiles=$(find $dir 2>/dev/null | grep 'osd[^/]*\.pid')
for pidfile in ${pidfiles}
pids+="$(cat $pidfile) "
rados -p $poolname rmsnap snap$i
while ceph pg dump pgs | grep -q snaptrim ;
if ceph pg dump pgs | grep -q snaptrim_error ;
if (( $loop >= 10 )) ; then
ERRORS=$(expr $ERRORS + 1)
echo "OSD Crash occurred"
ERRORS=$(expr $ERRORS + 1)
kill_daemons $dir || return 1
declare -a err_strings
err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj10:.* : is missing in clone_overlap"
err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 : no '_' attr"
err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 : is an unexpected clone"
err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:4 : on disk size [(]4608[)] does not match object info size [(]512[)] adjusted for ondisk to [(]512[)]"
err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head : expected clone .*:::obj5:2"
err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head : expected clone .*:::obj5:1"
err_strings[6]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj5:head : 2 missing clone[(]s[)]"
err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj8:head : snaps.seq not set"
err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj7:1 : is an unexpected clone"
err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj3:head : on disk size [(]3840[)] does not match object info size [(]768[)] adjusted for ondisk to [(]768[)]"
err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj6:1 : is an unexpected clone"
err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:head : no 'snapset' attr"
err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:7 : clone ignored due to missing snapset"
err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:4 : clone ignored due to missing snapset"
err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj4:head : expected clone .*:::obj4:7"
err_strings[15]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj4:head : 1 missing clone[(]s[)]"
err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj1:1 : is an unexpected clone"
err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 : is missing in clone_size"
err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 : is an unexpected clone"
err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 : size 1032 != clone_size 1033"
err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 20 errors"
err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj15:head : can't decode 'snapset' attr buffer"
err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : osd[.][0-9]* found snap mapper error on pg 1.0 oid 1:461f8b5e:::obj16:7 snaps missing in mapper, should be: 1,2,3,4,5,6,7 was r -2...repaired"
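# Each injected corruption above must surface as one of these cluster-log
# messages on the primary OSD; a missing message is counted as a test failure.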
for err_string in "${err_strings[@]}"
if ! grep "$err_string" $dir/osd.${primary}.log > /dev/null ;
echo "Missing log message '$err_string'"
ERRORS=$(expr $ERRORS + 1)
if [ $ERRORS != "0" ];
echo "TEST FAILED WITH $ERRORS ERRORS"
function _scrub_snaps_multi() {
TESTDATA="testdata.$$"
run_mon $dir a --osd_pool_default_size=$OSDS || return 1
run_mgr $dir x || return 1
for osd in $(seq 0 $(expr $OSDS - 1))
run_osd $dir $osd || return 1
# All scrubs done manually. Don't want any unexpected scheduled scrubs.
ceph osd set noscrub || return 1
ceph osd set nodeep-scrub || return 1
# Create a pool with a single pg
create_pool $poolname 1 1
wait_for_clean || return 1
poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')
dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
for i in `seq 1 $OBJS`
rados -p $poolname put obj${i} $TESTDATA
local primary=$(get_primary $poolname obj1)
local replica=$(get_not_primary $poolname obj1)
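# $which is "primary" or "replica"; the eval below expands it so the
# corruptions are injected on that OSD only.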
eval create_scenario $dir $poolname $TESTDATA \$$which || return 1
for osd in $(seq 0 $(expr $OSDS - 1))
run_osd $dir $osd || return 1
local pgid="${poolid}.0"
if ! pg_scrub "$pgid" ; then
test "$(grep "_scan_snaps start" $dir/osd.${primary}.log | wc -l)" -gt "3" || return 1
test "$(grep "_scan_snaps start" $dir/osd.${replica}.log | wc -l)" -gt "3" || return 1
rados list-inconsistent-pg $poolname > $dir/json || return 1
test $(jq '. | length' $dir/json) = "1" || return 1
test $(jq -r '.[0]' $dir/json) = $pgid || return 1
rados list-inconsistent-obj $pgid --format=json-pretty
rados list-inconsistent-snapset $pgid > $dir/json || return 1
# Since all of the snapshots on the primary are consistent there are no errors here
if [ $which = "replica" ];
jq "$jqfilter" << EOF | python -c "$sortkeys" > $dir/checkcsjson
jq "$jqfilter" << EOF | python -c "$sortkeys" > $dir/checkcsjson
jq "$jqfilter" $dir/json | python -c "$sortkeys" > $dir/csjson
multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
if test $getjson = "yes"
jq '.' $dir/json > save1.json
if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null ;
jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json || return 1
pidfiles=$(find $dir 2>/dev/null | grep 'osd[^/]*\.pid')
for pidfile in ${pidfiles}
pids+="$(cat $pidfile) "
# When removing snapshots with a corrupt replica, the OSD crashes.
# See http://tracker.ceph.com/issues/23875
if [ $which = "primary" ];
rados -p $poolname rmsnap snap$i
while ceph pg dump pgs | grep -q snaptrim ;
if ceph pg dump pgs | grep -q snaptrim_error ;
if (( $loop >= 10 )) ; then
ERRORS=$(expr $ERRORS + 1)
echo "OSD Crash occurred"
ERRORS=$(expr $ERRORS + 1)
kill_daemons $dir || return 1
declare -a err_strings
err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj4:7 : missing"
err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] soid .*:::obj3:head : size 3840 != size 768 from auth oi"
err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj5:1 : missing"
err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj5:2 : missing"
err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] soid .*:::obj5:4 : size 4608 != size 512 from auth oi"
err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid .*:::obj5:7 : failed to pick suitable object info"
err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj1:head : missing"
err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub ${scruberrors} errors"
for err_string in "${err_strings[@]}"
if ! grep "$err_string" $dir/osd.${primary}.log > /dev/null ;
echo "Missing log message '$err_string'"
ERRORS=$(expr $ERRORS + 1)
# Check replica specific messages
declare -a rep_err_strings
osd=$(eval echo \$$which)
rep_err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : osd[.][0-9]* found snap mapper error on pg 1.0 oid 1:461f8b5e:::obj16:7 snaps missing in mapper, should be: 1,2,3,4,5,6,7 was r -2...repaired"
for err_string in "${rep_err_strings[@]}"
if ! grep "$err_string" $dir/osd.${osd}.log > /dev/null ;
echo "Missing log message '$err_string'"
ERRORS=$(expr $ERRORS + 1)
if [ $ERRORS != "0" ];
echo "TEST FAILED WITH $ERRORS ERRORS"
function TEST_scrub_snaps_replica() {
ORIG_ARGS=$CEPH_ARGS
CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=3"
_scrub_snaps_multi $dir replica
CEPH_ARGS=$ORIG_ARGS
function TEST_scrub_snaps_primary() {
ORIG_ARGS=$CEPH_ARGS
CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=3"
_scrub_snaps_multi $dir primary
CEPH_ARGS=$ORIG_ARGS
main osd-scrub-snaps "$@"

# compile-command: "cd build ; make -j4 && \
#    ../qa/run-standalone.sh osd-scrub-snaps.sh"