]> git.proxmox.com Git - ceph.git/blob - ceph/qa/standalone/scrub/osd-scrub-test.sh
import 15.2.5
[ceph.git] / ceph / qa / standalone / scrub / osd-scrub-test.sh
1 #!/usr/bin/env bash
2 #
3 # Copyright (C) 2018 Red Hat <contact@redhat.com>
4 #
5 # Author: David Zafman <dzafman@redhat.com>
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
10 # any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
16 #
17 source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
18
#
# Prepare the environment for a standalone monitor and run the requested
# test functions in order, stopping at the first one that fails.
#
# Arguments: $1   - test directory, passed to every test function as its
#                   single argument
#            $2.. - optional names of the functions to run; when none are
#                   given, every function of the current shell whose name
#                   matches TEST_[0-9a-z_]* is run
# Globals:   CEPH_MON and CEPH_ARGS are exported (CEPH_ARGS is appended to);
#            the export attribute of CEPH_CLI_TEST_DUP_COMMAND is removed
# Returns:   1 as soon as a test function fails, 0 otherwise
#
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7138" # git grep '\<7138\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    export -n CEPH_CLI_TEST_DUP_COMMAND
    # Without an explicit list, discover the tests from the function
    # definitions that `set` prints ("NAME () ").
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    # Keep the loop variable out of the global namespace.
    local func
    # $funcs is intentionally unquoted: it is a whitespace separated list
    # of function names.
    for func in $funcs ; do
        # Quote the directory so that it always reaches the test as exactly
        # one argument, even when it contains whitespace.
        $func "$dir" || return 1
    done
}
34
#
# Alter one copy of an object and then check the reported scrub error count
# and the "inconsistent" PG state after a deep scrub, while the primary is
# marked out and back in, and after a repair.
#
# Arguments: $1 - test directory
# Globals:   TESTDATA, poolid, osd and i are assigned without `local`
# Returns:   1 at the first failing step, 0 when every check passed
#
function TEST_scrub_test() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local objects=15

    TESTDATA="testdata.$$"

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=3 || return 1
    run_mgr $dir x || return 1
    for osd in $(seq 0 $((OSDS - 1))); do
        run_osd $dir $osd || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')

    # Store the same 1032 random bytes under $objects different names.
    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
    for i in $(seq 1 $objects); do
        rados -p $poolname put obj${i} $TESTDATA
    done
    rm -f $TESTDATA

    local primary=$(get_primary $poolname obj1)
    local otherosd=$(get_not_primary $poolname obj1)
    # Choose osd 2, unless that is $otherosd, in which case choose osd 0.
    local anotherosd="2"
    if [ "$otherosd" = "2" ]; then
        anotherosd="0"
    fi

    # Presumably replaces the bytes of obj1 on that OSD only, so that the
    # copies disagree -- TODO confirm against objectstore_tool.
    objectstore_tool $dir $anotherosd obj1 set-bytes /etc/fstab

    local pgid="${poolid}.0"
    local errors
    pg_deep_scrub "$pgid" || return 1

    # The deep scrub must flag the PG and count two scrub errors.
    if ! ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent; then
        return 1
    fi
    errors=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')
    [ "$errors" = "2" ] || return 1

    ceph osd out $primary
    wait_for_clean || return 1

    pg_deep_scrub "$pgid" || return 1

    # Same expectations with the original primary marked out, now also for
    # the first peer.
    errors=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')
    [ "$errors" = "2" ] || return 1
    errors=$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')
    [ "$errors" = "2" ] || return 1
    if ! ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent; then
        return 1
    fi

    ceph osd in $primary
    wait_for_clean || return 1

    repair "$pgid" || return 1
    wait_for_clean || return 1

    # Precondition for the next step: after the repair the first peer must
    # still report the old error count of 2.
    errors=$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')
    [ "$errors" = "2" ] || return 1
    if ! ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent; then
        return 1
    fi

    ceph osd out $primary
    wait_for_clean || return 1

    # With the primary out again, the PG and both peers must report zero.
    errors=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')
    [ "$errors" = "0" ] || return 1
    errors=$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')
    [ "$errors" = "0" ] || return 1
    errors=$(ceph pg $pgid query | jq '.peer_info[1].stats.stat_sum.num_scrub_errors')
    [ "$errors" = "0" ] || return 1
    if ! ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent; then
        return 1
    fi

    teardown $dir || return 1
}
109
# Grab year-month-day
DATESED="s/\([0-9]*-[0-9]*-[0-9]*\).*/\1/"
DATEFORMAT="%Y-%m-%d"

#
# Compare the dates of the first entry reported by an OSD's `dump_scrubs`
# admin socket command against expected offsets from the current time.
# Only the year-month-day part of each timestamp is compared.
#
# Arguments: $1 - id of the OSD to query
#            $2 - expected sched_time as an offset understood by
#                 `date -d "now + ..."`, e.g. "1 day"
#            $3 - expected deadline, same format, e.g. "1 week"
# Globals:   DATESED, DATEFORMAT (read)
# Returns:   0 when both dates match, 1 otherwise
#
function check_dump_scrubs() {
    local primary=$1
    local sched_time_check="$2"
    local deadline_check="$3"

    local scrubs
    scrubs="$(CEPH_ARGS='' ceph --admin-daemon "$(get_asok_path osd.${primary})" dump_scrubs)" || return 1

    # jq -r prints the string without its JSON double quotes; unlike eval it
    # never executes anything that appears in the daemon output.
    local sched_time deadline
    sched_time=$(jq -r '.[0].sched_time' <<< "$scrubs") || return 1
    deadline=$(jq -r '.[0].deadline' <<< "$scrubs") || return 1

    local actual expected
    actual=$(printf '%s\n' "$sched_time" | sed "$DATESED")
    expected=$(date +"${DATEFORMAT}" -d "now + $sched_time_check")
    [ "$actual" = "$expected" ] || return 1

    actual=$(printf '%s\n' "$deadline" | sed "$DATESED")
    expected=$(date +"${DATEFORMAT}" -d "now + $deadline_check")
    [ "$actual" = "$expected" ] || return 1
}
127
#
# Check that the sched_time and deadline reported by the primary OSD follow
# changes of the scrub min/max intervals, first through the OSD's admin
# socket and then through the pool settings.
#
# Arguments: $1 - test directory
# Globals:   TESTDATA, osd and i are assigned without `local`
# Returns:   1 at the first failing step, 0 when every check passed
#
function TEST_interval_changes() {
    # Take the directory from the argument list instead of relying on a
    # caller's `dir` variable being visible through dynamic scoping.
    local dir=$1
    local poolname=test
    local OSDS=2
    local objects=10
    # Don't assume how internal defaults are set
    local day=$((24 * 60 * 60))
    local week=$((day * 7))
    local min_interval=$day
    local max_interval=$week
    # Seconds to wait after each change before checking dump_scrubs
    local WAIT_FOR_UPDATE=15

    TESTDATA="testdata.$$"

    setup "$dir" || return 1
    # This min scrub interval results in 30 seconds backoff time
    run_mon "$dir" a --osd_pool_default_size=$OSDS || return 1
    run_mgr "$dir" x || return 1
    for osd in $(seq 0 $((OSDS - 1)))
    do
        run_osd "$dir" $osd --osd_scrub_min_interval=$min_interval --osd_scrub_max_interval=$max_interval --osd_scrub_interval_randomize_ratio=0 || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1

    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
    for i in $(seq 1 $objects)
    do
        rados -p $poolname put obj${i} $TESTDATA
    done
    rm -f $TESTDATA

    local primary=$(get_primary $poolname obj1)

    # Check initial settings from above (min 1 day, max 1 week)
    check_dump_scrubs $primary "1 day" "1 week" || return 1

    # Change global osd_scrub_min_interval to 2 days
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_min_interval $((day * 2))
    sleep $WAIT_FOR_UPDATE
    check_dump_scrubs $primary "2 days" "1 week" || return 1

    # Change global osd_scrub_max_interval to 2 weeks
    CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_max_interval $((week * 2))
    sleep $WAIT_FOR_UPDATE
    check_dump_scrubs $primary "2 days" "2 week" || return 1

    # Change the pool's scrub_min_interval to 3 days
    ceph osd pool set $poolname scrub_min_interval $((day * 3))
    sleep $WAIT_FOR_UPDATE
    check_dump_scrubs $primary "3 days" "2 week" || return 1

    # Change the pool's scrub_max_interval to 3 weeks
    ceph osd pool set $poolname scrub_max_interval $((week * 3))
    sleep $WAIT_FOR_UPDATE
    check_dump_scrubs $primary "3 days" "3 week" || return 1

    teardown "$dir" || return 1
}
189
#
# Start the OSDs with a scrub window that lies between two hours ago and
# one hour ago and with osd_scrub_extended_sleep=10, request a scrub, and
# check that the PG's last scrub stamp does not advance during the next
# three seconds.
#
# Arguments: $1 - test directory
# Globals:   TESTDATA, osd and i are assigned without `local`
# Returns:   1 at the first failing step or if the scrub stamp advanced,
#            0 otherwise
#
function TEST_scrub_extented_sleep() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local objects=15

    TESTDATA="testdata.$$"

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=3 || return 1
    run_mgr $dir x || return 1

    # Hours of the day without a leading zero ("07" -> "7", "00" -> "0").
    local scrub_begin_hour=$(date -d '2 hour ago' +"%H")
    scrub_begin_hour=${scrub_begin_hour#0}
    local scrub_end_hour=$(date -d '1 hour ago' +"%H")
    scrub_end_hour=${scrub_end_hour#0}

    for osd in $(seq 0 $((OSDS - 1))); do
        run_osd $dir $osd --osd_scrub_sleep=0 \
                          --osd_scrub_extended_sleep=10 \
                          --bluestore_cache_autotune=false \
                          --osd_scrub_begin_hour=$scrub_begin_hour \
                          --osd_scrub_end_hour=$scrub_end_hour || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1

    # Trigger a scrub on a PG
    local pgid=$(get_pg $poolname SOMETHING)
    local primary=$(get_primary $poolname SOMETHING)
    local last_scrub=$(get_last_scrub_stamp $pgid)
    ceph tell $pgid scrub || return 1

    # Due to the long delay, the scrub should not be done within 3 seconds
    local current_stamp
    i=0
    while [ $i -lt 3 ]; do
        current_stamp=$(get_last_scrub_stamp $pgid)
        # String comparison of the two stamps, done by the `[` builtin.
        if [ "$current_stamp" \> "$last_scrub" ]; then
            return 1
        fi
        sleep 1
        i=$((i + 1))
    done

    teardown $dir || return 1
}
232
#
# Request a scrub of the given kind on a single-PG pool holding 1000
# objects, set the matching "no scrub" cluster flag while the PG is
# scrubbing, and check that the primary's log records the abort. Afterwards
# wait for the PG's scrub stamp to move past the value seen after the abort.
#
# Arguments: $1 - test directory
#            $2 - "scrub" or "deep_scrub"
# Globals:   TESTDATA, stopscrub, check, poolid, found, TIMEOUT, osd and i
#            are assigned without `local`
# Returns:   1 at the first failing step, 0 otherwise
#
function _scrub_abort() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local objects=1000
    local type=$2

    TESTDATA="testdata.$$"
    # stopscrub: cluster flag to set
    # check:     start of the "... set, aborting" message looked up in the log
    case "$type" in
        scrub)
            stopscrub="noscrub"
            check="noscrub"
            ;;
        *)
            stopscrub="nodeep-scrub"
            check="nodeep_scrub"
            ;;
    esac

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=3 || return 1
    run_mgr $dir x || return 1
    for osd in $(seq 0 $((OSDS - 1))); do
        run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off \
                --osd_deep_scrub_randomize_ratio=0.0 \
                --osd_scrub_sleep=5.0 \
                --osd_scrub_interval_randomize_ratio=0 || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')

    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
    for i in $(seq 1 $objects); do
        rados -p $poolname put obj${i} $TESTDATA
    done
    rm -f $TESTDATA

    local primary=$(get_primary $poolname obj1)
    local pgid="${poolid}.0"

    ceph tell $pgid $type || return 1
    # deep-scrub won't start without scrub noticing
    if [ "$type" = "deep_scrub" ]; then
        ceph tell $pgid scrub || return 1
    fi

    # Wait for scrubbing to start
    set -o pipefail
    found="no"
    for i in $(seq 0 200); do
        flush_pg_stats
        if ! ceph pg dump pgs | grep ^$pgid| grep -q "scrubbing"; then
            continue
        fi
        found="yes"
        #ceph pg dump pgs
        break
    done
    set +o pipefail

    if [ "$found" = "no" ]; then
        echo "Scrubbing never started"
        return 1
    fi

    ceph osd set $stopscrub

    # Wait for scrubbing to end; falls through silently if the PG is still
    # scrubbing after the last iteration.
    set -o pipefail
    for i in $(seq 0 200); do
        flush_pg_stats
        if ! ceph pg dump pgs | grep ^$pgid | grep -q "scrubbing"; then
            #ceph pg dump pgs
            break
        fi
    done
    set +o pipefail

    sleep 5

    # The matching log lines are printed to stdout on success.
    grep "$check set, aborting" $dir/osd.${primary}.log || {
        echo "Abort not seen in log"
        return 1
    }

    local last_scrub=$(get_last_scrub_stamp $pgid)
    # NOTE(review): this always clears "noscrub", although the flag set
    # above is $stopscrub ("nodeep-scrub" for deep_scrub) -- confirm whether
    # `ceph osd unset $stopscrub` was intended.
    ceph osd unset noscrub
    # Presumably consumed by wait_for_scrub as its timeout -- TODO confirm.
    TIMEOUT=$((objects / 2))
    wait_for_scrub $pgid "$last_scrub" || return 1

    teardown $dir || return 1
}
336
#
# Run the scrub abort scenario for a regular scrub.
#
# Arguments: $1 - test directory
# Returns:   the status of _scrub_abort
#
function TEST_scrub_abort() {
    local dir=$1
    # Quoted so that the directory stays in the first argument slot and
    # "scrub" in the second, whatever characters the path contains.
    _scrub_abort "$dir" scrub
}
341
#
# Run the scrub abort scenario for a deep scrub.
#
# Arguments: $1 - test directory
# Returns:   the status of _scrub_abort
#
function TEST_deep_scrub_abort() {
    local dir=$1
    # Quoted so that the directory stays in the first argument slot and
    # "deep_scrub" in the second, whatever characters the path contains.
    _scrub_abort "$dir" deep_scrub
}
346
347 main osd-scrub-test "$@"
348
349 # Local Variables:
350 # compile-command: "cd build ; make -j4 && \
351 # ../qa/run-standalone.sh osd-scrub-test.sh"
352 # End: