]> git.proxmox.com Git - ceph.git/blob - ceph/qa/standalone/scrub/osd-scrub-snaps.sh
update ceph source to reef 18.1.2
[ceph.git] / ceph / qa / standalone / scrub / osd-scrub-snaps.sh
1 #!/usr/bin/env bash
2 #
3 # Copyright (C) 2015 Red Hat <contact@redhat.com>
4 #
5 # Author: David Zafman <dzafman@redhat.com>
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
10 # any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
16 #
17 source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
18
19 # Test development and debugging
20 # Set to "yes" in order to ignore diff errors and save results to update test
21 getjson="no"
22
23 jqfilter='.inconsistents'
24 sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print ( json.dumps(ud, sort_keys=True, indent=2) )'
25
function run() {
    # Standard ceph-helpers test driver: set up a private cluster
    # environment, then execute every requested TEST_* function (or all
    # of them by default), with a fresh setup/teardown around each one.
    local dir=$1
    shift

    # git grep '\<7121\>' : there must be only one test using this port
    export CEPH_MON="127.0.0.1:7121"
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "

    export -n CEPH_CLI_TEST_DUP_COMMAND
    # Default to every TEST_* function currently defined in the shell.
    local tests=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local tname
    for tname in $tests ; do
        setup $dir || return 1
        $tname $dir || return 1
        teardown $dir || return 1
    done
}
43
function create_scenario() {
    # Build a pool full of objects/snapshots, stop the given OSD, and
    # then corrupt its on-disk snapshot metadata in many distinct ways
    # with ceph-objectstore-tool so a later scrub reports known errors.
    #
    # Arguments:
    #   $1 - test directory
    #   $2 - pool name
    #   $3 - scratch data file path
    #   $4 - id of the OSD whose store will be corrupted
    local dir=$1
    local poolname=$2
    local TESTDATA=$3
    local osd=$4

    # Create snapshot history: snapN is taken before the writes that
    # follow it, so clones accumulate for the rewritten objects.
    SNAP=1
    rados -p $poolname mksnap snap${SNAP}
    dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
    rados -p $poolname put obj1 $TESTDATA
    rados -p $poolname put obj5 $TESTDATA
    rados -p $poolname put obj3 $TESTDATA
    for i in `seq 6 14`
    do rados -p $poolname put obj${i} $TESTDATA
    done

    SNAP=2
    rados -p $poolname mksnap snap${SNAP}
    dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
    rados -p $poolname put obj5 $TESTDATA

    SNAP=3
    rados -p $poolname mksnap snap${SNAP}
    dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
    rados -p $poolname put obj3 $TESTDATA

    SNAP=4
    rados -p $poolname mksnap snap${SNAP}
    dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
    rados -p $poolname put obj5 $TESTDATA
    rados -p $poolname put obj2 $TESTDATA

    SNAP=5
    rados -p $poolname mksnap snap${SNAP}
    SNAP=6
    rados -p $poolname mksnap snap${SNAP}
    dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP}
    rados -p $poolname put obj5 $TESTDATA

    SNAP=7
    rados -p $poolname mksnap snap${SNAP}

    # Removing these heads leaves only their snapshot clones behind.
    rados -p $poolname rm obj4
    rados -p $poolname rm obj16
    rados -p $poolname rm obj2

    kill_daemons $dir TERM osd || return 1

    # Don't need to use ceph_objectstore_tool() function because osd stopped

    # obj1: remove the head object entirely (clone becomes headless).
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj1)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" --force remove || return 1

    # obj5: remove the snapid 2 clone (expected clone will be missing).
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":2)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1

    # obj5 snapid 1: remove the object but keep its snapmap entry
    # (--rmtype nosnapmap), saving the JSON for later reuse.
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":1)"
    OBJ5SAVE="$JSON"
    # Starts with a snapmap
    ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log
    grep SNA_ $dir/drk.log
    grep "^[pm].*SNA_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1
    ceph-objectstore-tool --data-path $dir/${osd} --rmtype nosnapmap "$JSON" remove || return 1
    # Check that snapmap is still there
    ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log
    grep SNA_ $dir/drk.log
    grep "^[pm].*SNA_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1
    rm -f $dir/drk.log

    # obj5 snapid 4: rewrite with the wrong size (size_mismatch error).
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":4)"
    dd if=/dev/urandom of=$TESTDATA bs=256 count=18
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1

    # obj3 head: rewrite with the wrong size.
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj3)"
    dd if=/dev/urandom of=$TESTDATA bs=256 count=15
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1

    # obj4: remove the snapid 7 clone (clone_missing error).
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj4 | grep \"snapid\":7)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1

    # obj16: remove only the snapmap entry (--rmtype snapmap) so the
    # snap mapper check detects and repairs the inconsistency.
    # Starts with a snapmap
    ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log
    grep SNA_ $dir/drk.log
    grep "^[pm].*SNA_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj16 | grep \"snapid\":7)"
    ceph-objectstore-tool --data-path $dir/${osd} --rmtype snapmap "$JSON" remove || return 1
    # Check that snapmap is now removed
    ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log
    grep SNA_ $dir/drk.log
    ! grep "^[pm].*SNA_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1
    rm -f $dir/drk.log

    # obj2 head: drop the snapset attribute (snapset_missing error).
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj2)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" rm-attr snapset || return 1

    # Create a clone which isn't in snapset and doesn't have object info
    JSON="$(echo "$OBJ5SAVE" | sed s/snapid\":1/snapid\":7/)"
    dd if=/dev/urandom of=$TESTDATA bs=256 count=7
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1

    # obj6-obj14: corrupt snapsets in every supported clear-snapset mode.
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj6)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset || return 1
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj7)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset corrupt || return 1
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj8)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset seq || return 1
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj9)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clone_size || return 1
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj10)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clone_overlap || return 1
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj11)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clones || return 1
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj12)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset head || return 1
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj13)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset snaps || return 1
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj14)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset size || return 1

    # obj15: replace the snapset attr with undecodable garbage.
    echo "garbage" > $dir/bad
    JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj15)"
    ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-attr snapset $dir/bad || return 1
    rm -f $dir/bad
    return 0
}
169
function TEST_scrub_snaps() {
    # Single-OSD snapshot scrub test: inject the corruption scenario on
    # the only copy, scrub the PG, and verify that
    # list-inconsistent-snapset reports exactly the expected errors and
    # that the expected cluster-log messages appear.
    local dir=$1
    local poolname=test
    local OBJS=16
    local OSDS=1

    TESTDATA="testdata.$$"

    run_mon $dir a --osd_pool_default_size=$OSDS || return 1
    run_mgr $dir x || return 1
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      run_osd $dir $osd || return 1
    done

    # All scrubs done manually.  Don't want any unexpected scheduled scrubs.
    ceph osd set noscrub || return 1
    ceph osd set nodeep-scrub || return 1

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')

    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
    for i in `seq 1 $OBJS`
    do
        rados -p $poolname put obj${i} $TESTDATA
    done

    local primary=$(get_primary $poolname obj1)

    # Stop the OSD and corrupt its store in the standard ways.
    create_scenario $dir $poolname $TESTDATA $primary || return 1

    rm -f $TESTDATA

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      activate_osd $dir $osd || return 1
    done
    # Tighten scrub chunk sizes / stat reporting so the scrub proceeds
    # in multiple small chunks and results are published quickly.
    ceph tell osd.* config set osd_shallow_scrub_chunk_max 25
    ceph tell osd.* config set osd_shallow_scrub_chunk_min 5
    ceph tell osd.* config set osd_pg_stat_report_interval_max 1


    wait_for_clean || return 1

    ceph tell osd.* config get osd_shallow_scrub_chunk_max
    ceph tell osd.* config get osd_shallow_scrub_chunk_min
    ceph tell osd.* config get osd_pg_stat_report_interval_max
    ceph tell osd.* config get osd_scrub_chunk_max
    ceph tell osd.* config get osd_scrub_chunk_min

    local pgid="${poolid}.0"
    if ! pg_scrub "$pgid" ; then
        return 1
    fi

    # _scan_snaps should have run exactly twice on the primary.
    test "$(grep "_scan_snaps start" $dir/osd.${primary}.log | wc -l)" = "2" || return 1

    rados list-inconsistent-pg $poolname > $dir/json || return 1
    # Check pg count
    test $(jq '. | length' $dir/json) = "1" || return 1
    # Check pgid
    test $(jq -r '.[0]' $dir/json) = $pgid || return 1

    rados list-inconsistent-obj $pgid > $dir/json || return 1

    # The injected snapshot errors with a single copy pool doesn't
    # see object errors because all the issues are detected by
    # comparing copies.
    jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson
{
  "epoch": 17,
  "inconsistents": []
}
EOF

    jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson
    multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1

    rados list-inconsistent-snapset $pgid > $dir/json || return 1

    # Expected snapset inconsistencies, one entry per injected error.
    jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson
{
  "inconsistents": [
    {
      "errors": [
        "headless"
      ],
      "snap": 1,
      "locator": "",
      "nspace": "",
      "name": "obj1"
    },
    {
      "errors": [
        "size_mismatch"
      ],
      "snap": 1,
      "locator": "",
      "nspace": "",
      "name": "obj10"
    },
    {
      "errors": [
        "headless"
      ],
      "snap": 1,
      "locator": "",
      "nspace": "",
      "name": "obj11"
    },
    {
      "errors": [
        "size_mismatch"
      ],
      "snap": 1,
      "locator": "",
      "nspace": "",
      "name": "obj14"
    },
    {
      "errors": [
        "headless"
      ],
      "snap": 1,
      "locator": "",
      "nspace": "",
      "name": "obj6"
    },
    {
      "errors": [
        "headless"
      ],
      "snap": 1,
      "locator": "",
      "nspace": "",
      "name": "obj7"
    },
    {
      "errors": [
        "size_mismatch"
      ],
      "snap": 1,
      "locator": "",
      "nspace": "",
      "name": "obj9"
    },
    {
      "errors": [
        "headless"
      ],
      "snap": 4,
      "locator": "",
      "nspace": "",
      "name": "obj2"
    },
    {
      "errors": [
        "size_mismatch"
      ],
      "snap": 4,
      "locator": "",
      "nspace": "",
      "name": "obj5"
    },
    {
      "errors": [
        "headless"
      ],
      "snap": 7,
      "locator": "",
      "nspace": "",
      "name": "obj2"
    },
    {
      "errors": [
        "info_missing",
        "headless"
      ],
      "snap": 7,
      "locator": "",
      "nspace": "",
      "name": "obj5"
    },
    {
      "name": "obj10",
      "nspace": "",
      "locator": "",
      "snap": "head",
      "snapset": {
        "seq": 1,
        "clones": [
          {
            "snap": 1,
            "size": 1032,
            "overlap": "????",
            "snaps": [
              1
            ]
          }
        ]
      },
      "errors": []
    },
    {
      "extra clones": [
        1
      ],
      "errors": [
        "extra_clones"
      ],
      "snap": "head",
      "locator": "",
      "nspace": "",
      "name": "obj11",
      "snapset": {
        "seq": 1,
        "clones": []
      }
    },
    {
      "name": "obj14",
      "nspace": "",
      "locator": "",
      "snap": "head",
      "snapset": {
        "seq": 1,
        "clones": [
          {
            "snap": 1,
            "size": 1033,
            "overlap": "[]",
            "snaps": [
              1
            ]
          }
        ]
      },
      "errors": []
    },
    {
      "errors": [
        "snapset_corrupted"
      ],
      "snap": "head",
      "locator": "",
      "nspace": "",
      "name": "obj15"
    },
    {
      "extra clones": [
        7,
        4
      ],
      "errors": [
        "snapset_missing",
        "extra_clones"
      ],
      "snap": "head",
      "locator": "",
      "nspace": "",
      "name": "obj2"
    },
    {
      "errors": [
        "size_mismatch"
      ],
      "snap": "head",
      "locator": "",
      "nspace": "",
      "name": "obj3",
      "snapset": {
        "seq": 3,
        "clones": [
          {
            "snap": 1,
            "size": 1032,
            "overlap": "[]",
            "snaps": [
              1
            ]
          },
          {
            "snap": 3,
            "size": 256,
            "overlap": "[]",
            "snaps": [
              3,
              2
            ]
          }
        ]
      }
    },
    {
      "missing": [
        7
      ],
      "errors": [
        "clone_missing"
      ],
      "snap": "head",
      "locator": "",
      "nspace": "",
      "name": "obj4",
      "snapset": {
        "seq": 7,
        "clones": [
          {
            "snap": 7,
            "size": 1032,
            "overlap": "[]",
            "snaps": [
              7,
              6,
              5,
              4,
              3,
              2,
              1
            ]
          }
        ]
      }
    },
    {
      "missing": [
        2,
        1
      ],
      "extra clones": [
        7
      ],
      "errors": [
        "extra_clones",
        "clone_missing"
      ],
      "snap": "head",
      "locator": "",
      "nspace": "",
      "name": "obj5",
      "snapset": {
        "seq": 6,
        "clones": [
          {
            "snap": 1,
            "size": 1032,
            "overlap": "[]",
            "snaps": [
              1
            ]
          },
          {
            "snap": 2,
            "size": 256,
            "overlap": "[]",
            "snaps": [
              2
            ]
          },
          {
            "snap": 4,
            "size": 512,
            "overlap": "[]",
            "snaps": [
              4,
              3
            ]
          },
          {
            "snap": 6,
            "size": 1024,
            "overlap": "[]",
            "snaps": [
              6,
              5
            ]
          }
        ]
      }
    },
    {
      "extra clones": [
        1
      ],
      "errors": [
        "extra_clones"
      ],
      "snap": "head",
      "locator": "",
      "nspace": "",
      "name": "obj6",
      "snapset": {
        "seq": 1,
        "clones": []
      }
    },
    {
      "extra clones": [
        1
      ],
      "errors": [
        "extra_clones"
      ],
      "snap": "head",
      "locator": "",
      "nspace": "",
      "name": "obj7",
      "snapset": {
        "seq": 0,
        "clones": []
      }
    },
    {
      "errors": [
        "snapset_error"
      ],
      "snap": "head",
      "locator": "",
      "nspace": "",
      "name": "obj8",
      "snapset": {
        "seq": 0,
        "clones": [
          {
            "snap": 1,
            "size": 1032,
            "overlap": "[]",
            "snaps": [
              1
            ]
          }
        ]
      }
    },
    {
      "name": "obj9",
      "nspace": "",
      "locator": "",
      "snap": "head",
      "snapset": {
        "seq": 1,
        "clones": [
          {
            "snap": 1,
            "size": "????",
            "overlap": "[]",
            "snaps": [
              1
            ]
          }
        ]
      },
      "errors": []
    }
  ],
  "epoch": 20
}
EOF

    jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson
    multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
    if test $getjson = "yes"
    then
        jq '.' $dir/json > save1.json
    fi

    # Optionally validate the output against the published schema.
    if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null;
    then
      jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json || return 1
    fi

    # Collect OSD pids so we can detect crashes later with kill -0.
    pidfiles=$(find $dir 2>/dev/null | grep 'osd[^/]*\.pid')
    pids=""
    for pidfile in ${pidfiles}
    do
        pids+="$(cat $pidfile) "
    done

    ERRORS=0

    # Remove all snapshots and wait for snaptrim to finish (or error),
    # giving up after ~20 seconds.
    for i in `seq 1 7`
    do
        rados -p $poolname rmsnap snap$i
    done
    sleep 5
    local -i loop=0
    while ceph pg dump pgs | grep -q snaptrim;
    do
        if ceph pg dump pgs | grep -q snaptrim_error;
        then
            break
        fi
        sleep 2
        loop+=1
        if (( $loop >= 10 )) ; then
            ERRORS=$(expr $ERRORS + 1)
            break
        fi
    done
    ceph pg dump pgs

    # Any OSD that died during the test counts as a failure.
    for pid in $pids
    do
        if ! kill -0 $pid
        then
            echo "OSD Crash occurred"
            ERRORS=$(expr $ERRORS + 1)
        fi
    done

    kill_daemons $dir || return 1

    # Every expected cluster-log message must appear in the primary's log.
    declare -a err_strings
    err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj10:.* : is missing in clone_overlap"
    err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 : no '_' attr"
    err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 : is an unexpected clone"
    err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:4 : on disk size [(]4608[)] does not match object info size [(]512[)] adjusted for ondisk to [(]512[)]"
    err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head : expected clone .*:::obj5:2"
    err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head : expected clone .*:::obj5:1"
    err_strings[6]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj5:head : 2 missing clone[(]s[)]"
    err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj8:head : snaps.seq not set"
    err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj7:1 : is an unexpected clone"
    err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj3:head : on disk size [(]3840[)] does not match object info size [(]768[)] adjusted for ondisk to [(]768[)]"
    err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj6:1 : is an unexpected clone"
    err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:head : no 'snapset' attr"
    err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:7 : clone ignored due to missing snapset"
    err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:4 : clone ignored due to missing snapset"
    err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj4:head : expected clone .*:::obj4:7"
    err_strings[15]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj4:head : 1 missing clone[(]s[)]"
    err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj1:1 : is an unexpected clone"
    err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 : is missing in clone_size"
    err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 : is an unexpected clone"
    err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 : size 1032 != clone_size 1033"
    err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 20 errors"
    err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj15:head : can't decode 'snapset' attr "
    err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : osd[.][0-9]* found snap mapper error on pg 1.0 oid 1:461f8b5e:::obj16:7 snaps missing in mapper, should be: {1, 2, 3, 4, 5, 6, 7} ...repaired"

    for err_string in "${err_strings[@]}"
    do
        if ! grep "$err_string" $dir/osd.${primary}.log > /dev/null;
        then
            echo "Missing log message '$err_string'"
            ERRORS=$(expr $ERRORS + 1)
        fi
    done

    if [ $ERRORS != "0" ];
    then
        echo "TEST FAILED WITH $ERRORS ERRORS"
        return 1
    fi

    echo "TEST PASSED"
    return 0
}
728
function _scrub_snaps_multi() {
    # Two-OSD snapshot scrub test shared by the replica/primary variants.
    # The corruption scenario is injected on the OSD named by $2
    # ("primary" or "replica"); scrub results differ accordingly since a
    # healthy copy exists on the other OSD.
    local dir=$1
    local poolname=test
    local OBJS=16
    local OSDS=2
    local which=$2

    TESTDATA="testdata.$$"

    run_mon $dir a --osd_pool_default_size=$OSDS || return 1
    run_mgr $dir x || return 1
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      run_osd $dir $osd || return 1
    done

    # All scrubs done manually.  Don't want any unexpected scheduled scrubs.
    ceph osd set noscrub || return 1
    ceph osd set nodeep-scrub || return 1

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')

    dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
    for i in `seq 1 $OBJS`
    do
        rados -p $poolname put obj${i} $TESTDATA
    done

    local primary=$(get_primary $poolname obj1)
    local replica=$(get_not_primary $poolname obj1)

    # Indirect through $which to pick the OSD id ($primary or $replica).
    eval create_scenario $dir $poolname $TESTDATA \$$which || return 1

    rm -f $TESTDATA

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      activate_osd $dir $osd || return 1
    done

    # Force small scrub chunks and fast stat reporting.
    ceph tell osd.* config set osd_shallow_scrub_chunk_max 3
    ceph tell osd.* config set osd_shallow_scrub_chunk_min 3
    ceph tell osd.* config set osd_scrub_chunk_min 3
    ceph tell osd.* config set osd_pg_stat_report_interval_max 1
    wait_for_clean || return 1

    local pgid="${poolid}.0"
    if ! pg_scrub "$pgid" ; then
        return 1
    fi

    # With small chunks, _scan_snaps must have run several times on each side.
    test "$(grep "_scan_snaps start" $dir/osd.${primary}.log | wc -l)" -gt "3" || return 1
    test "$(grep "_scan_snaps start" $dir/osd.${replica}.log | wc -l)" -gt "3" || return 1

    rados list-inconsistent-pg $poolname > $dir/json || return 1
    # Check pg count
    test $(jq '. | length' $dir/json) = "1" || return 1
    # Check pgid
    test $(jq -r '.[0]' $dir/json) = $pgid || return 1

    rados list-inconsistent-obj $pgid --format=json-pretty

    rados list-inconsistent-snapset $pgid > $dir/json || return 1

    # Since all of the snapshots on the primary is consistent there are no errors here
    if [ $which = "replica" ];
    then
        scruberrors="20"
        jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson
{
  "epoch": 23,
  "inconsistents": []
}
EOF

    else
        scruberrors="30"
        jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson
{
  "epoch": 23,
  "inconsistents": [
    {
      "name": "obj10",
      "nspace": "",
      "locator": "",
      "snap": 1,
      "errors": [
        "size_mismatch"
      ]
    },
    {
      "name": "obj11",
      "nspace": "",
      "locator": "",
      "snap": 1,
      "errors": [
        "headless"
      ]
    },
    {
      "name": "obj14",
      "nspace": "",
      "locator": "",
      "snap": 1,
      "errors": [
        "size_mismatch"
      ]
    },
    {
      "name": "obj6",
      "nspace": "",
      "locator": "",
      "snap": 1,
      "errors": [
        "headless"
      ]
    },
    {
      "name": "obj7",
      "nspace": "",
      "locator": "",
      "snap": 1,
      "errors": [
        "headless"
      ]
    },
    {
      "name": "obj9",
      "nspace": "",
      "locator": "",
      "snap": 1,
      "errors": [
        "size_mismatch"
      ]
    },
    {
      "name": "obj5",
      "nspace": "",
      "locator": "",
      "snap": 7,
      "errors": [
        "info_missing",
        "headless"
      ]
    },
    {
      "name": "obj10",
      "nspace": "",
      "locator": "",
      "snap": "head",
      "snapset": {
        "seq": 1,
        "clones": [
          {
            "snap": 1,
            "size": 1032,
            "overlap": "????",
            "snaps": [
              1
            ]
          }
        ]
      },
      "errors": []
    },
    {
      "name": "obj11",
      "nspace": "",
      "locator": "",
      "snap": "head",
      "snapset": {
        "seq": 1,
        "clones": []
      },
      "errors": [
        "extra_clones"
      ],
      "extra clones": [
        1
      ]
    },
    {
      "name": "obj14",
      "nspace": "",
      "locator": "",
      "snap": "head",
      "snapset": {
        "seq": 1,
        "clones": [
          {
            "snap": 1,
            "size": 1033,
            "overlap": "[]",
            "snaps": [
              1
            ]
          }
        ]
      },
      "errors": []
    },
    {
      "name": "obj5",
      "nspace": "",
      "locator": "",
      "snap": "head",
      "snapset": {
        "seq": 6,
        "clones": [
          {
            "snap": 1,
            "size": 1032,
            "overlap": "[]",
            "snaps": [
              1
            ]
          },
          {
            "snap": 2,
            "size": 256,
            "overlap": "[]",
            "snaps": [
              2
            ]
          },
          {
            "snap": 4,
            "size": 512,
            "overlap": "[]",
            "snaps": [
              4,
              3
            ]
          },
          {
            "snap": 6,
            "size": 1024,
            "overlap": "[]",
            "snaps": [
              6,
              5
            ]
          }
        ]
      },
      "errors": [
        "extra_clones"
      ],
      "extra clones": [
        7
      ]
    },
    {
      "name": "obj6",
      "nspace": "",
      "locator": "",
      "snap": "head",
      "snapset": {
        "seq": 1,
        "clones": []
      },
      "errors": [
        "extra_clones"
      ],
      "extra clones": [
        1
      ]
    },
    {
      "name": "obj7",
      "nspace": "",
      "locator": "",
      "snap": "head",
      "snapset": {
        "seq": 0,
        "clones": []
      },
      "errors": [
        "extra_clones"
      ],
      "extra clones": [
        1
      ]
    },
    {
      "name": "obj8",
      "nspace": "",
      "locator": "",
      "snap": "head",
      "snapset": {
        "seq": 0,
        "clones": [
          {
            "snap": 1,
            "size": 1032,
            "overlap": "[]",
            "snaps": [
              1
            ]
          }
        ]
      },
      "errors": [
        "snapset_error"
      ]
    },
    {
      "name": "obj9",
      "nspace": "",
      "locator": "",
      "snap": "head",
      "snapset": {
        "seq": 1,
        "clones": [
          {
            "snap": 1,
            "size": "????",
            "overlap": "[]",
            "snaps": [
              1
            ]
          }
        ]
      },
      "errors": []
    }
  ]
}
EOF
    fi

    jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson
    multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
    if test $getjson = "yes"
    then
        jq '.' $dir/json > save1.json
    fi

    # Optionally validate the output against the published schema.
    if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null;
    then
      jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json || return 1
    fi

    # Collect OSD pids so we can detect crashes later with kill -0.
    pidfiles=$(find $dir 2>/dev/null | grep 'osd[^/]*\.pid')
    pids=""
    for pidfile in ${pidfiles}
    do
        pids+="$(cat $pidfile) "
    done

    ERRORS=0

    # When removing snapshots with a corrupt replica, it crashes.
    # See http://tracker.ceph.com/issues/23875
    if [ $which = "primary" ];
    then
        for i in `seq 1 7`
        do
            rados -p $poolname rmsnap snap$i
        done
        sleep 5
        local -i loop=0
        while ceph pg dump pgs | grep -q snaptrim;
        do
            if ceph pg dump pgs | grep -q snaptrim_error;
            then
                break
            fi
            sleep 2
            loop+=1
            if (( $loop >= 10 )) ; then
                ERRORS=$(expr $ERRORS + 1)
                break
            fi
        done
    fi
    ceph pg dump pgs

    # Any OSD that died during the test counts as a failure.
    for pid in $pids
    do
        if ! kill -0 $pid
        then
            echo "OSD Crash occurred"
            ERRORS=$(expr $ERRORS + 1)
        fi
    done

    kill_daemons $dir || return 1

    # Messages expected in the primary's cluster log.
    declare -a err_strings
    err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj4:7 : missing"
    err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] soid .*:::obj3:head : size 3840 != size 768 from auth oi"
    err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj5:1 : missing"
    err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj5:2 : missing"
    err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] soid .*:::obj5:4 : size 4608 != size 512 from auth oi"
    err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid .*:::obj5:7 : failed to pick suitable object info"
    err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj1:head : missing"
    err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub ${scruberrors} errors"

    for err_string in "${err_strings[@]}"
    do
        if ! grep "$err_string" $dir/osd.${primary}.log > /dev/null;
        then
            echo "Missing log message '$err_string'"
            ERRORS=$(expr $ERRORS + 1)
        fi
    done

    # Check replica specific messages
    declare -a rep_err_strings
    osd=$(eval echo \$$which)
    rep_err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : osd[.][0-9]* found snap mapper error on pg 1.0 oid 1:461f8b5e:::obj16:7 snaps missing in mapper, should be: {1, 2, 3, 4, 5, 6, 7} ...repaired"
    for err_string in "${rep_err_strings[@]}"
    do
        if ! grep "$err_string" $dir/osd.${osd}.log > /dev/null;
        then
            echo "Missing log message '$err_string'"
            ERRORS=$(expr $ERRORS + 1)
        fi
    done

    if [ $ERRORS != "0" ];
    then
        echo "TEST FAILED WITH $ERRORS ERRORS"
        return 1
    fi

    echo "TEST PASSED"
    return 0
}
1162
function TEST_scrub_snaps_replica() {
    # Run the two-OSD snap-scrub scenario with corruption injected on the
    # replica, forcing small scrub chunk sizes via CEPH_ARGS.
    local dir=$1
    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS="$CEPH_ARGS --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1"
    _scrub_snaps_multi $dir replica
    err=$?
    # Restore the caller's CEPH_ARGS before propagating the test result.
    CEPH_ARGS=$ORIG_ARGS
    return $err
}
1172
function TEST_scrub_snaps_primary() {
    # Run the two-OSD snap-scrub scenario with corruption injected on the
    # primary, forcing small scrub chunk sizes via CEPH_ARGS.
    local dir=$1
    ORIG_ARGS=$CEPH_ARGS
    CEPH_ARGS="$CEPH_ARGS --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1"
    _scrub_snaps_multi $dir primary
    err=$?
    # Restore the caller's CEPH_ARGS before propagating the test result.
    CEPH_ARGS=$ORIG_ARGS
    return $err
}
1182
1183 main osd-scrub-snaps "$@"
1184
1185 # Local Variables:
1186 # compile-command: "cd build ; make -j4 && \
1187 # ../qa/run-standalone.sh osd-scrub-snaps.sh"
1188 # End: