]> git.proxmox.com Git - ceph.git/blob - ceph/qa/standalone/scrub/osd-scrub-snaps.sh
update sources to 12.2.7
[ceph.git] / ceph / qa / standalone / scrub / osd-scrub-snaps.sh
#! /bin/bash
#
# Copyright (C) 2015 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Test development and debugging
# Set to "yes" in order to ignore diff errors and save results to update test
getjson="no"

# jq filter applied to both the expected and the actual reports, so that only
# the "inconsistents" array is compared (other fields, e.g. "epoch", are dropped).
jqfilter='.inconsistents'
# Python snippet that re-serialises the JSON read from stdin with sorted keys
# and a two-space indent, giving diff a canonical form to compare.
# print() is called with parentheses so the snippet is valid under both
# Python 2 and Python 3.
sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print(json.dumps(ud, sort_keys=True, indent=2))'
25
#######################################
# Run the requested test functions, each one bracketed by setup and teardown.
# Globals:
#   CEPH_MON   exported, set to 127.0.0.1:7121
#   CEPH_ARGS  exported; a fresh --fsid, --auth-supported=none and --mon-host
#              are appended to whatever it already holds
# Arguments:
#   $1    working directory handed to setup, the test function and teardown
#   $2..  optional names of the functions to run; when none are given, every
#         shell function whose name matches TEST_[0-9a-z_]* is run
# Returns:
#   1 as soon as setup, a test function or teardown fails, 0 otherwise
#######################################
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7121" # git grep '\<7121\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    # "set" lists function definitions as "name () ", which the sed expression
    # uses to discover every TEST_* function when no name was given explicitly.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    # $funcs is deliberately unquoted: it is a whitespace separated list.
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}
42
#######################################
# Build a pool history made of seven pool snapshots and then, with the OSDs
# stopped, damage snapshot related data directly in one OSD's object store.
#
# Damage applied with ceph-objectstore-tool (all on the store $dir/$osd):
#   obj1   head removed (--force)
#   obj5   clones 2 and 1 removed, clone 4 overwritten with 18 * 256 bytes,
#          a clone with snapid 7 fabricated without snapset entry / object info
#   obj3   head overwritten with 15 * 256 bytes
#   obj4   clone 7 removed
#   obj2   "snapset" attribute removed from the head
#   obj6..obj14  clear-snapset, one variant per object
#   obj15  "snapset" attribute replaced by garbage
#
# Arguments:
#   $1  test directory holding the OSD data directories
#   $2  pool name
#   $3  scratch file used as payload for rados put / set-bytes (overwritten)
#   $4  id of the OSD whose object store is modified
# Returns:
#   1 if the OSDs cannot be stopped, otherwise the status of the final rm
#######################################
function create_scenario() {
    local dir=$1
    local poolname=$2
    local TESTDATA=$3
    local osd=$4

    local datapath="$dir/${osd}"
    # Kept local so that repeated invocations do not leak state into callers.
    local SNAP JSON OBJ5SAVE i spec obj

    SNAP=1
    rados -p "$poolname" mksnap snap${SNAP}
    dd if=/dev/urandom of="$TESTDATA" bs=256 count=${SNAP}
    rados -p "$poolname" put obj1 "$TESTDATA"
    rados -p "$poolname" put obj5 "$TESTDATA"
    rados -p "$poolname" put obj3 "$TESTDATA"
    for i in {6..14}
    do
        rados -p "$poolname" put obj${i} "$TESTDATA"
    done

    SNAP=2
    rados -p "$poolname" mksnap snap${SNAP}
    dd if=/dev/urandom of="$TESTDATA" bs=256 count=${SNAP}
    rados -p "$poolname" put obj5 "$TESTDATA"

    SNAP=3
    rados -p "$poolname" mksnap snap${SNAP}
    dd if=/dev/urandom of="$TESTDATA" bs=256 count=${SNAP}
    rados -p "$poolname" put obj3 "$TESTDATA"

    SNAP=4
    rados -p "$poolname" mksnap snap${SNAP}
    dd if=/dev/urandom of="$TESTDATA" bs=256 count=${SNAP}
    rados -p "$poolname" put obj5 "$TESTDATA"
    rados -p "$poolname" put obj2 "$TESTDATA"

    SNAP=5
    rados -p "$poolname" mksnap snap${SNAP}
    SNAP=6
    rados -p "$poolname" mksnap snap${SNAP}
    dd if=/dev/urandom of="$TESTDATA" bs=256 count=${SNAP}
    rados -p "$poolname" put obj5 "$TESTDATA"

    SNAP=7
    rados -p "$poolname" mksnap snap${SNAP}

    rados -p "$poolname" rm obj4
    rados -p "$poolname" rm obj2

    kill_daemons "$dir" TERM osd || return 1

    # Don't need to use ceph_objectstore_tool() function because osd stopped

    JSON="$(ceph-objectstore-tool --data-path "$datapath" --head --op list obj1)"
    ceph-objectstore-tool --data-path "$datapath" "$JSON" --force remove

    JSON="$(ceph-objectstore-tool --data-path "$datapath" --op list obj5 | grep '"snapid":2')"
    ceph-objectstore-tool --data-path "$datapath" "$JSON" remove

    JSON="$(ceph-objectstore-tool --data-path "$datapath" --op list obj5 | grep '"snapid":1')"
    # Remembered so that a bogus clone id can be derived from it further down.
    OBJ5SAVE="$JSON"
    ceph-objectstore-tool --data-path "$datapath" "$JSON" remove

    JSON="$(ceph-objectstore-tool --data-path "$datapath" --op list obj5 | grep '"snapid":4')"
    dd if=/dev/urandom of="$TESTDATA" bs=256 count=18
    ceph-objectstore-tool --data-path "$datapath" "$JSON" set-bytes "$TESTDATA"

    JSON="$(ceph-objectstore-tool --data-path "$datapath" --head --op list obj3)"
    dd if=/dev/urandom of="$TESTDATA" bs=256 count=15
    ceph-objectstore-tool --data-path "$datapath" "$JSON" set-bytes "$TESTDATA"

    JSON="$(ceph-objectstore-tool --data-path "$datapath" --op list obj4 | grep '"snapid":7')"
    ceph-objectstore-tool --data-path "$datapath" "$JSON" remove

    JSON="$(ceph-objectstore-tool --data-path "$datapath" --head --op list obj2)"
    ceph-objectstore-tool --data-path "$datapath" "$JSON" rm-attr snapset

    # Create a clone which isn't in snapset and doesn't have object info
    JSON="$(echo "$OBJ5SAVE" | sed 's/snapid":1/snapid":7/')"
    dd if=/dev/urandom of="$TESTDATA" bs=256 count=7
    ceph-objectstore-tool --data-path "$datapath" "$JSON" set-bytes "$TESTDATA"

    # One clear-snapset variant per object, written as "object:variant".
    # obj6 has no variant, i.e. clear-snapset is invoked without an argument.
    for spec in obj6: obj7:corrupt obj8:seq obj9:clone_size \
                obj10:clone_overlap obj11:clones obj12:head \
                obj13:snaps obj14:size
    do
        obj=${spec%%:*}
        JSON="$(ceph-objectstore-tool --data-path "$datapath" --head --op list "$obj")"
        # ${spec#*:} is intentionally unquoted: when it is empty (obj6) no
        # argument at all must be passed after clear-snapset.
        ceph-objectstore-tool --data-path "$datapath" "$JSON" clear-snapset ${spec#*:}
    done

    echo "garbage" > "$dir/bad"
    JSON="$(ceph-objectstore-tool --data-path "$datapath" --head --op list obj15)"
    ceph-objectstore-tool --data-path "$datapath" "$JSON" set-attr snapset "$dir/bad"
    rm -f "$dir/bad"
}
145
#######################################
# Scrub a one-OSD, one-PG pool whose snapshot data was damaged by
# create_scenario and verify what scrub reports:
#   - list-inconsistent-obj is empty (single copy, nothing to compare against)
#   - list-inconsistent-snapset matches the expected JSON below
#   - no OSD dies while the seven pool snapshots are removed
#   - the primary OSD log contains every expected cluster log message
# Globals:
#   getjson, jqfilter, sortkeys, DIFFCOLOPTS, LOCALRUN, CEPH_ROOT (read)
# Arguments:
#   $1  test directory
# Returns:
#   0 when every check passes, 1 otherwise
#######################################
function TEST_scrub_snaps() {
    local dir=$1
    local poolname=test
    local OBJS=15
    local OSDS=1
    local TESTDATA="testdata.$$"
    local osd i

    run_mon "$dir" a --osd_pool_default_size=$OSDS || return 1
    run_mgr "$dir" x || return 1
    for (( osd = 0; osd < OSDS; osd++ ))
    do
        run_osd "$dir" $osd || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1
    local poolid
    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')

    dd if=/dev/urandom of="$TESTDATA" bs=1032 count=1
    for (( i = 1; i <= OBJS; i++ ))
    do
        rados -p $poolname put obj${i} "$TESTDATA"
    done

    local primary
    primary=$(get_primary $poolname obj1)

    # A scenario that could not be set up (OSDs not stopped) makes every later
    # check meaningless, so fail right away - after removing the scratch file.
    local rc=0
    create_scenario "$dir" $poolname "$TESTDATA" "$primary" || rc=$?
    rm -f "$TESTDATA"
    test "$rc" = "0" || return 1

    # create_scenario stopped the OSDs; bring them back before scrubbing.
    for (( osd = 0; osd < OSDS; osd++ ))
    do
        run_osd "$dir" $osd || return 1
    done

    local pgid="${poolid}.0"
    if ! pg_scrub "$pgid" ; then
        cat "$dir/osd.0.log"
        return 1
    fi

    test "$(grep -c "_scan_snaps start" "$dir/osd.${primary}.log")" = "2" || return 1

    rados list-inconsistent-pg $poolname > "$dir/json" || return 1
    # Check pg count
    test "$(jq '. | length' "$dir/json")" = "1" || return 1
    # Check pgid
    test "$(jq -r '.[0]' "$dir/json")" = "$pgid" || return 1

    rados list-inconsistent-obj $pgid > "$dir/json" || return 1

    # With a single-copy pool the injected snapshot errors do not show up as
    # object errors, because those are detected by comparing copies.
    jq "$jqfilter" << EOF | python -c "$sortkeys" > "$dir/checkcsjson"
{
    "epoch": 17,
    "inconsistents": []
}
EOF

    jq "$jqfilter" "$dir/json" | python -c "$sortkeys" > "$dir/csjson"
    diff ${DIFFCOLOPTS} "$dir/checkcsjson" "$dir/csjson" || test "$getjson" = "yes" || return 1

    rados list-inconsistent-snapset $pgid > "$dir/json" || return 1

    # Expected report.  Layout is irrelevant: both sides go through jq and the
    # key-sorting python step before being compared.
    jq "$jqfilter" << EOF | python -c "$sortkeys" > "$dir/checkcsjson"
{
  "inconsistents": [
    {"errors": ["headless"], "snap": 1, "locator": "", "nspace": "", "name": "obj1"},
    {"errors": ["size_mismatch"], "snap": 1, "locator": "", "nspace": "", "name": "obj10"},
    {"errors": ["headless"], "snap": 1, "locator": "", "nspace": "", "name": "obj11"},
    {"errors": ["size_mismatch"], "snap": 1, "locator": "", "nspace": "", "name": "obj14"},
    {"errors": ["headless"], "snap": 1, "locator": "", "nspace": "", "name": "obj6"},
    {"errors": ["headless"], "snap": 1, "locator": "", "nspace": "", "name": "obj7"},
    {"errors": ["size_mismatch"], "snap": 1, "locator": "", "nspace": "", "name": "obj9"},
    {"errors": ["headless"], "snap": 4, "locator": "", "nspace": "", "name": "obj2"},
    {"errors": ["size_mismatch"], "snap": 4, "locator": "", "nspace": "", "name": "obj5"},
    {"errors": ["headless"], "snap": 7, "locator": "", "nspace": "", "name": "obj2"},
    {"errors": ["info_missing", "headless"], "snap": 7, "locator": "", "nspace": "", "name": "obj5"},
    {
      "name": "obj10", "nspace": "", "locator": "", "snap": "head",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 1, "snaps": [1]},
        "clones": [
          {"snap": 1, "size": 1032, "overlap": "????", "snaps": [1]}
        ]
      },
      "errors": []
    },
    {
      "extra clones": [1],
      "errors": ["extra_clones"],
      "snap": "head", "locator": "", "nspace": "", "name": "obj11",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 1, "snaps": [1]},
        "clones": []
      }
    },
    {
      "errors": ["head_mismatch"],
      "snap": "head", "locator": "", "nspace": "", "name": "obj12",
      "snapset": {
        "head_exists": 0,
        "snap_context": {"seq": 1, "snaps": [1]},
        "clones": [
          {"snap": 1, "size": 1032, "overlap": "[]", "snaps": [1]}
        ]
      }
    },
    {
      "name": "obj14", "nspace": "", "locator": "", "snap": "head",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 1, "snaps": [1]},
        "clones": [
          {"snap": 1, "size": 1033, "overlap": "[]", "snaps": [1]}
        ]
      },
      "errors": []
    },
    {
      "errors": ["snapset_corrupted"],
      "snap": "head", "locator": "", "nspace": "", "name": "obj15"
    },
    {
      "extra clones": [7, 4],
      "errors": ["snapset_missing", "extra_clones"],
      "snap": "head", "locator": "", "nspace": "", "name": "obj2"
    },
    {
      "errors": ["size_mismatch"],
      "snap": "head", "locator": "", "nspace": "", "name": "obj3",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 3, "snaps": [3, 2, 1]},
        "clones": [
          {"snap": 1, "size": 1032, "overlap": "[]", "snaps": [1]},
          {"snap": 3, "size": 256, "overlap": "[]", "snaps": [3, 2]}
        ]
      }
    },
    {
      "missing": [7],
      "errors": ["clone_missing"],
      "snap": "head", "locator": "", "nspace": "", "name": "obj4",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 7, "snaps": [7, 6, 5, 4, 3, 2, 1]},
        "clones": [
          {"snap": 7, "size": 1032, "overlap": "[]", "snaps": [7, 6, 5, 4, 3, 2, 1]}
        ]
      }
    },
    {
      "missing": [2, 1],
      "extra clones": [7],
      "errors": ["extra_clones", "clone_missing"],
      "snap": "head", "locator": "", "nspace": "", "name": "obj5",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 6, "snaps": [6, 5, 4, 3, 2, 1]},
        "clones": [
          {"snap": 1, "size": 1032, "overlap": "[]", "snaps": [1]},
          {"snap": 2, "size": 256, "overlap": "[]", "snaps": [2]},
          {"snap": 4, "size": 512, "overlap": "[]", "snaps": [4, 3]},
          {"snap": 6, "size": 1024, "overlap": "[]", "snaps": [6, 5]}
        ]
      }
    },
    {
      "extra clones": [1],
      "errors": ["extra_clones"],
      "snap": "head", "locator": "", "nspace": "", "name": "obj6",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 1, "snaps": [1]},
        "clones": []
      }
    },
    {
      "extra clones": [1],
      "errors": ["head_mismatch", "extra_clones"],
      "snap": "head", "locator": "", "nspace": "", "name": "obj7",
      "snapset": {
        "head_exists": 0,
        "snap_context": {"seq": 0, "snaps": []},
        "clones": []
      }
    },
    {
      "errors": ["snapset_error"],
      "snap": "head", "locator": "", "nspace": "", "name": "obj8",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 0, "snaps": [1]},
        "clones": [
          {"snap": 1, "size": 1032, "overlap": "[]", "snaps": [1]}
        ]
      }
    },
    {
      "name": "obj9", "nspace": "", "locator": "", "snap": "head",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 1, "snaps": [1]},
        "clones": [
          {"snap": 1, "size": "????", "overlap": "[]", "snaps": [1]}
        ]
      },
      "errors": []
    }
  ],
  "epoch": 20
}
EOF

    jq "$jqfilter" "$dir/json" | python -c "$sortkeys" > "$dir/csjson"
    diff ${DIFFCOLOPTS} "$dir/checkcsjson" "$dir/csjson" || test "$getjson" = "yes" || return 1
    if test "$getjson" = "yes"
    then
        jq '.' "$dir/json" > save1.json
    fi

    if test "$LOCALRUN" = "yes" && command -v jsonschema > /dev/null;
    then
        jsonschema -i "$dir/json" "$CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json" || return 1
    fi

    # Remember the OSD pids now so that a crash caused by the snapshot
    # removal below can be detected afterwards.
    local pidfiles pidfile pid
    local pids=""
    pidfiles=$(find "$dir" 2>/dev/null | grep 'osd[^/]*\.pid')
    for pidfile in ${pidfiles}
    do
        pids+="$(cat $pidfile) "
    done

    for i in {1..7}
    do
        rados -p $poolname rmsnap snap$i
    done

    local ERRORS=0

    for pid in $pids
    do
        if ! kill -0 $pid
        then
            echo "OSD Crash occurred"
            ERRORS=$(( ERRORS + 1 ))
        fi
    done

    kill_daemons "$dir" || return 1

    # grep patterns; bracket expressions such as [(] and [[] match the literal
    # character without needing backslashes.
    local -a err_strings
    err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj10:.* is missing in clone_overlap"
    err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 no '_' attr"
    err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 is an unexpected clone"
    err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:4 on disk size [(]4608[)] does not match object info size [(]512[)] adjusted for ondisk to [(]512[)]"
    err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head expected clone .*:::obj5:2"
    err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head expected clone .*:::obj5:1"
    err_strings[6]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj5:head 2 missing clone[(]s[)]"
    err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj12:head snapset.head_exists=false, but head exists"
    err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj8:head snaps.seq not set"
    err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj7:head snapset.head_exists=false, but head exists"
    err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj7:1 is an unexpected clone"
    err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj3:head on disk size [(]3840[)] does not match object info size [(]768[)] adjusted for ondisk to [(]768[)]"
    err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj6:1 is an unexpected clone"
    err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:head no 'snapset' attr"
    err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:7 clone ignored due to missing snapset"
    err_strings[15]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:4 clone ignored due to missing snapset"
    err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj4:head expected clone .*:::obj4:7"
    err_strings[17]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj4:head 1 missing clone[(]s[)]"
    err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj1:1 is an unexpected clone"
    err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 is missing in clone_size"
    err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 is an unexpected clone"
    err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 size 1032 != clone_size 1033"
    err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 22 errors"
    err_strings[23]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj15:head can't decode 'snapset' attr buffer"

    local err_string
    for err_string in "${err_strings[@]}"
    do
        if ! grep -q "$err_string" "$dir/osd.${primary}.log"
        then
            echo "Missing log message '$err_string'"
            ERRORS=$(( ERRORS + 1 ))
        fi
    done

    if [ "$ERRORS" != "0" ];
    then
        echo "TEST FAILED WITH $ERRORS ERRORS"
        return 1
    fi

    echo "TEST PASSED"
    return 0
}
774
#######################################
# Two-OSD variant of the snapshot scrub test: the damage produced by
# create_scenario is applied to the object store of either the primary or the
# replica of the single PG, then the PG is scrubbed and the results checked.
#
# Expectations that depend on $2:
#   replica  list-inconsistent-snapset is empty, "scrub 21 errors" is logged
#   primary  list-inconsistent-snapset matches the JSON below,
#            "scrub 33 errors" is logged, and the pool snapshots are removed
#            afterwards to check that no OSD crashes
# Globals:
#   getjson, jqfilter, sortkeys, DIFFCOLOPTS, LOCALRUN, CEPH_ROOT (read)
# Arguments:
#   $1  test directory
#   $2  "primary" or "replica": which OSD's store gets damaged
# Returns:
#   0 when every check passes, 1 otherwise
#######################################
function _scrub_snaps_multi() {
    local dir=$1
    local poolname=test
    local OBJS=15
    local OSDS=2
    local which=$2
    local TESTDATA="testdata.$$"
    local osd i

    # $which is later used as the *name* of a local variable, so reject
    # anything but the two supported roles before starting any daemon.
    case "$which" in
        primary|replica) ;;
        *)
            echo "_scrub_snaps_multi: expected 'primary' or 'replica', got '$which'" >&2
            return 1
            ;;
    esac

    run_mon "$dir" a --osd_pool_default_size=$OSDS || return 1
    run_mgr "$dir" x || return 1
    for (( osd = 0; osd < OSDS; osd++ ))
    do
        run_osd "$dir" $osd || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1
    wait_for_clean || return 1
    local poolid
    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')

    dd if=/dev/urandom of="$TESTDATA" bs=1032 count=1
    for (( i = 1; i <= OBJS; i++ ))
    do
        rados -p $poolname put obj${i} "$TESTDATA"
    done

    local primary replica
    primary=$(get_primary $poolname obj1)
    replica=$(get_not_primary $poolname obj1)

    # ${!which} is the value of the local named by $which ($primary or
    # $replica), i.e. the id of the OSD to damage; no eval needed.
    local rc=0
    create_scenario "$dir" $poolname "$TESTDATA" "${!which}" || rc=$?
    rm -f "$TESTDATA"
    test "$rc" = "0" || return 1

    # create_scenario stopped the OSDs; bring them back before scrubbing.
    for (( osd = 0; osd < OSDS; osd++ ))
    do
        run_osd "$dir" $osd || return 1
    done

    local pgid="${poolid}.0"
    if ! pg_scrub "$pgid" ; then
        cat "$dir/osd.0.log"
        return 1
    fi

    test "$(grep -c "_scan_snaps start" "$dir/osd.${primary}.log")" -gt "3" || return 1
    test "$(grep -c "_scan_snaps start" "$dir/osd.${replica}.log")" -gt "3" || return 1

    rados list-inconsistent-pg $poolname > "$dir/json" || return 1
    # Check pg count
    test "$(jq '. | length' "$dir/json")" = "1" || return 1
    # Check pgid
    test "$(jq -r '.[0]' "$dir/json")" = "$pgid" || return 1

    # Printed for the test log only; the output is not checked.
    rados list-inconsistent-obj $pgid --format=json-pretty

    rados list-inconsistent-snapset $pgid > "$dir/json" || return 1

    local scruberrors
    # When only the replica was damaged, all snapshots on the primary are
    # consistent, so no snapset inconsistencies are expected here.
    if [ "$which" = "replica" ];
    then
        scruberrors="21"
        jq "$jqfilter" << EOF | python -c "$sortkeys" > "$dir/checkcsjson"
{
    "epoch": 23,
    "inconsistents": []
}
EOF

    else
        scruberrors="33"
        # Expected report.  Layout is irrelevant: both sides go through jq and
        # the key-sorting python step before being compared.
        jq "$jqfilter" << EOF | python -c "$sortkeys" > "$dir/checkcsjson"
{
  "epoch": 23,
  "inconsistents": [
    {"name": "obj10", "nspace": "", "locator": "", "snap": 1, "errors": ["size_mismatch"]},
    {"name": "obj11", "nspace": "", "locator": "", "snap": 1, "errors": ["headless"]},
    {"name": "obj14", "nspace": "", "locator": "", "snap": 1, "errors": ["size_mismatch"]},
    {"name": "obj6", "nspace": "", "locator": "", "snap": 1, "errors": ["headless"]},
    {"name": "obj7", "nspace": "", "locator": "", "snap": 1, "errors": ["headless"]},
    {"name": "obj9", "nspace": "", "locator": "", "snap": 1, "errors": ["size_mismatch"]},
    {"name": "obj5", "nspace": "", "locator": "", "snap": 7, "errors": ["info_missing", "headless"]},
    {
      "name": "obj10", "nspace": "", "locator": "", "snap": "head",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 1, "snaps": [1]},
        "clones": [
          {"snap": 1, "size": 1032, "overlap": "????", "snaps": [1]}
        ]
      },
      "errors": []
    },
    {
      "name": "obj11", "nspace": "", "locator": "", "snap": "head",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 1, "snaps": [1]},
        "clones": []
      },
      "errors": ["extra_clones"],
      "extra clones": [1]
    },
    {
      "errors": ["head_mismatch"],
      "locator": "", "name": "obj12", "nspace": "", "snap": "head",
      "snapset": {
        "clones": [
          {"overlap": "[]", "size": 1032, "snap": 1, "snaps": [1]}
        ],
        "head_exists": 0,
        "snap_context": {"seq": 1, "snaps": [1]}
      }
    },
    {
      "name": "obj14", "nspace": "", "locator": "", "snap": "head",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 1, "snaps": [1]},
        "clones": [
          {"snap": 1, "size": 1033, "overlap": "[]", "snaps": [1]}
        ]
      },
      "errors": []
    },
    {
      "name": "obj5", "nspace": "", "locator": "", "snap": "head",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 6, "snaps": [6, 5, 4, 3, 2, 1]},
        "clones": [
          {"snap": 1, "size": 1032, "overlap": "[]", "snaps": [1]},
          {"snap": 2, "size": 256, "overlap": "[]", "snaps": [2]},
          {"snap": 4, "size": 512, "overlap": "[]", "snaps": [4, 3]},
          {"snap": 6, "size": 1024, "overlap": "[]", "snaps": [6, 5]}
        ]
      },
      "errors": ["extra_clones"],
      "extra clones": [7]
    },
    {
      "name": "obj6", "nspace": "", "locator": "", "snap": "head",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 1, "snaps": [1]},
        "clones": []
      },
      "errors": ["extra_clones"],
      "extra clones": [1]
    },
    {
      "name": "obj7", "nspace": "", "locator": "", "snap": "head",
      "snapset": {
        "head_exists": 0,
        "snap_context": {"seq": 0, "snaps": []},
        "clones": []
      },
      "errors": ["head_mismatch", "extra_clones"],
      "extra clones": [1]
    },
    {
      "name": "obj8", "nspace": "", "locator": "", "snap": "head",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 0, "snaps": [1]},
        "clones": [
          {"snap": 1, "size": 1032, "overlap": "[]", "snaps": [1]}
        ]
      },
      "errors": ["snapset_error"]
    },
    {
      "name": "obj9", "nspace": "", "locator": "", "snap": "head",
      "snapset": {
        "head_exists": 1,
        "snap_context": {"seq": 1, "snaps": [1]},
        "clones": [
          {"snap": 1, "size": "????", "overlap": "[]", "snaps": [1]}
        ]
      },
      "errors": []
    }
  ]
}
EOF
    fi

    jq "$jqfilter" "$dir/json" | python -c "$sortkeys" > "$dir/csjson"
    diff ${DIFFCOLOPTS} "$dir/checkcsjson" "$dir/csjson" || test "$getjson" = "yes" || return 1
    if test "$getjson" = "yes"
    then
        jq '.' "$dir/json" > save1.json
    fi

    if test "$LOCALRUN" = "yes" && command -v jsonschema > /dev/null;
    then
        jsonschema -i "$dir/json" "$CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json" || return 1
    fi

    # Remember the OSD pids now so that a crash caused by the snapshot
    # removal below can be detected afterwards.
    local pidfiles pidfile pid
    local pids=""
    pidfiles=$(find "$dir" 2>/dev/null | grep 'osd[^/]*\.pid')
    for pidfile in ${pidfiles}
    do
        pids+="$(cat $pidfile) "
    done

    # When removing snapshots with a corrupt replica, it crashes.
    # See http://tracker.ceph.com/issues/23875
    if [ "$which" = "primary" ];
    then
        for i in {1..7}
        do
            rados -p $poolname rmsnap snap$i
        done
    fi

    local ERRORS=0

    for pid in $pids
    do
        if ! kill -0 $pid
        then
            echo "OSD Crash occurred"
            ERRORS=$(( ERRORS + 1 ))
        fi
    done

    kill_daemons "$dir" || return 1

    # grep patterns; bracket expressions such as [(] and [[] match the literal
    # character without needing backslashes.
    local -a err_strings
    err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] missing .*:::obj4:7"
    err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1]: soid .*:::obj3:head size 3840 != size 768 from auth oi"
    err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] missing .*:::obj5:1"
    err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] missing .*:::obj5:2"
    err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1]: soid .*:::obj5:4 size 4608 != size 512 from auth oi"
    err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid .*:::obj5:7: failed to pick suitable object info"
    err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] missing .*:::obj1:head"
    err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub ${scruberrors} errors"

    local err_string
    for err_string in "${err_strings[@]}"
    do
        if ! grep -q "$err_string" "$dir/osd.${primary}.log"
        then
            echo "Missing log message '$err_string'"
            ERRORS=$(( ERRORS + 1 ))
        fi
    done

    if [ "$ERRORS" != "0" ];
    then
        echo "TEST FAILED WITH $ERRORS ERRORS"
        return 1
    fi

    echo "TEST PASSED"
    return 0
}
1250
#######################################
# Run the two-OSD snapshot scrub test with the damage applied to the replica.
# Globals:
#   CEPH_ARGS  extended with --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=3
#              for the duration of the test, then restored
# Arguments:
#   $1  test directory
# Returns:
#   the status of _scrub_snaps_multi
#######################################
function TEST_scrub_snaps_replica() {
    local dir=$1
    # Locals, so the saved value and status do not leak into other tests.
    local ORIG_ARGS=$CEPH_ARGS
    local err=0
    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=3"
    _scrub_snaps_multi "$dir" replica || err=$?
    CEPH_ARGS=$ORIG_ARGS
    return $err
}
1260
#######################################
# Run the two-OSD snapshot scrub test with the damage applied to the primary.
# Globals:
#   CEPH_ARGS  extended with --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=3
#              for the duration of the test, then restored
# Arguments:
#   $1  test directory
# Returns:
#   the status of _scrub_snaps_multi
#######################################
function TEST_scrub_snaps_primary() {
    local dir=$1
    # Locals, so the saved value and status do not leak into other tests.
    local ORIG_ARGS=$CEPH_ARGS
    local err=0
    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=3"
    _scrub_snaps_multi "$dir" primary || err=$?
    CEPH_ARGS=$ORIG_ARGS
    return $err
}
1270
# Hand over to main with this test's name and the command line arguments.
# main is not defined in this file - presumably it comes from the sourced
# ceph-helpers.sh (confirm there).
main osd-scrub-snaps "$@"

# Local Variables:
# compile-command: "cd build ; make -j4 && \
#   ../qa/run-standalone.sh osd-scrub-snaps.sh"
# End: