[ceph.git] / ceph / src / test / test_unfound.sh

#!/usr/bin/env bash
set -x

#
# Creates some unfound objects and then tests finding them.
#
# The command line is executed as a command at the end of this file, so
# the script is driven by naming one of its functions, e.g. "run".
#

# Includes
# The helper library is looked up next to this script. Both the command
# substitution and $0 are quoted so that a checkout path containing
# whitespace still resolves to a single file name.
source "$(dirname "$0")/test_common.sh"

# Pool that the test objects are written to and read back from.
TEST_POOL=rbd

# Functions
# Write one batch of test objects into $TEST_POOL.
#   $1, $2 - forwarded unchanged as the first two arguments of write_objects
#            (presumably a start/stop version pair -- confirm against
#            test_common.sh, where write_objects is expected to live)
# The literals 10 and 1000000 are passed as the third and fourth arguments;
# their meaning is defined by write_objects, not here.
# Every expansion is quoted so each value reaches write_objects as exactly
# one argument.
my_write_objects() {
        write_objects "$1" "$2" 10 1000000 "$TEST_POOL"
}

# Restart the local test cluster with a given number of OSDs.
#   $1 - OSD count; exported as CEPH_NUM_OSD before the cluster scripts run
# Calls die "vstart failed" when ./vstart.sh returns non-zero.
setup() {
        local num_osds=$1
        # Option text handed to vstart.sh through -o: a very long recovery
        # start delay so that recovery does not begin on its own, and a
        # scrub limit of 0.
        local extra_conf='osd recovery delay start = 10000
osd max scrubs = 0'

        export CEPH_NUM_OSD=$num_osds

        # Stop whatever is currently running; the exit status is not checked.
        ./stop.sh

        if ! ./vstart.sh -d -n -o "$extra_conf"; then
                die "vstart failed"
        fi
}

# Scenario body: make objects unfound by stopping the OSD that saw every
# write before recovery has run, check that a read of such an object does
# not complete, then bring that OSD back and check that the objects are
# found again and the pending read finishes.
#
# Expects the two-OSD cluster that osd_resurrection_1 starts via setup.
# stop_osd, restart_osd, poll_cmd, start_recovery, die and $TEMPDIR are not
# defined in this file; presumably test_common.sh provides them.
#
# Returns 0 on success; every failure is reported through die.
osd_resurrection_1_impl() {
        local fetch_pid

        # Write lots and lots of objects
        my_write_objects 1 2

        # Take down osd1
        stop_osd 1

        # Continue writing a lot of objects
        my_write_objects 3 4

        # Bring up osd1
        restart_osd 1

        # Finish peering.
        sleep 15

        # Stop osd0.
        # At this point we have peered, but *NOT* recovered.
        # Objects should be lost.
        stop_osd 0

        # The checks in this function treat a poll_cmd status of 1 as
        # "the expected value was observed".
        poll_cmd "./ceph pg debug unfound_objects_exist" TRUE 3 120
        [ $? -eq 1 ] || die "Failed to see unfound objects."
        echo "Got unfound objects."

        # Fetch one object in the background. The PID is remembered so the
        # exit status of this particular fetch can be checked further down;
        # a bare "wait" would always report success.
        ./rados -c ./ceph.conf -p "$TEST_POOL" get obj01 "$TEMPDIR/obj01" &
        fetch_pid=$!
        sleep 5
        # The object is still unfound, so the fetch must not have produced
        # its output file yet.
        [ -e "$TEMPDIR/obj01" ] && die "unexpected error: fetched unfound object?"

        restart_osd 0

        poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 3 120
        [ $? -eq 1 ] || die "Failed to recover unfound objects."

        # The fetch should now be able to finish; fail the test if the
        # rados tool itself reported an error.
        wait "$fetch_pid" || die "radostool failed"
        [ -e "$TEMPDIR/obj01" ] || die "unexpected error: failed to fetched newly-found object"

        # Turn off recovery delay start and verify that every osd gets copies
        # of the correct objects.
        echo "starting recovery..."
        start_recovery 2

        # success
        return 0
}

# OSD resurrection scenario: start a fresh two-OSD cluster, then run the
# scenario body. The return status is that of osd_resurrection_1_impl.
osd_resurrection_1() {
        local cluster_size=2

        setup "$cluster_size"
        osd_resurrection_1_impl
}

# Scenario body for the "stray OSD" case: arrange things so that the only
# copy of the newest object version sits on an OSD that is down, confirm
# the objects are reported unfound, then bring that OSD back and confirm
# they are found again.
#
# Expects the three-OSD cluster that stray_test starts via setup.
# Always returns 0; failed checks are reported through die.
stray_test_impl() {
        # Command string handed to poll_cmd for both unfound-object checks.
        local unfound_query="./ceph pg debug unfound_objects_exist"

        # osd0 sits out the first round of writes.
        stop_osd 0
        #   osd0: stopped | osd1: active | osd2: active

        my_write_objects 1 1

        # osd1 goes down holding version 1.
        stop_osd 1
        sleep 15
        #   osd0: stopped | osd1: stopped (ver1) | osd2: active (ver1)

        # Only osd2 sees the second round of writes.
        my_write_objects 2 2

        restart_osd 1
        sleep 15
        #   osd0: stopped | osd1: active (ver1) | osd2: active (ver2)

        # Remove the single holder of version 2.
        stop_osd 2
        sleep 15
        #   osd0: stopped | osd1: active (ver1) | osd2: stopped (ver2)

        restart_osd 0
        sleep 15
        #   osd0: active | osd1: active (ver1) | osd2: stopped (ver2)

        # A poll_cmd status of 1 is what this test accepts as success.
        poll_cmd "$unfound_query" TRUE 5 300
        if [ $? -ne 1 ]; then
                die "Failed to see unfound objects."
        fi

        #
        # Bringing osd2 back now makes it a stray, yet it is the one OSD
        # that holds what is needed: the most recent version of the objects.
        #

        restart_osd 2
        sleep 15

        poll_cmd "$unfound_query" FALSE 4 240
        if [ $? -ne 1 ]; then
                die "Failed to discover unfound objects."
        fi

        echo "starting recovery..."
        start_recovery 3

        # success
        return 0
}

# Stray-OSD scenario: start a fresh three-OSD cluster, then run the
# scenario body. The return status is that of stray_test_impl.
stray_test() {
        local cluster_size=3

        setup "$cluster_size"
        stray_test_impl
}

# Run both scenarios of this file in order, calling die "test failed" for
# any scenario that returns a non-zero status.
run() {
        local scenario

        for scenario in osd_resurrection_1 stray_test; do
                "$scenario" || die "test failed"
        done
}

# Entry point: execute the command line as a command -- normally the name
# of one of the functions above (for example "run") followed by its
# arguments. "$@" is quoted so every argument is passed on as exactly one
# word instead of being re-split and glob-expanded.
"$@"
Commit	Line	Data
11fdf7f2 TL	1	#!/usr/bin/env bash
11fdf7f2 TL	2	set -x
7c673cae FG	3
	4	#
	5	# Creates some unfound objects and then tests finding them.
	6	#
	7
	8	# Includes
	9	source "`dirname $0`/test_common.sh"
	10
	11	TEST_POOL=rbd
	12
	13	# Functions
	14	my_write_objects() {
	15	write_objects $1 $2 10 1000000 $TEST_POOL
	16	}
	17
	18	setup() {
	19	export CEPH_NUM_OSD=$1
	20
	21	# Start ceph
	22	./stop.sh
	23
	24	# set recovery start to a really long time to ensure that we don't start recovery
	25	./vstart.sh -d -n -o 'osd recovery delay start = 10000
	26	osd max scrubs = 0' || die "vstart failed"
	27	}
	28
	29	osd_resurrection_1_impl() {
	30	# Write lots and lots of objects
	31	my_write_objects 1 2
	32
	33	# Take down osd1
	34	stop_osd 1
	35
	36	# Continue writing a lot of objects
	37	my_write_objects 3 4
	38
	39	# Bring up osd1
	40	restart_osd 1
	41
	42	# Finish peering.
	43	sleep 15
	44
	45	# Stop osd0.
	46	# At this point we have peered, but *NOT* recovered.
	47	# Objects should be lost.
	48	stop_osd 0
	49
	50	poll_cmd "./ceph pg debug unfound_objects_exist" TRUE 3 120
	51	[ $? -eq 1 ] || die "Failed to see unfound objects."
	52	echo "Got unfound objects."
	53
	54	(
	55	./rados -c ./ceph.conf -p $TEST_POOL get obj01 $TEMPDIR/obj01 || die "radostool failed"
	56	) &
	57	sleep 5
	58	[ -e $TEMPDIR/obj01 ] && die "unexpected error: fetched unfound object?"
	59
	60	restart_osd 0
	61
	62	poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 3 120
	63	[ $? -eq 1 ] || die "Failed to recover unfound objects."
	64
	65	wait
	66	[ -e $TEMPDIR/obj01 ] || die "unexpected error: failed to fetched newly-found object"
67
68	# Turn off recovery delay start and verify that every osd gets copies
69	# of the correct objects.
70	echo "starting recovery..."
71	start_recovery 2
72
73	# success
74	return 0
75	}
76
77	osd_resurrection_1() {
78	setup 2
79	osd_resurrection_1_impl
80	}
81
82	stray_test_impl() {
83	stop_osd 0
84	# 0:stopped 1:active 2:active
85
86	my_write_objects 1 1
87
88	stop_osd 1
89	sleep 15
90	# 0:stopped 1:stopped(ver1) 2:active(ver1)
91
92	my_write_objects 2 2
93
94	restart_osd 1
95	sleep 15
96	# 0:stopped 1:active(ver1) 2:active(ver2)
97
98	stop_osd 2
99	sleep 15
100	# 0:stopped 1:active(ver1) 2:stopped(ver2)
101
102	restart_osd 0
103	sleep 15
104	# 0:active 1:active(ver1) 2:stopped(ver2)
105
106	poll_cmd "./ceph pg debug unfound_objects_exist" TRUE 5 300
	107	[ $? -eq 1 ] || die "Failed to see unfound objects."
108
109	#
11fdf7f2	110	# Now, when we bring up osd2, it will be considered a stray. However, it
7c673cae FG	111	# has the version that we need-- the very latest version of the
	112	# objects.
	113	#
	114
	115	restart_osd 2
	116	sleep 15
	117
	118	poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 4 240
	119	[ $? -eq 1 ] || die "Failed to discover unfound objects."
	120
	121	echo "starting recovery..."
	122	start_recovery 3
	123
	124	# success
	125	return 0
	126	}
	127
	128	stray_test() {
	129	setup 3
	130	stray_test_impl
	131	}
	132
	133	run() {
	134	osd_resurrection_1 || die "test failed"
	135
	136	stray_test || die "test failed"
	137	}
	138
	139	$@