#!/bin/sh

set -x

# run on a single-node three-OSD cluster

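# abort all three OSDs; SIGABRT makes them dump core and leave crash reports
# behind for ceph-crash to pick up below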
sudo killall -ABRT ceph-osd
sleep 5

# kill caused coredumps; find them and delete them, carefully, so as
# not to disturb other coredumps, or else teuthology will see them
# and assume test failure.  sudos are because the core files are
# root/600
for f in $(find $TESTDIR/archive/coredump -type f); do
    gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd $f)
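    # feeding "quit" to gdb makes it print only the core summary (lines such as
    # "Core was generated by ..." and "Program terminated with signal ...";
    # exact wording varies by gdb version), which the expr checks below match on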
    if expr match "$gdb_output" ".*generated.*ceph-osd.*" && \
       ( expr match "$gdb_output" ".*terminated.*signal 6.*" || \
         expr match "$gdb_output" ".*terminated.*signal SIGABRT.*" )
    then
        sudo rm $f
    fi
done

# let daemon find crashdumps on startup
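# (on startup the ceph-crash agent scans /var/lib/ceph/crash, posts each new
# report to the cluster, and moves it into crash/posted; the sleep gives it
# time to finish)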
sudo systemctl restart ceph-crash
sleep 30

# must be 3 crashdumps registered and moved to crash/posted
[ $(ceph crash ls | wc -l) = 4 ] || exit 1    # 4 here because of the table header
[ $(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l) = 3 ] || exit 1
# there should be a health warning
ceph health detail | grep RECENT_CRASH || exit 1
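# archiving acknowledges every crash report; once the health checks refresh,
# the RECENT_CRASH warning should clear (hence the sleep before re-checking)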
ceph crash archive-all
sleep 30
ceph health detail | grep -c RECENT_CRASH | grep 0    # should be gone!