#!/bin/sh

set -x

# run on a single-node three-OSD cluster

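# abort all three OSDs; SIGABRT makes them dump core and leave crash reports
# behind for ceph-crash to pick up below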
sudo killall -ABRT ceph-osd
sleep 5

# kill caused coredumps; find them and delete them, carefully, so as
# not to disturb other coredumps, or else teuthology will see them
# and assume test failure.  sudos are because the core files are
# root/600
for f in $(find $TESTDIR/archive/coredump -type f); do
    gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd $f)
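    # feeding "quit" to gdb makes it print only the core summary (lines such as
    # "Core was generated by ..." and "Program terminated with signal ...";
    # exact wording varies by gdb version), which the expr checks below match on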
    if expr match "$gdb_output" ".*generated.*ceph-osd.*" && \
       ( expr match "$gdb_output" ".*terminated.*signal 6.*" || \
         expr match "$gdb_output" ".*terminated.*signal SIGABRT.*" )
    then
        sudo rm $f
    fi
done

# let daemon find crashdumps on startup
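# (on startup the ceph-crash agent scans /var/lib/ceph/crash, posts each new
# report to the cluster, and moves it into crash/posted; the sleep gives it
# time to finish)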
sudo systemctl restart ceph-crash
sleep 30

# must be 3 crashdumps registered and moved to crash/posted
[ $(ceph crash ls | wc -l) = 4 ] || exit 1    # 4 here because of the table header
[ $(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l) = 3 ] || exit 1
# there should be a health warning
ceph health detail | grep RECENT_CRASH || exit 1
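# archiving acknowledges every crash report; once the health checks refresh,
# the RECENT_CRASH warning should clear (hence the sleep before re-checking)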
ceph crash archive-all
sleep 30
ceph health detail | grep -c RECENT_CRASH | grep 0    # should be gone!