#!/bin/sh

set -x

# run on a single-node three-OSD cluster

sudo killall -ABRT ceph-osd
sleep 5

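# killing with SIGABRT has two effects: the kernel writes a coredump for each
# ceph-osd (cleaned up below), and each daemon's crash handler writes a crash
# report under /var/lib/ceph/crash/ for ceph-crash to pick up later in this test
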
# the kill above caused coredumps; find them and delete them, carefully, so
# as not to disturb any other coredumps (teuthology would see those and
# assume the test failed).  sudo is needed because the core files are owned
# by root with mode 0600
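# (for one of our cores the gdb summary normally includes a line like
#  "Core was generated by `/usr/bin/ceph-osd ...'" plus either
#  "Program terminated with signal 6" or, with newer gdb,
#  "Program terminated with signal SIGABRT", which is what the
#  expr matches below test for)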
for f in $(find "$TESTDIR/archive/coredump" -type f); do
    gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd "$f")
    if expr match "$gdb_output" ".*generated.*ceph-osd.*" && \
       ( \
         expr match "$gdb_output" ".*terminated.*signal 6.*" || \
         expr match "$gdb_output" ".*terminated.*signal SIGABRT.*" \
       )
    then
        sudo rm "$f"
    fi
done

# let daemon find crashdumps on startup
sudo systemctl restart ceph-crash
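# on startup ceph-crash scans /var/lib/ceph/crash/ for reports that have not
# been posted yet, registers each one with the cluster (effectively running
# `ceph crash post`) and then moves it into /var/lib/ceph/crash/posted/;
# the sleep below gives it time to finish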
sleep 30

# all 3 crashdumps must be registered and moved to crash/posted
[ $(ceph crash ls | wc -l) = 4 ] || exit 1    # 4 because of the table header
[ $(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l) = 3 ] || exit 1
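# (`ceph crash ls` prints a header row plus one row per crash, hence 4 lines
#  for 3 crashes; each posted crash directory should contain a "meta" file
#  with the JSON crash metadata, which is what the find above counts)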

# there should be a health warning
ceph health detail | grep RECENT_CRASH || exit 1
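# RECENT_CRASH is raised by the mgr crash module while there are new
# (unarchived) crashes from the recent past (two weeks by default, via the
# module's warn_recent_interval option); archiving them should clear the
# warning once health is refreshed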
ceph crash archive-all
sleep 30
ceph health detail | grep -c RECENT_CRASH | grep 0    # should be gone!
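# note: `grep -c` prints the number of matching lines even when it is 0, so
# the trailing `grep 0` succeeds only when the warning is gone; since this
# pipeline is the last command, its status is also the script's exit status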