]> git.proxmox.com Git - ceph.git/blob - ceph/src/test/osd/osd-fast-mark-down.sh
5c8f420fecbb4d3863d90575d470968d5a2ef897
[ceph.git] / ceph / src / test / osd / osd-fast-mark-down.sh
1 #!/bin/bash
2 #
3 # Copyright (C) 2016 Piotr Dałek <git@predictor.org.pl>
4 # Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
5 #
6 # Author: Piotr Dałek <git@predictor.org.pl>
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU Library Public License as published by
10 # the Free Software Foundation; either version 2, or (at your option)
11 # any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU Library Public License for more details.
17 #
18
# Pull in the build-environment detection script that sits one directory
# above this file, then the shared test helpers from the source tree.
# NOTE(review): CEPH_ROOT is presumably set by detect-build-env-vars.sh -
# confirm. Both paths are quoted so a checkout path containing whitespace
# does not get word-split.
source "$(dirname "$0")/../detect-build-env-vars.sh"
source "$CEPH_ROOT/qa/workunits/ceph-helpers.sh"

# Number of one-second polling rounds test_fast_kill waits for a killed
# osd to show up as down before it declares failure.
MAX_PROPAGATION_TIME=30
22
#######################################
# Run test_fast_kill under three configurations:
#   1. osd-fast-fail-on-connection-refused=false - the scenario is
#      expected to FAIL here (osds must not be marked down quickly);
#   2. fast-fail enabled with the simple messenger - must succeed;
#   3. fast-fail enabled with the async messenger  - must succeed.
# Globals:
#   CEPH_MON  (exported, written)
#   CEPH_ARGS (exported, appended to / rewritten for each scenario)
# Arguments:
#   $1 - working directory; stale *.pid files in it are removed and the
#        directory is handed to test_fast_kill
# Returns:
#   0 if all three scenarios behave as expected, 1 otherwise
#######################################
function run() {
    local dir=$1
    shift
    rm -f "$dir"/*.pid
    export CEPH_MON="127.0.0.1:7126" # git grep '\<7126\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "

    # Snapshot of the arguments shared by every scenario below.
    local OLD_ARGS=$CEPH_ARGS
    CEPH_ARGS+="--osd-fast-fail-on-connection-refused=false "
    echo "Ensuring old behavior is there..."
    # A plain `if` is required here: `return` inside a ( ... ) subshell
    # only leaves the subshell, so the failure would never reach the caller.
    if test_fast_kill "$dir"; then
        echo "OSDs died too early! Old behavior doesn't work."
        return 1
    fi

    CEPH_ARGS=$OLD_ARGS"--osd-fast-fail-on-connection-refused=true "
    OLD_ARGS=$CEPH_ARGS

    CEPH_ARGS+="--ms_type=simple"
    echo "Testing simple msgr..."
    test_fast_kill "$dir" || return 1

    CEPH_ARGS=$OLD_ARGS"--ms_type=async"
    echo "Testing async msgr..."
    test_fast_kill "$dir" || return 1

    return 0
}
51
#######################################
# Start a mon, a mgr and three osds, hard-kill two of the osds one after
# the other, and check that after each kill `ceph osd tree` reports
# exactly the expected number of "down" entries within
# MAX_PROPAGATION_TIME one-second polling rounds.
# Globals:
#   MAX_PROPAGATION_TIME (read)
# Arguments:
#   $1 - cluster working directory; if omitted, the caller's $dir is used
#        (kept for backward compatibility with the previous behavior)
# Returns:
#   0 on success; 1 if cluster setup fails, an osd is not reported down
#   in time, or more osds than were killed are reported down
#######################################
function test_fast_kill() {
    # The right-hand side is expanded before `local` creates the variable,
    # so ${dir} here still refers to the caller's value.
    local dir=${1:-$dir}
    local -a pids=()
    local oi i killid previd time_left down_osds

    # create cluster with 3 osds
    setup "$dir" || return 1
    run_mon "$dir" a --osd_pool_default_size=3 || return 1
    run_mgr "$dir" x || return 1
    for oi in {0..2}; do
        run_osd "$dir" "$oi" || return 1
        pids[$oi]=$(cat "$dir/osd.$oi.pid") || return 1
    done

    # write some objects to ensure connectivity between the osds
    rados -p rbd bench 10 write -b 4096 --max-objects 128 --no-cleanup || return 1
    sleep 1

    killid=0
    previd=0

    # kill random osd and see if after max MAX_PROPAGATION_TIME, the osd count decreased.
    for i in {1..2}; do
        # Draw until the pid differs from the previous victim (0 initially).
        while [ "$killid" -eq "$previd" ]; do
            killid=${pids[$RANDOM % ${#pids[@]}]}
        done
        previd=$killid

        # SIGKILL on purpose: the osd gets no chance to shut down cleanly.
        kill -9 "$killid"
        time_left=$MAX_PROPAGATION_TIME
        down_osds=0

        while [ "$time_left" -gt 0 ]; do
            sleep 1
            time_left=$((time_left - 1))

            # Nothing to check until some osd log mentions a refused connection.
            grep -q -F "ms_handle_refused" "$dir"/osd.*.log || continue

            down_osds=$(ceph osd tree | grep -c down)
            if [ "$down_osds" -lt "$i" ]; then
                # osds not marked down yet, try again in a second
                continue
            elif [ "$down_osds" -gt "$i" ]; then
                echo "Too many ($down_osds) osds died!"
                return 1
            else
                break
            fi
        done

        if [ "$down_osds" -lt "$i" ]; then
            echo "Killed the OSD, yet it is not marked down"
            ceph osd tree
            return 1
        fi
    done
    pkill -SIGTERM rados
    teardown "$dir" || return 1
}
110
# Script entry point: hand the test name and all command-line arguments to
# main(). NOTE(review): main is not defined in this file - presumably it
# comes from the sourced ceph-helpers.sh and dispatches to run(); confirm.
main osd-fast-mark-down "$@"
112
113 # Local Variables:
114 # compile-command: "cd ../.. ; make -j4 && test/osd/osd-fast-mark-down.sh"
115 # End: