]> git.proxmox.com Git - ceph.git/blob - ceph/qa/standalone/osd/osd-fast-mark-down.sh
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / qa / standalone / osd / osd-fast-mark-down.sh
1 #!/usr/bin/env bash
2 #
3 # Copyright (C) 2016 Piotr Dałek <git@predictor.org.pl>
4 # Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
5 #
6 # Author: Piotr Dałek <git@predictor.org.pl>
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU Library Public License as published by
10 # the Free Software Foundation; either version 2, or (at your option)
11 # any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU Library Public License for more details.
17 #
18
# Pull in the shared standalone-test helpers. The functions this script calls
# but does not define (setup, run_mon, run_mgr, run_osd, create_rbd_pool,
# teardown, main) are presumably provided by this file.
# Quoted so a CEPH_ROOT containing whitespace or glob characters still works.
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Number of one-second polling rounds test_fast_kill waits, after killing an
# OSD, for that OSD to show up as down before declaring failure.
MAX_PROPAGATION_TIME=30
21
#
# Test driver: runs test_fast_kill three times against the cluster
# directory given as $1, each time with a different CEPH_ARGS:
#
#   1. --osd-fast-fail-on-connection-refused=false
#      test_fast_kill is EXPECTED TO FAIL here; if it succeeds the OSDs were
#      marked down quickly even with the feature off, and run fails.
#   2. fast-fail enabled, simple messenger (v1 mon address)
#   3. fast-fail enabled, async messenger
#
# Globals:   CEPH_MON and CEPH_ARGS are exported and modified.
# Arguments: $1 - test directory
# Returns:   0 if all three phases behave as expected, 1 otherwise.
#
function run() {
    local dir=$1
    shift

    rm -f "$dir"/*.pid
    export CEPH_MON="127.0.0.1:7126" # git grep '\<7126\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "

    # Options shared by every phase; each phase appends its own on top.
    local base_args=$CEPH_ARGS

    # NOTE(review): unlike the two phases below, this phase passes no
    # --mon-host option; confirm the helpers do not need it here.
    CEPH_ARGS="${base_args}--osd-fast-fail-on-connection-refused=false "
    echo "Ensuring old behavior is there..."
    # This must be an 'if' in the current shell: a 'return' placed inside a
    # ( ... ) subshell only ends the subshell and never fails run().
    if test_fast_kill "$dir"; then
        echo "OSDs died too early! Old behavior doesn't work."
        return 1
    fi

    base_args+="--osd-fast-fail-on-connection-refused=true "

    # force v1 addr here for simple's benefit
    CEPH_ARGS="${base_args}--ms_type=simple --mon-host=v1:$CEPH_MON"
    echo "Testing simple msgr..."
    test_fast_kill "$dir" || return 1

    CEPH_ARGS="${base_args}--ms_type=async --mon-host=$CEPH_MON"
    echo "Testing async msgr..."
    test_fast_kill "$dir" || return 1

    return 0
}
50
#
# Bring up a one-mon, one-mgr, three-OSD cluster in the given directory,
# then SIGKILL two different OSDs one after the other. After each kill,
# poll once per second (at most MAX_PROPAGATION_TIME times) until an OSD log
# contains "ms_handle_refused" and `ceph osd tree` lists exactly as many
# "down" lines as OSDs killed so far.
#
# Globals:   MAX_PROPAGATION_TIME (read)
# Arguments: $1 - test directory (falls back to the caller's $dir if omitted)
# Returns:   0 when both kills were noticed in time and teardown succeeded;
#            1 on any setup failure, if too many OSDs are reported down, or
#            if a killed OSD is not reported down in time. On these failure
#            paths the cluster is NOT torn down.
#
function test_fast_kill() {
    # Take the directory from the argument instead of silently depending on
    # a variable that happens to be local to the caller.
    local dir=${1:-${dir:-}}
    local -a pids=()
    local oi i killid previd time_left down_osds

    # create cluster with 3 osds
    setup "$dir" || return 1
    run_mon "$dir" a --osd_pool_default_size=3 || return 1
    run_mgr "$dir" x || return 1
    for oi in {0..2}; do
        run_osd "$dir" "$oi" || return 1
        pids[$oi]=$(cat "$dir/osd.$oi.pid")
    done

    create_rbd_pool || return 1

    # write some objects to ensure there is connectivity between the osds
    timeout 20 rados -p rbd bench 10 write -b 4096 --max-objects 128 --no-cleanup || return 1
    sleep 1

    killid=0
    previd=0

    # kill random osd and see if after max MAX_PROPAGATION_TIME, the osd count decreased.
    for i in {1..2}; do
        # Pick a pid that differs from the one killed in the previous round
        # (0 on the first round, which no pid is expected to equal).
        while [ "$killid" -eq "$previd" ]; do
            killid=${pids[$RANDOM % ${#pids[@]}]}
        done
        previd=$killid

        kill -9 "$killid"
        time_left=$MAX_PROPAGATION_TIME
        down_osds=0

        while [ "$time_left" -gt 0 ]; do
            sleep 1
            time_left=$((time_left - 1))

            # Do not query the cluster until some OSD log mentions
            # ms_handle_refused.
            grep -q -F "ms_handle_refused" "$dir"/osd.*.log || continue

            down_osds=$(ceph osd tree | grep -c down)
            if [ "$down_osds" -lt "$i" ]; then
                # osds not marked down yet, try again in a second
                continue
            elif [ "$down_osds" -gt "$i" ]; then
                echo "Too many ($down_osds) osds died!"
                return 1
            else
                break
            fi
        done

        # Still short after the polling loop ended: the time budget ran out.
        if [ "$down_osds" -lt "$i" ]; then
            echo "Killed the OSD, yet it is not marked down"
            ceph osd tree
            return 1
        fi
    done
    # Presumably stops any rados process still running from the bench above.
    pkill -SIGTERM rados
    teardown "$dir" || return 1
}
111
# Hand control to the shared test entry point, passing this test's name and
# the script's own command-line arguments. `main` is not defined in this
# file; presumably it comes from the sourced ceph-helpers.sh and ends up
# calling run() above.
main osd-fast-mark-down "$@"
113
114 # Local Variables:
115 # compile-command: "cd ../.. ; make -j4 && test/osd/osd-fast-mark-down.sh"
116 # End: