]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #!/bin/bash |
2 | # | |
3 | # Copyright (C) 2016 Piotr Dałek <git@predictor.org.pl> | |
4 | # Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> | |
5 | # | |
6 | # Author: Piotr Dałek <git@predictor.org.pl> | |
7 | # | |
8 | # This program is free software; you can redistribute it and/or modify | |
9 | # it under the terms of the GNU Library Public License as published by | |
10 | # the Free Software Foundation; either version 2, or (at your option) | |
11 | # any later version. | |
12 | # | |
13 | # This program is distributed in the hope that it will be useful, | |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | # GNU Library Public License for more details. | |
17 | # | |
18 | ||
# Load the build-environment detection script that lives one directory above
# this script, then the shared test helpers under $CEPH_ROOT (CEPH_ROOT is
# presumably set by detect-build-env-vars.sh -- confirm).  The paths are
# quoted so a checkout located in a directory containing whitespace or glob
# characters still works.
source "$(dirname "$0")/../detect-build-env-vars.sh"
source "$CEPH_ROOT/qa/workunits/ceph-helpers.sh"

# Number of one-second polls test_fast_kill allows for a killed OSD to be
# reported as down.
MAX_PROPAGATION_TIME=30
22 | ||
#######################################
# Run the fast-mark-down scenarios against a throw-away cluster.
#
# First checks that with --osd-fast-fail-on-connection-refused=false the
# fast kill test does NOT succeed (the "old behavior"), then enables the
# option and expects the test to succeed with both the simple and the async
# messenger.
#
# Globals:
#   CEPH_MON  (exported, written)
#   CEPH_ARGS (exported, appended to / rewritten for each scenario)
# Arguments:
#   $1 - test directory, forwarded to test_fast_kill
# Returns:
#   0 when every scenario behaves as expected, 1 otherwise.
#######################################
function run() {
    local dir=$1
    shift
    rm -f "$dir"/*.pid
    export CEPH_MON="127.0.0.1:7126" # git grep '\<7126\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    #
    # Disable device auto class feature for this testing,
    # as it will automatically make root clones based on new class types
    # and hence affect the down osd counting.
    # E.g.,
    #
    #  ID WEIGHT  TYPE NAME                                             UP/DOWN REWEIGHT PRIMARY-AFFINITY
    #  -4 3.00000 root default~hdd
    #  -3 3.00000     host gitbuilder-ceph-rpm-centos7-amd64-basic~hdd
    #   0 1.00000         osd.0                                         down    1.00000  1.00000
    #   1 1.00000         osd.1                                         up      1.00000  1.00000
    #   2 1.00000         osd.2                                         up      1.00000  1.00000
    #  -1 3.00000 root default
    #  -2 3.00000     host gitbuilder-ceph-rpm-centos7-amd64-basic
    #   0 1.00000         osd.0                                         down    1.00000  1.00000
    #   1 1.00000         osd.1                                         up      1.00000  1.00000
    #   2 1.00000         osd.2                                         up      1.00000  1.00000
    #
    CEPH_ARGS+="--osd-class-update-on-start=false "

    # Arguments shared by the scenarios below; each scenario appends its own.
    local base_args=$CEPH_ARGS

    CEPH_ARGS+="--osd-fast-fail-on-connection-refused=false "
    echo "Ensuring old behavior is there..."
    # This must be an `if` in the current shell: a `return` placed inside a
    # ( ... ) subshell only terminates the subshell, so the failure would be
    # silently ignored and the remaining scenarios would still run.
    if test_fast_kill "$dir"; then
        echo "OSDs died too early! Old behavior doesn't work."
        return 1
    fi

    CEPH_ARGS=$base_args"--osd-fast-fail-on-connection-refused=true "
    base_args=$CEPH_ARGS

    CEPH_ARGS+="--ms_type=simple"
    echo "Testing simple msgr..."
    test_fast_kill "$dir" || return 1

    CEPH_ARGS=$base_args"--ms_type=async"
    echo "Testing async msgr..."
    test_fast_kill "$dir" || return 1

    return 0
}
70 | ||
#######################################
# Start a cluster with one mon, one mgr and three OSDs, then SIGKILL two
# different OSDs one after the other.  After each kill, poll once per second
# (at most MAX_PROPAGATION_TIME times) until an OSD log contains
# "ms_handle_refused" and `ceph osd tree` lists exactly as many "down"
# entries as OSDs killed so far.
#
# Globals:
#   MAX_PROPAGATION_TIME (read)
# Arguments:
#   $1 - test directory holding the osd.N.pid and osd.N.log files
# Returns:
#   0 when both kills were reflected in time and teardown succeeded,
#   1 otherwise.  Failure paths return without calling teardown.
#######################################
function test_fast_kill() {
    local dir=$1
    local -a pids=()
    local oi i
    local killid=0
    local previd=0
    local time_left
    local down_osds

    # create cluster with 3 osds
    setup "$dir" || return 1
    run_mon "$dir" a --osd_pool_default_size=3 || return 1
    run_mgr "$dir" x || return 1
    for oi in {0..2}; do
        run_osd "$dir" "$oi" || return 1
        pids[$oi]=$(cat "$dir/osd.$oi.pid")
    done

    # write some objects to ensure connectivity between the osds
    rados -p rbd bench 10 write -b 4096 --max-objects 128 --no-cleanup
    sleep 1

    # kill random osd and see if after max MAX_PROPAGATION_TIME, the osd count decreased.
    for i in {1..2}; do
        # Draw until the pid differs from the previously killed one (the
        # first round compares against the initial 0).
        while [ "$killid" -eq "$previd" ]; do
            killid=${pids[$RANDOM % ${#pids[@]}]}
        done
        previd=$killid

        # SIGKILL is intentional: the OSD must die without a clean shutdown.
        kill -9 "$killid"
        time_left=$MAX_PROPAGATION_TIME
        down_osds=0

        while [ "$time_left" -gt 0 ]; do
            sleep 1
            time_left=$((time_left - 1))

            # Do not query the cluster until some OSD log mentions a
            # refused connection.
            if ! grep -q -F "ms_handle_refused" "$dir"/osd.*.log; then
                continue
            fi

            down_osds=$(ceph osd tree | grep -c down)
            if [ "$down_osds" -lt "$i" ]; then
                # osds not marked down yet, try again in a second
                continue
            elif [ "$down_osds" -gt "$i" ]; then
                echo Too many \($down_osds\) osds died!
                return 1
            else
                break
            fi
        done

        if [ "$down_osds" -lt "$i" ]; then
            echo Killed the OSD, yet it is not marked down
            ceph osd tree
            return 1
        fi
    done
    pkill -SIGTERM rados
    teardown "$dir" || return 1
}
129 | ||
# Entry point: hand the test name and all command-line arguments to main.
# main is not defined in this file; presumably it comes from the sourced
# ceph-helpers.sh and dispatches to run() -- confirm against that helper.
130 | main osd-fast-mark-down "$@" | |
131 | ||
132 | # Local Variables: | |
133 | # compile-command: "cd ../.. ; make -j4 && test/osd/osd-fast-mark-down.sh" | |
134 | # End: |