]> git.proxmox.com Git - ceph.git/blob - ceph/qa/standalone/misc/ver-health.sh
import quincy beta 17.1.0
[ceph.git] / ceph / qa / standalone / misc / ver-health.sh
#!/usr/bin/env bash
#
# Copyright (C) 2020 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#
# Pull in the shared standalone-test helpers. The functions used below but not
# defined in this file (setup, teardown, run_mon, run_osd, kill_daemons, main,
# ...) are presumably provided by it -- verify against ceph-helpers.sh.
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

#######################################
# Prepare the common monitor addresses / CEPH_ARGS for this suite and run the
# selected test functions, each one wrapped in setup/teardown.
# Globals:
#   CEPH_MON_A, CEPH_MON_B  (exported) addresses of the two monitors
#   CEPH_ARGS               (exported, appended to)
#   ORIG_CEPH_ARGS          (exported) snapshot of CEPH_ARGS taken here
#   SED                     sed binary used for test discovery (defaults to sed)
# Arguments:
#   $1     - test directory passed to setup, the test function and teardown
#   $2...  - optional names of the test functions to run; when none are
#            given, every function whose name matches TEST_[0-9a-z_]* is run
# Returns:
#   0 if every step succeeded, 1 as soon as setup, a test or teardown fails
#######################################
function run() {
    local dir=$1
    shift

    export CEPH_MON_A="127.0.0.1:7165" # git grep '\<7165\>' : there must be only one
    export CEPH_MON_B="127.0.0.1:7166" # git grep '\<7166\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    # NOTE(review): CEPH_MON is not assigned anywhere in this file (only
    # CEPH_MON_A / CEPH_MON_B are), so this normally expands to an empty
    # --mon-host=. Every test below appends its own --mon-host afterwards.
    # Kept as-is for compatibility; ${...:-} only makes it safe under set -u.
    CEPH_ARGS+="--mon-host=${CEPH_MON:-} "
    CEPH_ARGS+="--mon_health_to_clog_tick_interval=1.0 "
    export ORIG_CEPH_ARGS="$CEPH_ARGS"

    # Explicit test names win; otherwise discover the TEST_* functions that
    # are currently defined in this shell.
    local funcs="$*"
    if [[ -z "$funcs" ]]; then
        funcs=$(set | ${SED:-sed} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')
    fi

    local func
    # $funcs is deliberately unquoted: it is a whitespace separated name list.
    for func in $funcs; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}

#######################################
# Poll `ceph health` once per second until its output matches a pattern.
# Arguments:
#   $1 - grep (basic regex) pattern to look for in the `ceph health` output
#   $2 - maximum number of attempts, one per second (default: 20)
# Returns:
#   0 as soon as the pattern is seen, 1 if it never shows up in time
# Notes:
#   A failing `ceph health` counts as "no match" for that attempt. The output
#   is captured first and grepped afterwards, so the caller's pipefail
#   setting is left untouched and an early exit of `grep -q` cannot be
#   misreported as a pipeline failure.
#######################################
function wait_for_health_string() {
    local grep_string=$1
    local seconds=${2:-20}
    local health
    local i

    # Allow mon to notice version difference
    for ((i = 0; i < seconds; i++)); do
        if health=$(ceph health) && grep -q -- "$grep_string" <<< "$health"; then
            return 0
        fi
        sleep 1
    done

    # Health never changed to the expected state
    return 1
}



# Test a single OSD with an old version and multiple OSDs with 2 different old versions
#
# Arguments: $1 - test directory handed to the helper functions
# Returns:   0 when every health expectation holds, 1 on the first failure
function TEST_check_version_health_1() {
    local dir=$1

    # Assume MON_A is leader?
    CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
    # setup
    setup "$dir" || return 1

    # create a cluster with two monitors and three osds
    run_mon "$dir" a --public-addr="$CEPH_MON_A" --mon_warn_older_version_delay=0 || return 1
    run_mon "$dir" b --public-addr="$CEPH_MON_B" --mon_warn_older_version_delay=0 || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1

    sleep 5
    ceph health detail
    # should not see this yet
    ceph health detail | grep DAEMON_OLD_VERSION && return 1

    # Restart osd.1 so that it reports a fake, older version string
    kill_daemons "$dir" KILL osd.1
    ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd "$dir" 1

    wait_for_health_string "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1

    ceph health detail
    # Should notice that osd.1 is a different version
    ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1
    ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1
    ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1

    # Two distinct old versions in the cluster must escalate to HEALTH_ERR
    kill_daemons "$dir" KILL osd.2
    ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd "$dir" 2
    kill_daemons "$dir" KILL osd.0
    ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd "$dir" 0

    wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1

    ceph health detail
    ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1
    ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
}

# Test with 1 MON and 1 MDS with an older version, and add 2 OSDs with different versions
#
# Arguments: $1 - test directory handed to the helper functions
# Returns:   0 when every health expectation holds, 1 on the first failure
function TEST_check_version_health_2() {
    local dir=$1

    # Assume MON_A is leader?
    CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
    # setup
    setup "$dir" || return 1

    # create a cluster with all daemon types
    run_mon "$dir" a --public-addr="$CEPH_MON_A" --mon_warn_older_version_delay=0 || return 1
    run_mon "$dir" b --public-addr="$CEPH_MON_B" --mon_warn_older_version_delay=0 || return 1
    run_osd "$dir" 0 || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1
    run_mgr "$dir" x || return 1
    run_mgr "$dir" y || return 1
    run_mds "$dir" m || return 1
    run_mds "$dir" n || return 1

    sleep 5
    ceph health detail
    # should not see this yet
    ceph health detail | grep DAEMON_OLD_VERSION && return 1

    # Restart mon.b and mds.m so that they report a fake, older version string
    kill_daemons "$dir" KILL mon.b
    ceph_debug_version_for_testing=01.00.00-gversion-test run_mon "$dir" b --mon_warn_older_version_delay=0
    # XXX: Manager doesn't seem to use the test specific config for version
    #kill_daemons $dir KILL mgr.x
    #ceph_debug_version_for_testing=02.00.00-gversion-test run_mgr $dir x
    kill_daemons "$dir" KILL mds.m
    ceph_debug_version_for_testing=01.00.00-gversion-test run_mds "$dir" m

    wait_for_health_string "HEALTH_WARN .*There are daemons running an older version of ceph" || return 1

    ceph health detail
    # Should notice that mon.b and mds.m are a different version
    ceph health detail | grep -q "HEALTH_WARN .*There are daemons running an older version of ceph" || return 1
    ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There are daemons running an older version of ceph" || return 1
    ceph health detail | grep -q "mon.b mds.m are running an older version of ceph: 01.00.00-gversion-test" || return 1

    # Two distinct old versions in the cluster must escalate to HEALTH_ERR
    kill_daemons "$dir" KILL osd.2
    ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd "$dir" 2
    kill_daemons "$dir" KILL osd.0
    ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd "$dir" 0

    wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1

    ceph health detail
    ceph health | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "mon.b osd.2 mds.m are running an older version of ceph: 01.00.00-gversion-test" || return 1
    ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
}

# Verify delay handling with same setup as test 1
#
# Arguments: $1 - test directory handed to the helper functions
# Returns:   0 when every health expectation holds, 1 on the first failure
function TEST_check_version_health_3() {
    local dir=$1

    # Assume MON_A is leader?
    CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
    # setup
    setup "$dir" || return 1

    # create a cluster with two monitors and three osds
    run_mon "$dir" a --public-addr="$CEPH_MON_A" || return 1
    run_mon "$dir" b --public-addr="$CEPH_MON_B" || return 1

    local start_osd_time=$SECONDS
    # use memstore for faster bootup
    EXTRA_OPTS=" --osd-objectstore=memstore" run_osd "$dir" 0 || return 1
    EXTRA_OPTS=" --osd-objectstore=memstore" run_osd "$dir" 1 || return 1
    EXTRA_OPTS=" --osd-objectstore=memstore" run_osd "$dir" 2 || return 1
    # take the time used for boot osds into consideration
    local warn_older_version_delay=$((SECONDS - start_osd_time + 20))

    sleep 5
    ceph health detail
    # should not see this yet
    ceph health detail | grep DAEMON_OLD_VERSION && return 1
    ceph tell 'mon.*' injectargs "--mon_warn_older_version_delay $warn_older_version_delay"
    # Restart osd.1 so that it reports a fake, older version string
    kill_daemons "$dir" KILL osd.1
    EXTRA_OPTS=" --osd-objectstore=memstore" \
        ceph_debug_version_for_testing=01.00.00-gversion-test \
        activate_osd "$dir" 1

    # Wait 50% of 20 second delay config
    sleep 10
    # should not see this yet
    ceph health detail | grep DAEMON_OLD_VERSION && return 1

    # Now make sure that at least 20 seconds have passed
    wait_for_health_string "HEALTH_WARN .*There is a daemon running an older version of ceph" 20 || return 1

    ceph health detail
    # Should notice that osd.1 is a different version
    ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1
    ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1
    ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1

    # NOTE(review): unlike the osd.1 restart above, these two restarts do not
    # pass EXTRA_OPTS=" --osd-objectstore=memstore" -- confirm this is intended.
    kill_daemons "$dir" KILL osd.2
    ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd "$dir" 2
    kill_daemons "$dir" KILL osd.0
    ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd "$dir" 0

    wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1

    ceph health detail
    ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
    ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1
    ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
}

# Hand control to main with this suite's name and the script's arguments.
# main is not defined in this file; presumably it comes from the sourced
# ceph-helpers.sh -- verify there.
main ver-health "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh ver-health.sh"
# End: