#!/bin/bash -ex

SCRIPT_NAME=$(basename ${BASH_SOURCE[0]})
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

FSID='00000000-0000-0000-0000-0000deadbeef'

# images that are used
IMAGE_MASTER=${IMAGE_MASTER:-'quay.io/ceph-ci/ceph:octopus'} # octopus for octopus branch
IMAGE_NAUTILUS=${IMAGE_NAUTILUS:-'docker.io/ceph/daemon-base:latest-nautilus'}
IMAGE_MIMIC=${IMAGE_MIMIC:-'docker.io/ceph/daemon-base:latest-mimic'}

TMPDIR=$(mktemp -d)

function cleanup()
{
    dump_all_logs
    rm -rf $TMPDIR
}
trap cleanup EXIT

OSD_IMAGE_NAME="${SCRIPT_NAME%.*}_osd.img"
OSD_IMAGE_SIZE='6G'
OSD_TO_CREATE=2
OSD_VG_NAME=${SCRIPT_NAME%.*}
OSD_LV_NAME=${SCRIPT_NAME%.*}

CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm
CEPHADM_SAMPLES_DIR=${CEPHADM_SRC_DIR}/samples

[ -z "$SUDO" ] && SUDO=sudo

if [ -z "$CEPHADM" ]; then
    CEPHADM=${CEPHADM_SRC_DIR}/cephadm
fi

# at this point, we need $CEPHADM set
if ! [ -x "$CEPHADM" ]; then
    echo "cephadm not found. Please set \$CEPHADM"
    exit 1
fi

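# cephadm is a single python source file with no fixed shebang. To cover
# every interpreter available on the host, the block below re-executes this
# script once per python found, each time pointing $CEPHADM at a copy of
# cephadm that has been given an explicit "#!<that python>" shebang.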
# respawn ourselves with a shebang
if [ -z "$PYTHON_KLUDGE" ]; then
    # see which pythons we should test with
    PYTHONS=""
    which python3 && PYTHONS="$PYTHONS python3"
    which python2 && PYTHONS="$PYTHONS python2"
    echo "PYTHONS $PYTHONS"
    if [ -z "$PYTHONS" ]; then
        echo "No PYTHONS found!"
        exit 1
    fi

    TMPBINDIR=$(mktemp -d)
    # this replaces the cleanup trap above, so remove the parent run's
    # $TMPDIR here as well
    trap "rm -rf $TMPBINDIR $TMPDIR" EXIT
    ORIG_CEPHADM="$CEPHADM"
    CEPHADM="$TMPBINDIR/cephadm"
    for p in $PYTHONS; do
        echo "=== re-running with $p ==="
        ln -s `which $p` $TMPBINDIR/python
        echo "#!$TMPBINDIR/python" > $CEPHADM
        cat $ORIG_CEPHADM >> $CEPHADM
        chmod 700 $CEPHADM
        $TMPBINDIR/python --version
        PYTHON_KLUDGE=1 CEPHADM=$CEPHADM $0
        rm $TMPBINDIR/python
    done
    rm -rf $TMPBINDIR
    echo "PASS with all of: $PYTHONS"
    exit 0
fi

# add image to args
CEPHADM_ARGS="$CEPHADM_ARGS --image $IMAGE_MASTER"

# combine into a single var
CEPHADM_BIN="$CEPHADM"
CEPHADM="$SUDO $CEPHADM_BIN $CEPHADM_ARGS"

# clean up previous run(s)?
$CEPHADM rm-cluster --fsid $FSID --force
$SUDO vgchange -an $OSD_VG_NAME || true
loopdev=$($SUDO losetup -a | grep $(basename $OSD_IMAGE_NAME) | awk -F : '{print $1}')
if ! [ "$loopdev" = "" ]; then
    $SUDO losetup -d $loopdev
fi

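# helper: succeed only if the given command fails, e.g.
#   expect_false grep -q needle /dev/null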
function expect_false()
{
    set -x
    if eval "$@"; then return 1; else return 0; fi
}

# helper: re-evaluate $condition every 5 seconds, up to $tries times
function is_available()
{
    local name="$1"
    local condition="$2"
    local tries="$3"

    local num=0
    while ! eval "$condition"; do
        num=$(($num + 1))
        if [ "$num" -ge $tries ]; then
            echo "$name is not available"
            false
        fi
        sleep 5
    done

    echo "$name is available"
    true
}

function dump_log()
{
    local name="$1"
    local num_lines="$2"

    if [ -z "$num_lines" ]; then
        num_lines=100
    fi

    echo '-------------------------'
    echo 'dump daemon log:' $name
    echo '-------------------------'

    $CEPHADM logs --name $name -- --no-pager -n $num_lines
}

function dump_all_logs()
{
    names=$($CEPHADM ls | jq -r '.[].name')

    echo 'dumping logs for daemons: ' $names
    for name in $names; do
        dump_log $name
    done
}

function nfs_stop()
{
    # stop the running nfs server
    local units="nfs-server nfs-kernel-server"
    for unit in $units; do
        if systemctl status $unit; then
            $SUDO systemctl stop $unit
        fi
    done

    # ensure the NFS port is no longer in use
    expect_false "$SUDO ss -tlnp '( sport = :nfs )' | grep LISTEN"
}

## prepare + check host
$SUDO $CEPHADM check-host

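# 'cephadm version' reports the ceph version from inside the given
# container image, so these checks exercise image selection (CEPHADM_IMAGE
# env var, --image flag, and the default) without needing any cluster.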
## version + --image
$SUDO CEPHADM_IMAGE=$IMAGE_NAUTILUS $CEPHADM_BIN version
$SUDO CEPHADM_IMAGE=$IMAGE_NAUTILUS $CEPHADM_BIN version \
    | grep 'ceph version 14'
$SUDO $CEPHADM_BIN --image $IMAGE_MIMIC version
$SUDO $CEPHADM_BIN --image $IMAGE_MIMIC version \
    | grep 'ceph version 13'
$SUDO $CEPHADM_BIN --image $IMAGE_MASTER version | grep 'ceph version'

# try force docker; this won't work if docker isn't installed
systemctl status docker && ( $CEPHADM --docker version | grep 'ceph version' )

## test shell before bootstrap, when crash dir isn't (yet) present on this host
$CEPHADM shell --fsid $FSID -- ceph -v | grep 'ceph version'
$CEPHADM shell --fsid $FSID -e FOO=BAR -- printenv | grep FOO=BAR

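# bootstrap brings up a minimal single-host cluster (mon.a + mgr.x) from
# the seed config below, writing the admin config/keyring and the cluster
# ssh key into $TMPDIR for the rest of the tests to use.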
## bootstrap
ORIG_CONFIG=`mktemp -p $TMPDIR`
CONFIG=`mktemp -p $TMPDIR`
MONCONFIG=`mktemp -p $TMPDIR`
KEYRING=`mktemp -p $TMPDIR`
IP=127.0.0.1
cat <<EOF > $ORIG_CONFIG
[global]
log to file = true
osd crush chooseleaf type = 0
EOF
$CEPHADM bootstrap \
    --mon-id a \
    --mgr-id x \
    --mon-ip $IP \
    --fsid $FSID \
    --config $ORIG_CONFIG \
    --output-config $CONFIG \
    --output-keyring $KEYRING \
    --output-pub-ssh-key $TMPDIR/ceph.pub \
    --allow-overwrite \
    --skip-mon-network \
    --skip-monitoring-stack
test -e $CONFIG
test -e $KEYRING
rm -f $ORIG_CONFIG

$SUDO test -e /var/log/ceph/$FSID/ceph-mon.a.log
$SUDO test -e /var/log/ceph/$FSID/ceph-mgr.x.log

for u in ceph.target \
         ceph-$FSID.target \
         ceph-$FSID@mon.a \
         ceph-$FSID@mgr.x; do
    systemctl is-enabled $u
    systemctl is-active $u
done
systemctl | grep system-ceph | grep -q .slice # naming is escaped and annoying

# check ceph -s works (via shell w/ passed config/keyring)
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph -s | grep $FSID

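# mark the core services unmanaged so the orchestrator doesn't add or
# remove daemons behind the test's back while we deploy things by hand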
for t in mon mgr node-exporter prometheus grafana; do
    $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
        ceph orch apply $t --unmanaged
done

## ls
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").fsid' \
    | grep $FSID
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mgr.x").fsid' \
    | grep $FSID

# make sure the version is returned correctly
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").version' | grep -q \\.

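# 'cephadm deploy' places a single daemon directly, without going through
# the orchestrator; mon.b can join by reusing mon.a's on-disk keyring.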
## deploy
# add mon.b
cp $CONFIG $MONCONFIG
echo "public addr = $IP:3301" >> $MONCONFIG
$CEPHADM deploy --name mon.b \
    --fsid $FSID \
    --keyring /var/lib/ceph/$FSID/mon.a/keyring \
    --config $MONCONFIG
for u in ceph-$FSID@mon.b; do
    systemctl is-enabled $u
    systemctl is-active $u
done

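# a second mgr needs its own cephx identity, so mint a key for mgr.y
# first and deploy with that keyring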
# add mgr.y
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph auth get-or-create mgr.y \
        mon 'allow profile mgr' \
        osd 'allow *' \
        mds 'allow *' > $TMPDIR/keyring.mgr.y
$CEPHADM deploy --name mgr.y \
    --fsid $FSID \
    --keyring $TMPDIR/keyring.mgr.y \
    --config $CONFIG
for u in ceph-$FSID@mgr.y; do
    systemctl is-enabled $u
    systemctl is-active $u
done

# wait for the new mgr to register as a standby
for f in `seq 1 30`; do
    if $CEPHADM shell --fsid $FSID \
            --config $CONFIG --keyring $KEYRING -- \
            ceph -s -f json-pretty \
            | jq '.mgrmap.num_standbys' | grep -q 1 ; then break; fi
    sleep 1
done
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph -s -f json-pretty \
    | jq '.mgrmap.num_standbys' | grep -q 1

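# the OSDs get their block storage from a sparse image file: attach it to
# a loop device, carve it into one LV per OSD via LVM, and hand each LV
# to the orchestrator as a new OSD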
# add osd.{1,2,..}
dd if=/dev/zero of=$TMPDIR/$OSD_IMAGE_NAME bs=1 count=0 seek=$OSD_IMAGE_SIZE
loop_dev=$($SUDO losetup -f)
$SUDO vgremove -f $OSD_VG_NAME || true
$SUDO losetup $loop_dev $TMPDIR/$OSD_IMAGE_NAME
$SUDO pvcreate $loop_dev && $SUDO vgcreate $OSD_VG_NAME $loop_dev
for id in `seq 0 $((OSD_TO_CREATE - 1))`; do
    $SUDO lvcreate -l $((100 / OSD_TO_CREATE))%VG -n $OSD_LV_NAME.$id $OSD_VG_NAME
    $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
        ceph orch daemon add osd \
            $(hostname):/dev/$OSD_VG_NAME/$OSD_LV_NAME.$id
done

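# the monitoring daemons don't run the ceph image, so strip the --image
# argument and let cephadm pick its default image for each daemon type;
# prometheus and grafana take their config as JSON on stdin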
# add node-exporter
${CEPHADM//--image $IMAGE_MASTER/} deploy \
    --name node-exporter.a --fsid $FSID
cond="curl 'http://localhost:9100' | grep -q 'Node Exporter'"
is_available "node-exporter" "$cond" 5

# add prometheus
cat ${CEPHADM_SAMPLES_DIR}/prometheus.json | \
    ${CEPHADM//--image $IMAGE_MASTER/} deploy \
        --name prometheus.a --fsid $FSID --config-json -
cond="curl 'localhost:9095/api/v1/query?query=up'"
is_available "prometheus" "$cond" 5

# add grafana
cat ${CEPHADM_SAMPLES_DIR}/grafana.json | \
    ${CEPHADM//--image $IMAGE_MASTER/} deploy \
        --name grafana.a --fsid $FSID --config-json -
cond="curl --insecure 'https://localhost:3000' | grep -q 'grafana'"
is_available "grafana" "$cond" 30

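# ganesha needs the host's NFS server out of the way (port 2049 free) and
# a RADOS pool/namespace for its export config; the orchestrator is
# paused while the daemon is deployed by hand, then resumed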
# add nfs-ganesha
nfs_stop
nfs_rados_pool=$(cat ${CEPHADM_SAMPLES_DIR}/nfs.json | jq -r '.["pool"]')
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph osd pool create $nfs_rados_pool 64
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    rados --pool $nfs_rados_pool --namespace nfs-ns create conf-nfs.a
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph orch pause
$CEPHADM deploy --name nfs.a \
    --fsid $FSID \
    --keyring $KEYRING \
    --config $CONFIG \
    --config-json ${CEPHADM_SAMPLES_DIR}/nfs.json
cond="$SUDO ss -tlnp '( sport = :nfs )' | grep 'ganesha.nfsd'"
is_available "nfs" "$cond" 10
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph orch resume

## run
# WRITE ME

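# 'cephadm unit' proxies systemctl operations to a daemon's unit, so the
# test can toggle enablement without spelling out the escaped unit name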
## unit
$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- is-active
expect_false $CEPHADM unit --fsid $FSID --name mon.xyz -- is-active
$CEPHADM unit --fsid $FSID --name mon.a -- disable
expect_false $CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- enable
$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled

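# 'cephadm shell' starts a fresh container with the cluster config and
# keyring mounted; --timeout bounds how long the containerized command
# may run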
## shell
$CEPHADM shell --fsid $FSID -- true
$CEPHADM shell --fsid $FSID -- test -d /var/log/ceph
expect_false $CEPHADM --timeout 1 shell --fsid $FSID -- sleep 10
$CEPHADM --timeout 10 shell --fsid $FSID -- sleep 1

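# 'cephadm enter' execs into an existing daemon's container, so the
# daemon's data dir and processes are visible from inside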
## enter
expect_false $CEPHADM enter
$CEPHADM enter --fsid $FSID --name mon.a -- test -d /var/lib/ceph/mon/ceph-a
$CEPHADM enter --fsid $FSID --name mgr.x -- test -d /var/lib/ceph/mgr/ceph-x
$CEPHADM enter --fsid $FSID --name mon.a -- pidof ceph-mon
expect_false $CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mon
$CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mgr
# this triggers a bug in older versions of podman, including 18.04's 1.6.2
#expect_false $CEPHADM --timeout 1 enter --fsid $FSID --name mon.a -- sleep 10
$CEPHADM --timeout 10 enter --fsid $FSID --name mon.a -- sleep 1

## ceph-volume
$CEPHADM ceph-volume --fsid $FSID -- inventory --format=json \
    | jq '.[]'

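# final teardown: destructive removals refuse to run without --force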
## rm-daemon
# mon and osd require --force
expect_false $CEPHADM rm-daemon --fsid $FSID --name mon.a
# mgr does not
$CEPHADM rm-daemon --fsid $FSID --name mgr.x

## rm-cluster
expect_false $CEPHADM rm-cluster --fsid $FSID
$CEPHADM rm-cluster --fsid $FSID --force

rm -rf $TMPDIR
echo PASS