#!/bin/bash -ex

SCRIPT_NAME=$(basename ${BASH_SOURCE[0]})
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# cleanup during exit
[ -z "$CLEANUP" ] && CLEANUP=true

FSID='00000000-0000-0000-0000-0000deadbeef'

# images that are used
IMAGE_MASTER=${IMAGE_MASTER:-'docker.io/ceph/daemon-base:latest-octopus'}
IMAGE_NAUTILUS=${IMAGE_NAUTILUS:-'docker.io/ceph/daemon-base:latest-nautilus'}
IMAGE_MIMIC=${IMAGE_MIMIC:-'docker.io/ceph/daemon-base:latest-mimic'}

OSD_IMAGE_NAME="${SCRIPT_NAME%.*}_osd.img"
OSD_IMAGE_SIZE='6G'
OSD_TO_CREATE=2
OSD_VG_NAME=${SCRIPT_NAME%.*}
OSD_LV_NAME=${SCRIPT_NAME%.*}

CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm
CEPHADM_SAMPLES_DIR=${CEPHADM_SRC_DIR}/samples

[ -z "$SUDO" ] && SUDO=sudo

if [ -z "$CEPHADM" ]; then
    CEPHADM=${CEPHADM_SRC_DIR}/cephadm
fi

# at this point, we need $CEPHADM set
if ! [ -x "$CEPHADM" ]; then
    echo "cephadm not found. Please set \$CEPHADM"
    exit 1
fi

# respawn ourselves with a shebang
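# (What follows re-execs this test once per interpreter found on the host: a
# temporary copy of cephadm gets a shebang pointing at each python in turn,
# and the script re-runs itself with PYTHON_KLUDGE=1 so the body below is
# exercised under both python3 and python2 where available.)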
if [ -z "$PYTHON_KLUDGE" ]; then
    # see which pythons we should test with
    PYTHONS=""
    which python3 && PYTHONS="$PYTHONS python3"
    which python2 && PYTHONS="$PYTHONS python2"
    echo "PYTHONS $PYTHONS"
    if [ -z "$PYTHONS" ]; then
        echo "No PYTHONS found!"
        exit 1
    fi

    TMPBINDIR=$(mktemp -d)
    trap "rm -rf $TMPBINDIR" EXIT
    ORIG_CEPHADM="$CEPHADM"
    CEPHADM="$TMPBINDIR/cephadm"
    for p in $PYTHONS; do
        echo "=== re-running with $p ==="
        ln -s `which $p` $TMPBINDIR/python
        echo "#!$TMPBINDIR/python" > $CEPHADM
        cat $ORIG_CEPHADM >> $CEPHADM
        chmod 700 $CEPHADM
        $TMPBINDIR/python --version
        PYTHON_KLUDGE=1 CEPHADM=$CEPHADM $0
        rm $TMPBINDIR/python
    done
    rm -rf $TMPBINDIR
    echo "PASS with all of: $PYTHONS"
    exit 0
fi

# add image to args
CEPHADM_ARGS="$CEPHADM_ARGS --image $IMAGE_MASTER"

# combine into a single var
CEPHADM_BIN="$CEPHADM"
CEPHADM="$SUDO $CEPHADM_BIN $CEPHADM_ARGS"

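# From here on, $CEPHADM runs cephadm under $SUDO with the octopus image
# pinned via --image, while $CEPHADM_BIN stays available for the calls below
# that need a different image or none at all.
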
# clean up previous run(s)?
$CEPHADM rm-cluster --fsid $FSID --force
$SUDO vgchange -an $OSD_VG_NAME || true
loopdev=$($SUDO losetup -a | grep $(basename $OSD_IMAGE_NAME) | awk -F : '{print $1}')
if ! [ "$loopdev" = "" ]; then
    $SUDO losetup -d $loopdev
fi

# TMPDIR for test data
[ -d "$TMPDIR" ] || TMPDIR=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX)

function cleanup()
{
    if [ $CLEANUP = false ]; then
        # preserve the TMPDIR state
        echo "========================"
        echo "!!! CLEANUP=$CLEANUP !!!"
        echo
        echo "TMPDIR=$TMPDIR"
        echo "========================"
        return
    fi

    dump_all_logs $FSID
    rm -rf $TMPDIR
}
trap cleanup EXIT

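# Helpers: expect_false succeeds only when the given command fails, and
# is_available re-evaluates a condition every 5 seconds; because the script
# runs with -e, the bare `false` after the retry limit aborts the whole test.
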
function expect_false()
{
    set -x
    if eval "$@"; then return 1; else return 0; fi
}

function is_available()
{
    local name="$1"
    local condition="$2"
    local tries="$3"

    local num=0
    while ! eval "$condition"; do
        num=$(($num + 1))
        if [ "$num" -ge $tries ]; then
            echo "$name is not available"
            false
        fi
        sleep 5
    done

    echo "$name is available"
    true
}

function dump_log()
{
    local fsid="$1"
    local name="$2"
    local num_lines="$3"

    if [ -z $num_lines ]; then
        num_lines=100
    fi

    echo '-------------------------'
    echo 'dump daemon log:' $name
    echo '-------------------------'

    $CEPHADM logs --fsid $fsid --name $name -- --no-pager -n $num_lines
}

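# (In dump_log, the arguments after "--" are passed through to the log
# backend; with systemd/journald these are journalctl options such as
# --no-pager and -n <line count>.)
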
function dump_all_logs()
{
    local fsid="$1"
    local names=$($CEPHADM ls | jq -r '.[] | select(.fsid == "'$fsid'").name')

    echo 'dumping logs for daemons: ' $names
    for name in $names; do
        dump_log $fsid $name
    done
}

function nfs_stop()
{
    # stop the running nfs server
    local units="nfs-server nfs-kernel-server"
    for unit in $units; do
        if systemctl status $unit; then
            $SUDO systemctl stop $unit
        fi
    done

    # ensure the NFS port is no longer in use
    expect_false "$SUDO ss -tlnp '( sport = :nfs )' | grep LISTEN"
}

## prepare + check host
$SUDO $CEPHADM check-host

## version + --image
$SUDO CEPHADM_IMAGE=$IMAGE_NAUTILUS $CEPHADM_BIN version
$SUDO CEPHADM_IMAGE=$IMAGE_NAUTILUS $CEPHADM_BIN version \
    | grep 'ceph version 14'
$SUDO $CEPHADM_BIN --image $IMAGE_MIMIC version
$SUDO $CEPHADM_BIN --image $IMAGE_MIMIC version \
    | grep 'ceph version 13'
$SUDO $CEPHADM_BIN --image $IMAGE_MASTER version | grep 'ceph version'

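# (Both the CEPHADM_IMAGE environment variable and the --image flag select the
# container image cephadm runs; the greps assert that the nautilus image
# reports a 14.x version and the mimic image a 13.x version.)
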
# try forcing docker; this won't work if docker isn't installed
systemctl status docker && ( $CEPHADM --docker version | grep 'ceph version' )

## test shell before bootstrap, when the crash dir isn't (yet) present on this host
$CEPHADM shell --fsid $FSID -- ceph -v | grep 'ceph version'
$CEPHADM shell --fsid $FSID -e FOO=BAR -- printenv | grep FOO=BAR

## bootstrap
ORIG_CONFIG=`mktemp -p $TMPDIR`
CONFIG=`mktemp -p $TMPDIR`
MONCONFIG=`mktemp -p $TMPDIR`
KEYRING=`mktemp -p $TMPDIR`
IP=127.0.0.1
cat <<EOF > $ORIG_CONFIG
[global]
    log to file = true
    osd crush chooseleaf type = 0
EOF
$CEPHADM bootstrap \
    --mon-id a \
    --mgr-id x \
    --mon-ip $IP \
    --fsid $FSID \
    --config $ORIG_CONFIG \
    --output-config $CONFIG \
    --output-keyring $KEYRING \
    --output-pub-ssh-key $TMPDIR/ceph.pub \
    --allow-overwrite \
    --skip-mon-network \
    --skip-monitoring-stack
test -e $CONFIG
test -e $KEYRING
rm -f $ORIG_CONFIG

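# (bootstrap brings up a minimal cluster: mon.a and mgr.x with our fixed FSID.
# --skip-mon-network skips deriving the mon public_network from the mon IP, and
# --skip-monitoring-stack keeps cephadm from scheduling the monitoring daemons,
# which this test deploys by hand further down.)
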
$SUDO test -e /var/log/ceph/$FSID/ceph-mon.a.log
$SUDO test -e /var/log/ceph/$FSID/ceph-mgr.x.log

for u in ceph.target \
         ceph-$FSID.target \
         ceph-$FSID@mon.a \
         ceph-$FSID@mgr.x; do
    systemctl is-enabled $u
    systemctl is-active $u
done
systemctl | grep system-ceph | grep -q .slice  # naming is escaped and annoying

# check ceph -s works (via shell w/ passed config/keyring)
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph -s | grep $FSID

for t in mon mgr node-exporter prometheus grafana; do
    $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
        ceph orch apply $t --unmanaged
done

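# (Marking these services --unmanaged keeps the orchestrator from adding or
# removing daemons on its own, so the manual `cephadm deploy` calls below stay
# in control of what runs on this host.)
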
## ls
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").fsid' \
    | grep $FSID
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mgr.x").fsid' \
    | grep $FSID

# make sure the version is returned correctly
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").version' | grep -q \\.

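# (The last grep looks for a literal dot, i.e. a real x.y.z version string
# rather than an empty or null value.)
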
## deploy
# add mon.b
cp $CONFIG $MONCONFIG
echo "public addrv = [v2:$IP:3301,v1:$IP:6790]" >> $MONCONFIG
$CEPHADM deploy --name mon.b \
    --fsid $FSID \
    --keyring /var/lib/ceph/$FSID/mon.a/keyring \
    --config $MONCONFIG
for u in ceph-$FSID@mon.b; do
    systemctl is-enabled $u
    systemctl is-active $u
done
cond="$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph mon stat | grep '2 mons'"
is_available "mon.b" "$cond" 30

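# (mon.b reuses mon.a's keyring and gets non-default ports, v2:3301/v1:6790,
# via "public addrv" so it can share 127.0.0.1 with mon.a; is_available then
# waits up to 30 tries x 5s for `ceph mon stat` to report 2 mons.)
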
# add mgr.y
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph auth get-or-create mgr.y \
        mon 'allow profile mgr' \
        osd 'allow *' \
        mds 'allow *' > $TMPDIR/keyring.mgr.y
$CEPHADM deploy --name mgr.y \
    --fsid $FSID \
    --keyring $TMPDIR/keyring.mgr.y \
    --config $CONFIG
for u in ceph-$FSID@mgr.y; do
    systemctl is-enabled $u
    systemctl is-active $u
done

for f in `seq 1 30`; do
    if $CEPHADM shell --fsid $FSID \
            --config $CONFIG --keyring $KEYRING -- \
            ceph -s -f json-pretty \
            | jq '.mgrmap.num_standbys' | grep -q 1 ; then break; fi
    sleep 1
done
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph -s -f json-pretty \
    | jq '.mgrmap.num_standbys' | grep -q 1

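# (The loop gives mgr.y up to ~30s to come up; the final check asserts the
# mgrmap really reports one standby mgr.)
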
# add osd.{1,2,..}
dd if=/dev/zero of=$TMPDIR/$OSD_IMAGE_NAME bs=1 count=0 seek=$OSD_IMAGE_SIZE
loop_dev=$($SUDO losetup -f)
$SUDO vgremove -f $OSD_VG_NAME || true
$SUDO losetup $loop_dev $TMPDIR/$OSD_IMAGE_NAME
$SUDO pvcreate $loop_dev && $SUDO vgcreate $OSD_VG_NAME $loop_dev

# osd bootstrap keyring
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph auth get client.bootstrap-osd > $TMPDIR/keyring.bootstrap.osd

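# (dd with count=0/seek creates a sparse $OSD_IMAGE_SIZE file, which is
# attached to the next free loop device and turned into an LVM PV/VG; the
# per-OSD logical volumes are carved out of that VG below.)
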
# create lvs first so ceph-volume doesn't overlap with lv creation
for id in `seq 0 $((OSD_TO_CREATE-1))`; do
    $SUDO lvcreate -l $((100/$OSD_TO_CREATE))%VG -n $OSD_LV_NAME.$id $OSD_VG_NAME
done

for id in `seq 0 $((OSD_TO_CREATE-1))`; do
    device_name=/dev/$OSD_VG_NAME/$OSD_LV_NAME.$id
    CEPH_VOLUME="$CEPHADM ceph-volume \
        --fsid $FSID \
        --config $CONFIG \
        --keyring $TMPDIR/keyring.bootstrap.osd --"

    # prepare the osd
    $CEPH_VOLUME lvm prepare --bluestore --data $device_name --no-systemd
    $CEPH_VOLUME lvm batch --no-auto $device_name --yes --no-systemd

    # osd id and osd fsid
    $CEPH_VOLUME lvm list --format json $device_name > $TMPDIR/osd.map
    osd_id=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_id"? | select(.)')
    osd_fsid=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_fsid"? | select(.)')

    # deploy the osd
    $CEPHADM deploy --name osd.$osd_id \
        --fsid $FSID \
        --keyring $TMPDIR/keyring.bootstrap.osd \
        --config $CONFIG \
        --osd-fsid $osd_fsid
done

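# (Per OSD: ceph-volume prepares the LV as a bluestore OSD with --no-systemd
# since cephadm manages the units itself; the resulting osd id and fsid are
# read back from `lvm list --format json` with jq, and `cephadm deploy
# --osd-fsid` then creates and starts the containerized OSD.)
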
# add node-exporter
${CEPHADM//--image $IMAGE_MASTER/} deploy \
    --name node-exporter.a --fsid $FSID
cond="curl 'http://localhost:9100' | grep -q 'Node Exporter'"
is_available "node-exporter" "$cond" 5

# add prometheus
cat ${CEPHADM_SAMPLES_DIR}/prometheus.json | \
    ${CEPHADM//--image $IMAGE_MASTER/} deploy \
        --name prometheus.a --fsid $FSID --config-json -
cond="curl 'localhost:9095/api/v1/query?query=up'"
is_available "prometheus" "$cond" 5

# add grafana
cat ${CEPHADM_SAMPLES_DIR}/grafana.json | \
    ${CEPHADM//--image $IMAGE_MASTER/} deploy \
        --name grafana.a --fsid $FSID --config-json -
cond="curl --insecure 'https://localhost:3000' | grep -q 'grafana'"
is_available "grafana" "$cond" 30

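# (${CEPHADM//--image $IMAGE_MASTER/} is a bash pattern substitution that
# strips the Ceph image override, so cephadm falls back to its default images
# for these non-Ceph daemons; the prometheus and grafana sample JSON files
# supply their configuration via --config-json on stdin.)
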
# add nfs-ganesha
nfs_stop
nfs_rados_pool=$(cat ${CEPHADM_SAMPLES_DIR}/nfs.json | jq -r '.["pool"]')
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph osd pool create $nfs_rados_pool 64
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    rados --pool nfs-ganesha --namespace nfs-ns create conf-nfs.a
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph orch pause
$CEPHADM deploy --name nfs.a \
    --fsid $FSID \
    --keyring $KEYRING \
    --config $CONFIG \
    --config-json ${CEPHADM_SAMPLES_DIR}/nfs.json
cond="$SUDO ss -tlnp '( sport = :nfs )' | grep 'ganesha.nfsd'"
is_available "nfs" "$cond" 10
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph orch resume

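# (nfs_stop frees the NFS port from any kernel NFS server first; the pool
# named in nfs.json and an empty conf-nfs.a object are created by hand, and
# the orchestrator is paused around the manual deploy so it does not
# interfere, then resumed once ganesha is listening.)
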
## run
# WRITE ME

## unit
$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- is-active
expect_false $CEPHADM unit --fsid $FSID --name mon.xyz -- is-active
$CEPHADM unit --fsid $FSID --name mon.a -- disable
expect_false $CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- enable
$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled

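# (`cephadm unit` forwards the verb after "--" to systemctl for the named
# daemon's unit, so this exercises enable/disable/is-active round trips and
# confirms that an unknown daemon name fails.)
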
## shell
$CEPHADM shell --fsid $FSID -- true
$CEPHADM shell --fsid $FSID -- test -d /var/log/ceph
expect_false $CEPHADM --timeout 10 shell --fsid $FSID -- sleep 60
$CEPHADM --timeout 60 shell --fsid $FSID -- sleep 10

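# (--timeout N kills the containerized command after N seconds, so the 60s
# sleep under a 10s timeout must fail while the 10s sleep under a 60s timeout
# must pass.)
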
## enter
expect_false $CEPHADM enter
$CEPHADM enter --fsid $FSID --name mon.a -- test -d /var/lib/ceph/mon/ceph-a
$CEPHADM enter --fsid $FSID --name mgr.x -- test -d /var/lib/ceph/mgr/ceph-x
$CEPHADM enter --fsid $FSID --name mon.a -- pidof ceph-mon
expect_false $CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mon
$CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mgr
# this triggers a bug in older versions of podman, including 18.04's 1.6.2
#expect_false $CEPHADM --timeout 5 enter --fsid $FSID --name mon.a -- sleep 30
$CEPHADM --timeout 60 enter --fsid $FSID --name mon.a -- sleep 10

## ceph-volume
$CEPHADM ceph-volume --fsid $FSID -- inventory --format=json \
    | jq '.[]'

## preserve test state
[ $CLEANUP = false ] && exit 0

## rm-daemon
# mon and osd require --force
expect_false $CEPHADM rm-daemon --fsid $FSID --name mon.a
# mgr does not
$CEPHADM rm-daemon --fsid $FSID --name mgr.x

## rm-cluster
expect_false $CEPHADM rm-cluster --fsid $FSID
$CEPHADM rm-cluster --fsid $FSID --force

echo PASS