#!/bin/bash -ex

SCRIPT_NAME=$(basename ${BASH_SOURCE[0]})
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# cleanup during exit
[ -z "$CLEANUP" ] && CLEANUP=true
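# set CLEANUP=false to keep the cluster, loop device, and $TMPDIR around for debugging
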
FSID='00000000-0000-0000-0000-0000deadbeef'

# images that are used
IMAGE_MASTER=${IMAGE_MASTER:-'quay.ceph.io/ceph-ci/ceph:master'}
IMAGE_PACIFIC=${IMAGE_PACIFIC:-'quay.ceph.io/ceph-ci/ceph:pacific'}
#IMAGE_OCTOPUS=${IMAGE_OCTOPUS:-'quay.ceph.io/ceph-ci/ceph:octopus'}
IMAGE_DEFAULT=${IMAGE_PACIFIC}
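# the IMAGE_* defaults above can be overridden from the environment to run the
# test against a different build
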
OSD_IMAGE_NAME="${SCRIPT_NAME%.*}_osd.img"
OSD_IMAGE_SIZE='6G'
OSD_TO_CREATE=2
OSD_VG_NAME=${SCRIPT_NAME%.*}
OSD_LV_NAME=${SCRIPT_NAME%.*}

CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm
CEPHADM_SAMPLES_DIR=${CEPHADM_SRC_DIR}/samples

[ -z "$SUDO" ] && SUDO=sudo

if [ -z "$CEPHADM" ]; then
    CEPHADM=${CEPHADM_SRC_DIR}/cephadm
fi

# at this point, we need $CEPHADM set
if ! [ -x "$CEPHADM" ]; then
    echo "cephadm not found. Please set \$CEPHADM"
    exit 1
fi

# add image to args
CEPHADM_ARGS="$CEPHADM_ARGS --image $IMAGE_DEFAULT"

# combine into a single var
CEPHADM_BIN="$CEPHADM"
CEPHADM="$SUDO $CEPHADM_BIN $CEPHADM_ARGS"
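# from here on, $CEPHADM runs the cephadm script under sudo with the default
# image pinned; with the defaults above it expands to roughly:
#   sudo $CEPHADM_SRC_DIR/cephadm --image quay.ceph.io/ceph-ci/ceph:pacific <cmd>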

# clean up previous run(s)?
$CEPHADM rm-cluster --fsid $FSID --force
$SUDO vgchange -an $OSD_VG_NAME || true
loopdev=$($SUDO losetup -a | grep $(basename $OSD_IMAGE_NAME) | awk -F : '{print $1}')
if ! [ "$loopdev" = "" ]; then
    $SUDO losetup -d $loopdev
fi

# TMPDIR for test data
[ -d "$TMPDIR" ] || TMPDIR=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX)
[ -d "$TMPDIR_TEST_MULTIPLE_MOUNTS" ] || TMPDIR_TEST_MULTIPLE_MOUNTS=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX)

function cleanup()
{
    if [ "$CLEANUP" = false ]; then
        # preserve the TMPDIR state
        echo "========================"
        echo "!!! CLEANUP=$CLEANUP !!!"
        echo
        echo "TMPDIR=$TMPDIR"
        echo "========================"
        return
    fi

    dump_all_logs $FSID
    rm -rf $TMPDIR
}
trap cleanup EXIT

# succeed iff the given command fails
function expect_false()
{
    set -x
    if eval "$@"; then return 1; else return 0; fi
}

# retry $condition every 5 seconds, up to $tries attempts
function is_available()
{
    local name="$1"
    local condition="$2"
    local tries="$3"

    local num=0
    while ! eval "$condition"; do
        num=$(($num + 1))
        if [ "$num" -ge $tries ]; then
            echo "$name is not available"
            false   # under -e this aborts the whole test
        fi
        sleep 5
    done

    echo "$name is available"
    true
}
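# e.g. is_available "mon.b" "$cond" 30 polls for up to ~150 seconds
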
function dump_log()
{
    local fsid="$1"
    local name="$2"
    local num_lines="$3"

    if [ -z "$num_lines" ]; then
        num_lines=100
    fi

    echo '-------------------------'
    echo 'dump daemon log:' $name
    echo '-------------------------'

    $CEPHADM logs --fsid $fsid --name $name -- --no-pager -n $num_lines
}

function dump_all_logs()
{
    local fsid="$1"
    local names=$($CEPHADM ls | jq -r '.[] | select(.fsid == "'$fsid'").name')

    echo 'dumping logs for daemons: ' $names
    for name in $names; do
        dump_log $fsid $name
    done
}

function nfs_stop()
{
    # stop the running nfs server
    local units="nfs-server nfs-kernel-server"
    for unit in $units; do
        if systemctl status $unit < /dev/null; then
            $SUDO systemctl stop $unit
        fi
    done

    # ensure the NFS port is no longer in use
    expect_false "$SUDO ss -tlnp '( sport = :nfs )' | grep LISTEN"
}

## prepare + check host
$SUDO $CEPHADM check-host

## run a gather-facts (output to stdout)
$SUDO $CEPHADM gather-facts

## version + --image
$SUDO CEPHADM_IMAGE=$IMAGE_PACIFIC $CEPHADM_BIN version
$SUDO CEPHADM_IMAGE=$IMAGE_PACIFIC $CEPHADM_BIN version \
    | grep 'ceph version 16'
#$SUDO CEPHADM_IMAGE=$IMAGE_OCTOPUS $CEPHADM_BIN version
#$SUDO CEPHADM_IMAGE=$IMAGE_OCTOPUS $CEPHADM_BIN version \
#    | grep 'ceph version 15'
$SUDO $CEPHADM_BIN --image $IMAGE_MASTER version | grep 'ceph version'

# try force docker; this won't work if docker isn't installed
systemctl status docker > /dev/null && ( $CEPHADM --docker version | grep 'ceph version' ) || echo "docker not installed"

## test shell before bootstrap, when crash dir isn't (yet) present on this host
$CEPHADM shell --fsid $FSID -- ceph -v | grep 'ceph version'
$CEPHADM shell --fsid $FSID -e FOO=BAR -- printenv | grep FOO=BAR

## bootstrap
ORIG_CONFIG=`mktemp -p $TMPDIR`
CONFIG=`mktemp -p $TMPDIR`
MONCONFIG=`mktemp -p $TMPDIR`
KEYRING=`mktemp -p $TMPDIR`
IP=127.0.0.1
cat <<EOF > $ORIG_CONFIG
[global]
    log to file = true
    osd crush chooseleaf type = 0
EOF
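
# bootstrap a minimal single-host cluster with fixed mon/mgr ids and fsid;
# --allow-overwrite tolerates leftovers from a previous run, --skip-mon-network
# skips setting the mon public_network, and the generated config/keyring land
# in the $TMPDIR files used by the shell calls below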
$CEPHADM bootstrap \
    --mon-id a \
    --mgr-id x \
    --mon-ip $IP \
    --fsid $FSID \
    --config $ORIG_CONFIG \
    --output-config $CONFIG \
    --output-keyring $KEYRING \
    --output-pub-ssh-key $TMPDIR/ceph.pub \
    --allow-overwrite \
    --skip-mon-network \
    --skip-monitoring-stack \
    --with-exporter
test -e $CONFIG
test -e $KEYRING
rm -f $ORIG_CONFIG

$SUDO test -e /var/log/ceph/$FSID/ceph-mon.a.log
$SUDO test -e /var/log/ceph/$FSID/ceph-mgr.x.log

for u in ceph.target \
         ceph-$FSID.target \
         ceph-$FSID@mon.a \
         ceph-$FSID@mgr.x; do
    systemctl is-enabled $u
    systemctl is-active $u
done
systemctl | grep system-ceph | grep -q .slice  # naming is escaped and annoying

# check ceph -s works (via shell w/ passed config/keyring)
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph -s | grep $FSID

# mark these services unmanaged so the orchestrator doesn't redeploy them
# behind the test's back
for t in mon mgr node-exporter prometheus grafana; do
    $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
        ceph orch apply $t --unmanaged
done

## ls
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").fsid' \
    | grep $FSID
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mgr.x").fsid' \
    | grep $FSID

# make sure the version is returned correctly
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").version' | grep -q \\.

## deploy
# add mon.b, listening on non-default ports so it can coexist with mon.a
cp $CONFIG $MONCONFIG
echo "public addrv = [v2:$IP:3301,v1:$IP:6790]" >> $MONCONFIG
$CEPHADM deploy --name mon.b \
    --fsid $FSID \
    --keyring /var/lib/ceph/$FSID/mon.a/keyring \
    --config $MONCONFIG
for u in ceph-$FSID@mon.b; do
    systemctl is-enabled $u
    systemctl is-active $u
done
cond="$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph mon stat | grep '2 mons'"
is_available "mon.b" "$cond" 30

# add mgr.y
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph auth get-or-create mgr.y \
        mon 'allow profile mgr' \
        osd 'allow *' \
        mds 'allow *' > $TMPDIR/keyring.mgr.y
$CEPHADM deploy --name mgr.y \
    --fsid $FSID \
    --keyring $TMPDIR/keyring.mgr.y \
    --config $CONFIG
for u in ceph-$FSID@mgr.y; do
    systemctl is-enabled $u
    systemctl is-active $u
done

# wait (up to ~30s) for mgr.y to register as a standby, then assert it
for f in `seq 1 30`; do
    if $CEPHADM shell --fsid $FSID \
            --config $CONFIG --keyring $KEYRING -- \
        ceph -s -f json-pretty \
        | jq '.mgrmap.num_standbys' | grep -q 1 ; then break; fi
    sleep 1
done
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph -s -f json-pretty \
    | jq '.mgrmap.num_standbys' | grep -q 1

# add osd.{1,2,..}
# back the OSDs with a sparse image file on a loop device
dd if=/dev/zero of=$TMPDIR/$OSD_IMAGE_NAME bs=1 count=0 seek=$OSD_IMAGE_SIZE
loop_dev=$($SUDO losetup -f)
$SUDO vgremove -f $OSD_VG_NAME || true
$SUDO losetup $loop_dev $TMPDIR/$OSD_IMAGE_NAME
$SUDO pvcreate $loop_dev && $SUDO vgcreate $OSD_VG_NAME $loop_dev
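# $OSD_VG_NAME is now an LVM volume group backed by the loop device; the LVs
# carved out of it below stand in for real OSD disks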

# osd bootstrap keyring
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph auth get client.bootstrap-osd > $TMPDIR/keyring.bootstrap.osd

# create lvs first so ceph-volume doesn't overlap with lv creation
for id in `seq 0 $((OSD_TO_CREATE - 1))`; do
    $SUDO lvcreate -l $((100/$OSD_TO_CREATE))%VG -n $OSD_LV_NAME.$id $OSD_VG_NAME
done

for id in `seq 0 $((OSD_TO_CREATE - 1))`; do
    device_name=/dev/$OSD_VG_NAME/$OSD_LV_NAME.$id
    CEPH_VOLUME="$CEPHADM ceph-volume \
                     --fsid $FSID \
                     --config $CONFIG \
                     --keyring $TMPDIR/keyring.bootstrap.osd --"

    # prepare the osd
    $CEPH_VOLUME lvm prepare --bluestore --data $device_name --no-systemd
    $CEPH_VOLUME lvm batch --no-auto $device_name --yes --no-systemd

    # osd id and osd fsid
    $CEPH_VOLUME lvm list --format json $device_name > $TMPDIR/osd.map
    osd_id=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_id"? | select(.)')
    osd_fsid=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_fsid"? | select(.)')

    # deploy the osd
    $CEPHADM deploy --name osd.$osd_id \
        --fsid $FSID \
        --keyring $TMPDIR/keyring.bootstrap.osd \
        --config $CONFIG \
        --osd-fsid $osd_fsid
done

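# for the monitoring daemons below, ${CEPHADM//--image $IMAGE_DEFAULT/} strips
# the pinned ceph image from $CEPHADM so cephadm falls back to its built-in
# per-daemon default images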
# add node-exporter
${CEPHADM//--image $IMAGE_DEFAULT/} deploy \
    --name node-exporter.a --fsid $FSID
cond="curl 'http://localhost:9100' | grep -q 'Node Exporter'"
is_available "node-exporter" "$cond" 10

# add prometheus
cat ${CEPHADM_SAMPLES_DIR}/prometheus.json | \
    ${CEPHADM//--image $IMAGE_DEFAULT/} deploy \
        --name prometheus.a --fsid $FSID --config-json -
cond="curl 'localhost:9095/api/v1/query?query=up'"
is_available "prometheus" "$cond" 10

# add grafana
cat ${CEPHADM_SAMPLES_DIR}/grafana.json | \
    ${CEPHADM//--image $IMAGE_DEFAULT/} deploy \
        --name grafana.a --fsid $FSID --config-json -
cond="curl --insecure 'https://localhost:3000' | grep -q 'grafana'"
is_available "grafana" "$cond" 50

# add nfs-ganesha
nfs_stop
nfs_rados_pool=$(cat ${CEPHADM_SAMPLES_DIR}/nfs.json | jq -r '.["pool"]')
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph osd pool create $nfs_rados_pool 64
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    rados --pool nfs-ganesha --namespace nfs-ns create conf-nfs.a
# pause the orchestrator so it doesn't interfere with the manual deploy
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph orch pause
$CEPHADM deploy --name nfs.a \
    --fsid $FSID \
    --keyring $KEYRING \
    --config $CONFIG \
    --config-json ${CEPHADM_SAMPLES_DIR}/nfs.json
cond="$SUDO ss -tlnp '( sport = :nfs )' | grep 'ganesha.nfsd'"
is_available "nfs" "$cond" 10
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph orch resume

# add alertmanager via custom container
alertmanager_image=$(cat ${CEPHADM_SAMPLES_DIR}/custom_container.json | jq -r '.image')
tcp_ports=$(cat ${CEPHADM_SAMPLES_DIR}/custom_container.json | jq -r '.ports | map_values(.|tostring) | join(" ")')
cat ${CEPHADM_SAMPLES_DIR}/custom_container.json | \
    ${CEPHADM//--image $IMAGE_DEFAULT/} \
        --image $alertmanager_image \
        deploy \
        --tcp-ports "$tcp_ports" \
        --name container.alertmanager.a \
        --fsid $FSID \
        --config-json -
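# (custom_container.json supplies the image and tcp ports parsed above, and is
# passed whole on stdin as the daemon's --config-json)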
cond="$CEPHADM enter --fsid $FSID --name container.alertmanager.a -- test -f \
    /etc/alertmanager/alertmanager.yml"
is_available "alertmanager.yml" "$cond" 10
cond="curl 'http://localhost:9093' | grep -q 'Alertmanager'"
is_available "alertmanager" "$cond" 10

# Fetch the token we need to access the exporter API
token=$($CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING ceph cephadm get-exporter-config | jq -r '.token')
[[ ! -z "$token" ]]

# check all exporter threads active
cond="curl -k -s -H \"Authorization: Bearer $token\" \
    https://localhost:9443/v1/metadata/health | \
    jq -r '.tasks | select(.disks == \"active\" and .daemons == \"active\" and .host == \"active\")'"
is_available "exporter_threads_active" "$cond" 3

# check we deployed for all hosts
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING ceph orch ls --service-type cephadm-exporter --format json
host_pattern=$($CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING ceph orch ls --service-type cephadm-exporter --format json | jq -r '.[0].placement.host_pattern')
[[ "$host_pattern" = "*" ]]

## run
# WRITE ME

## unit
$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- is-active
expect_false $CEPHADM unit --fsid $FSID --name mon.xyz -- is-active
$CEPHADM unit --fsid $FSID --name mon.a -- disable
expect_false $CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- enable
$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled

## shell
$CEPHADM shell --fsid $FSID -- true
$CEPHADM shell --fsid $FSID -- test -d /var/log/ceph
# --timeout bounds how long the containerized command may run
expect_false $CEPHADM --timeout 10 shell --fsid $FSID -- sleep 60
$CEPHADM --timeout 60 shell --fsid $FSID -- sleep 10
$CEPHADM shell --fsid $FSID --mount $TMPDIR $TMPDIR_TEST_MULTIPLE_MOUNTS -- stat /mnt/$(basename $TMPDIR)

## enter
expect_false $CEPHADM enter
$CEPHADM enter --fsid $FSID --name mon.a -- test -d /var/lib/ceph/mon/ceph-a
$CEPHADM enter --fsid $FSID --name mgr.x -- test -d /var/lib/ceph/mgr/ceph-x
$CEPHADM enter --fsid $FSID --name mon.a -- pidof ceph-mon
expect_false $CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mon
$CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mgr
# this triggers a bug in older versions of podman, including 18.04's 1.6.2
#expect_false $CEPHADM --timeout 5 enter --fsid $FSID --name mon.a -- sleep 30
$CEPHADM --timeout 60 enter --fsid $FSID --name mon.a -- sleep 10

## ceph-volume
$CEPHADM ceph-volume --fsid $FSID -- inventory --format=json \
    | jq '.[]'

## preserve test state
[ "$CLEANUP" = false ] && exit 0

## rm-daemon
# mon and osd require --force
expect_false $CEPHADM rm-daemon --fsid $FSID --name mon.a
# mgr does not
$CEPHADM rm-daemon --fsid $FSID --name mgr.x

## zap-osds
# zap-osds also requires --force
expect_false $CEPHADM zap-osds --fsid $FSID
$CEPHADM zap-osds --fsid $FSID --force

## rm-cluster
# rm-cluster requires --force
expect_false $CEPHADM rm-cluster --fsid $FSID --zap-osds
$CEPHADM rm-cluster --fsid $FSID --force --zap-osds

echo PASS