# Source: git.proxmox.com Git - ceph.git
# blob: ceph/qa/workunits/cephadm/test_cephadm.sh (import 15.2.4)
#!/bin/bash -ex

# Re-assert the shebang's shell options so they also apply when the
# script is invoked as "bash test_cephadm.sh" (shebang flags are lost
# in that case): -e exit on error, -x trace commands.
set -ex

SCRIPT_NAME=$(basename "${BASH_SOURCE[0]}")
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# cleanup during exit
# (safe under -e: a failing [ ] inside an && list does not abort)
[ -z "$CLEANUP" ] && CLEANUP=true

# fixed fsid so re-runs can find and remove a previous cluster
FSID='00000000-0000-0000-0000-0000deadbeef'

# images that are used
IMAGE_MASTER=${IMAGE_MASTER:-'docker.io/ceph/daemon-base:latest-octopus'}
IMAGE_NAUTILUS=${IMAGE_NAUTILUS:-'docker.io/ceph/daemon-base:latest-nautilus'}
IMAGE_MIMIC=${IMAGE_MIMIC:-'docker.io/ceph/daemon-base:latest-mimic'}

# scratch disk image and LVM names for the OSD tests, all derived from
# the script name ("test_cephadm")
OSD_IMAGE_NAME="${SCRIPT_NAME%.*}_osd.img"
OSD_IMAGE_SIZE='6G'
OSD_TO_CREATE=2
OSD_VG_NAME=${SCRIPT_NAME%.*}
OSD_LV_NAME=${SCRIPT_NAME%.*}

CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm
CEPHADM_SAMPLES_DIR=${CEPHADM_SRC_DIR}/samples

[ -z "$SUDO" ] && SUDO=sudo

# default to the in-tree cephadm unless the caller supplied one
if [ -z "$CEPHADM" ]; then
    CEPHADM=${CEPHADM_SRC_DIR}/cephadm
fi

# at this point, we need $CEPHADM set
if ! [ -x "$CEPHADM" ]; then
    echo "cephadm not found. Please set \$CEPHADM"
    exit 1
fi
36
# respawn ourselves with a shebang
# cephadm itself ships without a #! line (it must run under python2 or
# python3), so to exercise both interpreters we synthesize a copy with
# an explicit shebang and re-exec this whole test once per available
# python.  PYTHON_KLUDGE=1 in the child's environment stops the
# recursion, making the child fall through to the real test body below.
if [ -z "$PYTHON_KLUDGE" ]; then
    # see which pythons we should test with
    # (the `which ... && ...` form is -e safe: a miss only skips the append)
    PYTHONS=""
    which python3 && PYTHONS="$PYTHONS python3"
    which python2 && PYTHONS="$PYTHONS python2"
    echo "PYTHONS $PYTHONS"
    if [ -z "$PYTHONS" ]; then
	echo "No PYTHONS found!"
	exit 1
    fi

    TMPBINDIR=$(mktemp -d)
    # this EXIT trap only lives in the parent; the parent exits at the
    # end of this if-block and never reaches the later cleanup trap
    trap "rm -rf $TMPBINDIR" EXIT
    ORIG_CEPHADM="$CEPHADM"
    CEPHADM="$TMPBINDIR/cephadm"
    for p in $PYTHONS; do
	echo "=== re-running with $p ==="
	# $TMPBINDIR/python -> the interpreter under test, referenced by
	# the shebang written into the synthesized cephadm copy
	ln -s `which $p` $TMPBINDIR/python
	echo "#!$TMPBINDIR/python" > $CEPHADM
	cat $ORIG_CEPHADM >> $CEPHADM
	chmod 700 $CEPHADM
	$TMPBINDIR/python --version
	# child inherits PYTHON_KLUDGE and runs the real test body
	PYTHON_KLUDGE=1 CEPHADM=$CEPHADM $0
	rm $TMPBINDIR/python
    done
    rm -rf $TMPBINDIR
    echo "PASS with all of: $PYTHONS"
    exit 0
fi
67
# add image to args
CEPHADM_ARGS="$CEPHADM_ARGS --image $IMAGE_MASTER"

# combine into a single var
# $CEPHADM_BIN keeps the bare binary (for tests that pass their own
# --image); $CEPHADM is the fully-wrapped invocation used by default
CEPHADM_BIN="$CEPHADM"
CEPHADM="$SUDO $CEPHADM_BIN $CEPHADM_ARGS"

# clean up previous run(s)?
$CEPHADM rm-cluster --fsid $FSID --force
# deactivate any leftover test VG and detach the loop device backing it,
# otherwise the losetup/vgcreate in the OSD section would fail on re-run
$SUDO vgchange -an $OSD_VG_NAME || true
loopdev=$($SUDO losetup -a | grep $(basename $OSD_IMAGE_NAME) | awk -F : '{print $1}')
if ! [ "$loopdev" = "" ]; then
    $SUDO losetup -d $loopdev
fi

# TMPDIR for test data
[ -d "$TMPDIR" ] || TMPDIR=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX)
85
# Exit handler: dump all daemon logs and remove TMPDIR, unless the
# caller requested CLEANUP=false, in which case the test state is kept
# and its location printed for post-mortem inspection.
function cleanup()
{
    # quote the expansion: with an unset/empty CLEANUP the unquoted
    # test would be a [ syntax error instead of taking the cleanup path
    if [ "$CLEANUP" = false ]; then
	# preserve the TMPDIR state
	echo "========================"
	echo "!!! CLEANUP=$CLEANUP !!!"
	echo
	echo "TMPDIR=$TMPDIR"
	echo "========================"
	return
    fi

    dump_all_logs $FSID
    rm -rf "$TMPDIR"
}
trap cleanup EXIT
102
# Succeed (return 0) only when the given command fails; return 1 when it
# unexpectedly succeeds.  The command is re-parsed with eval so callers
# may pass pipelines/quoted strings as well as plain argument lists.
function expect_false()
{
    set -x
    ! eval "$@"
}
108
# Poll until a service becomes available.
#   $1 - human-readable name (for log messages)
#   $2 - condition command, eval'd each attempt; success means available
#   $3 - max number of attempts, 5 seconds apart
# Returns 0 once the condition holds, 1 after exhausting all attempts.
function is_available()
{
    local name="$1"
    local condition="$2"
    local tries="$3"

    local num=0
    while ! eval "$condition"; do
	num=$(($num + 1))
	if [ "$num" -ge "$tries" ]; then
	    echo "$name is not available"
	    # return an error instead of a bare `false`: the old form
	    # relied on `set -e` to abort and would otherwise keep
	    # sleeping in this loop forever
	    return 1
	fi
	sleep 5
    done

    echo "$name is available"
    true
}
128
# Print the tail of one daemon's journal.
#   $1 - cluster fsid
#   $2 - daemon name (e.g. mon.a)
#   $3 - number of lines to show (optional, default 100)
function dump_log()
{
    local fsid="$1"
    local name="$2"
    local num_lines="${3:-100}"

    echo '-------------------------'
    echo 'dump daemon log:' $name
    echo '-------------------------'

    # everything after `--` is passed through to journalctl
    $CEPHADM logs --fsid $fsid --name $name -- --no-pager -n $num_lines
}
145
# Dump the logs of every daemon that `cephadm ls` reports for the
# given fsid ($1).
function dump_all_logs()
{
    local fsid="$1"
    local name
    local names
    names=$($CEPHADM ls | jq -r '.[] | select(.fsid == "'$fsid'").name')

    echo 'dumping logs for daemons: ' $names
    for name in $names; do
	dump_log $fsid $name
    done
}
156
# Stop any host NFS server (kernel or userspace) so the ganesha test
# below can bind the NFS port, then verify the port really is free.
function nfs_stop()
{
    # stop the running nfs server
    local unit
    for unit in nfs-server nfs-kernel-server; do
	if systemctl status $unit; then
	    $SUDO systemctl stop $unit
	fi
    done

    # ensure the NFS port is no longer in use
    expect_false "$SUDO ss -tlnp '( sport = :nfs )' | grep LISTEN"
}
170
171 ## prepare + check host
172 $SUDO $CEPHADM check-host
173
174 ## version + --image
175 $SUDO CEPHADM_IMAGE=$IMAGE_NAUTILUS $CEPHADM_BIN version
176 $SUDO CEPHADM_IMAGE=$IMAGE_NAUTILUS $CEPHADM_BIN version \
177 | grep 'ceph version 14'
178 $SUDO $CEPHADM_BIN --image $IMAGE_MIMIC version
179 $SUDO $CEPHADM_BIN --image $IMAGE_MIMIC version \
180 | grep 'ceph version 13'
181 $SUDO $CEPHADM_BIN --image $IMAGE_MASTER version | grep 'ceph version'
182
183 # try force docker; this won't work if docker isn't installed
184 systemctl status docker && ( $CEPHADM --docker version | grep 'ceph version' )
185
186 ## test shell before bootstrap, when crash dir isn't (yet) present on this host
187 $CEPHADM shell --fsid $FSID -- ceph -v | grep 'ceph version'
188 $CEPHADM shell --fsid $FSID -e FOO=BAR -- printenv | grep FOO=BAR
189
190 ## bootstrap
191 ORIG_CONFIG=`mktemp -p $TMPDIR`
192 CONFIG=`mktemp -p $TMPDIR`
193 MONCONFIG=`mktemp -p $TMPDIR`
194 KEYRING=`mktemp -p $TMPDIR`
195 IP=127.0.0.1
196 cat <<EOF > $ORIG_CONFIG
197 [global]
198 log to file = true
199 osd crush chooseleaf type = 0
200 EOF
201 $CEPHADM bootstrap \
202 --mon-id a \
203 --mgr-id x \
204 --mon-ip $IP \
205 --fsid $FSID \
206 --config $ORIG_CONFIG \
207 --output-config $CONFIG \
208 --output-keyring $KEYRING \
209 --output-pub-ssh-key $TMPDIR/ceph.pub \
210 --allow-overwrite \
211 --skip-mon-network \
212 --skip-monitoring-stack
213 test -e $CONFIG
214 test -e $KEYRING
215 rm -f $ORIG_CONFIG
216
217 $SUDO test -e /var/log/ceph/$FSID/ceph-mon.a.log
218 $SUDO test -e /var/log/ceph/$FSID/ceph-mgr.x.log
219
220 for u in ceph.target \
221 ceph-$FSID.target \
222 ceph-$FSID@mon.a \
223 ceph-$FSID@mgr.x; do
224 systemctl is-enabled $u
225 systemctl is-active $u
226 done
227 systemctl | grep system-ceph | grep -q .slice # naming is escaped and annoying
228
229 # check ceph -s works (via shell w/ passed config/keyring)
230 $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
231 ceph -s | grep $FSID
232
233 for t in mon mgr node-exporter prometheus grafana; do
234 $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
235 ceph orch apply $t --unmanaged
236 done
237
238 ## ls
239 $CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").fsid' \
240 | grep $FSID
241 $CEPHADM ls | jq '.[]' | jq 'select(.name == "mgr.x").fsid' \
242 | grep $FSID
243
244 # make sure the version is returned correctly
245 $CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").version' | grep -q \\.
246
## deploy
# add mon.b
# mon.b gets an explicit public addrvec on nonstandard ports so it can
# coexist with mon.a on the same host
cp $CONFIG $MONCONFIG
echo "public addrv = [v2:$IP:3301,v1:$IP:6790]" >> $MONCONFIG
$CEPHADM deploy --name mon.b \
    --fsid $FSID \
    --keyring /var/lib/ceph/$FSID/mon.a/keyring \
    --config $MONCONFIG
for u in ceph-$FSID@mon.b; do
    systemctl is-enabled $u
    systemctl is-active $u
done
# wait (up to 30 tries x 5s) for mon.b to join the monmap
cond="$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
      ceph mon stat | grep '2 mons'"
is_available "mon.b" "$cond" 30

# add mgr.y
# create its cephx key first, then deploy with that keyring
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph auth get-or-create mgr.y \
	mon 'allow profile mgr' \
	osd 'allow *' \
	mds 'allow *' > $TMPDIR/keyring.mgr.y
$CEPHADM deploy --name mgr.y \
    --fsid $FSID \
    --keyring $TMPDIR/keyring.mgr.y \
    --config $CONFIG
for u in ceph-$FSID@mgr.y; do
    systemctl is-enabled $u
    systemctl is-active $u
done

# poll (up to ~30s) until mgr.y registers as a standby...
for f in `seq 1 30`; do
    if $CEPHADM shell --fsid $FSID \
	    --config $CONFIG --keyring $KEYRING -- \
	ceph -s -f json-pretty \
	| jq '.mgrmap.num_standbys' | grep -q 1 ; then break; fi
    sleep 1
done
# ...then assert it, so a timeout above becomes a hard failure here
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph -s -f json-pretty \
    | jq '.mgrmap.num_standbys' | grep -q 1
288
# add osd.{1,2,..}
# back the OSDs with a sparse file on a loop device, turned into an LVM
# VG that is carved into one LV per OSD
dd if=/dev/zero of=$TMPDIR/$OSD_IMAGE_NAME bs=1 count=0 seek=$OSD_IMAGE_SIZE
loop_dev=$($SUDO losetup -f)
$SUDO vgremove -f $OSD_VG_NAME || true
$SUDO losetup $loop_dev $TMPDIR/$OSD_IMAGE_NAME
$SUDO pvcreate $loop_dev && $SUDO vgcreate $OSD_VG_NAME $loop_dev

# osd bootstrap keyring
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph auth get client.bootstrap-osd > $TMPDIR/keyring.bootstrap.osd

# create lvs first so ceph-volume doesn't overlap with lv creation
# use the explicit, side-effect-free $((OSD_TO_CREATE - 1)); the old
# $((--OSD_TO_CREATE)) pre-decrement only avoided corrupting the
# variable because it ran inside a command-substitution subshell
for id in $(seq 0 $((OSD_TO_CREATE - 1))); do
    # split the VG evenly across the OSDs
    $SUDO lvcreate -l $((100/$OSD_TO_CREATE))%VG -n $OSD_LV_NAME.$id $OSD_VG_NAME
done

for id in $(seq 0 $((OSD_TO_CREATE - 1))); do
    device_name=/dev/$OSD_VG_NAME/$OSD_LV_NAME.$id
    CEPH_VOLUME="$CEPHADM ceph-volume \
		 --fsid $FSID \
		 --config $CONFIG \
		 --keyring $TMPDIR/keyring.bootstrap.osd --"

    # prepare the osd
    # --no-systemd: unit management is done by `cephadm deploy` below
    $CEPH_VOLUME lvm prepare --bluestore --data $device_name --no-systemd
    $CEPH_VOLUME lvm batch --no-auto $device_name --yes --no-systemd

    # osd id and osd fsid
    # pull the assigned id/fsid back out of `lvm list` json
    $CEPH_VOLUME lvm list --format json $device_name > $TMPDIR/osd.map
    osd_id=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_id"? | select(.)')
    osd_fsid=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_fsid"? | select(.)')

    # deploy the osd
    $CEPHADM deploy --name osd.$osd_id \
	--fsid $FSID \
	--keyring $TMPDIR/keyring.bootstrap.osd \
	--config $CONFIG \
	--osd-fsid $osd_fsid
done
328
329 # add node-exporter
330 ${CEPHADM//--image $IMAGE_MASTER/} deploy \
331 --name node-exporter.a --fsid $FSID
332 cond="curl 'http://localhost:9100' | grep -q 'Node Exporter'"
333 is_available "node-exporter" "$cond" 10
334
335 # add prometheus
336 cat ${CEPHADM_SAMPLES_DIR}/prometheus.json | \
337 ${CEPHADM//--image $IMAGE_MASTER/} deploy \
338 --name prometheus.a --fsid $FSID --config-json -
339 cond="curl 'localhost:9095/api/v1/query?query=up'"
340 is_available "prometheus" "$cond" 10
341
342 # add grafana
343 cat ${CEPHADM_SAMPLES_DIR}/grafana.json | \
344 ${CEPHADM//--image $IMAGE_MASTER/} deploy \
345 --name grafana.a --fsid $FSID --config-json -
346 cond="curl --insecure 'https://localhost:3000' | grep -q 'grafana'"
347 is_available "grafana" "$cond" 50
348
349 # add nfs-ganesha
350 nfs_stop
351 nfs_rados_pool=$(cat ${CEPHADM_SAMPLES_DIR}/nfs.json | jq -r '.["pool"]')
352 $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
353 ceph osd pool create $nfs_rados_pool 64
354 $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
355 rados --pool nfs-ganesha --namespace nfs-ns create conf-nfs.a
356 $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
357 ceph orch pause
358 $CEPHADM deploy --name nfs.a \
359 --fsid $FSID \
360 --keyring $KEYRING \
361 --config $CONFIG \
362 --config-json ${CEPHADM_SAMPLES_DIR}/nfs.json
363 cond="$SUDO ss -tlnp '( sport = :nfs )' | grep 'ganesha.nfsd'"
364 is_available "nfs" "$cond" 10
365 $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
366 ceph orch resume
367
368 ## run
369 # WRITE ME
370
371 ## unit
372 $CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
373 $CEPHADM unit --fsid $FSID --name mon.a -- is-active
374 expect_false $CEPHADM unit --fsid $FSID --name mon.xyz -- is-active
375 $CEPHADM unit --fsid $FSID --name mon.a -- disable
376 expect_false $CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
377 $CEPHADM unit --fsid $FSID --name mon.a -- enable
378 $CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
379
380 ## shell
381 $CEPHADM shell --fsid $FSID -- true
382 $CEPHADM shell --fsid $FSID -- test -d /var/log/ceph
383 expect_false $CEPHADM --timeout 10 shell --fsid $FSID -- sleep 60
384 $CEPHADM --timeout 60 shell --fsid $FSID -- sleep 10
385 $CEPHADM shell --fsid $FSID --mount $TMPDIR -- stat /mnt/$(basename $TMPDIR)
386
387 ## enter
388 expect_false $CEPHADM enter
389 $CEPHADM enter --fsid $FSID --name mon.a -- test -d /var/lib/ceph/mon/ceph-a
390 $CEPHADM enter --fsid $FSID --name mgr.x -- test -d /var/lib/ceph/mgr/ceph-x
391 $CEPHADM enter --fsid $FSID --name mon.a -- pidof ceph-mon
392 expect_false $CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mon
393 $CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mgr
394 # this triggers a bug in older versions of podman, including 18.04's 1.6.2
395 #expect_false $CEPHADM --timeout 5 enter --fsid $FSID --name mon.a -- sleep 30
396 $CEPHADM --timeout 60 enter --fsid $FSID --name mon.a -- sleep 10
397
398 ## ceph-volume
399 $CEPHADM ceph-volume --fsid $FSID -- inventory --format=json \
400 | jq '.[]'
401
402 ## preserve test state
403 [ $CLEANUP = false ] && exit 0
404
405 ## rm-daemon
406 # mon and osd require --force
407 expect_false $CEPHADM rm-daemon --fsid $FSID --name mon.a
408 # mgr does not
409 $CEPHADM rm-daemon --fsid $FSID --name mgr.x
410
411 ## rm-cluster
412 expect_false $CEPHADM rm-cluster --fsid $FSID
413 $CEPHADM rm-cluster --fsid $FSID --force
414
415 echo PASS