#!/bin/bash -ex

SCRIPT_NAME=$(basename ${BASH_SOURCE[0]})
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# cleanup during exit
[ -z "$CLEANUP" ] && CLEANUP=true

FSID='00000000-0000-0000-0000-0000deadbeef'

# images that are used
IMAGE_MAIN=${IMAGE_MAIN:-'quay.ceph.io/ceph-ci/ceph:main'}
IMAGE_PACIFIC=${IMAGE_PACIFIC:-'quay.ceph.io/ceph-ci/ceph:pacific'}
#IMAGE_OCTOPUS=${IMAGE_OCTOPUS:-'quay.ceph.io/ceph-ci/ceph:octopus'}
IMAGE_DEFAULT=${IMAGE_MAIN}

OSD_IMAGE_NAME="${SCRIPT_NAME%.*}_osd.img"
OSD_IMAGE_SIZE='6G'
OSD_TO_CREATE=2
OSD_VG_NAME=${SCRIPT_NAME%.*}
OSD_LV_NAME=${SCRIPT_NAME%.*}

# TMPDIR for test data
[ -d "$TMPDIR" ] || TMPDIR=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX)
[ -d "$TMPDIR_TEST_MULTIPLE_MOUNTS" ] || TMPDIR_TEST_MULTIPLE_MOUNTS=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX)

CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm
CEPHADM_SAMPLES_DIR=${CEPHADM_SRC_DIR}/samples

[ -z "$SUDO" ] && SUDO=sudo

if [ -z "$CEPHADM" ]; then
    CEPHADM=`mktemp -p $TMPDIR tmp.cephadm.XXXXXX`
    ${CEPHADM_SRC_DIR}/build.sh "$CEPHADM"
fi

# at this point, we need $CEPHADM set
if ! [ -x "$CEPHADM" ]; then
    echo "cephadm not found. Please set \$CEPHADM"
    exit 1
fi

# add image to args
CEPHADM_ARGS="$CEPHADM_ARGS --image $IMAGE_DEFAULT"

# combine into a single var
CEPHADM_BIN="$CEPHADM"
CEPHADM="$SUDO $CEPHADM_BIN $CEPHADM_ARGS"

# clean up previous run(s)?
$CEPHADM rm-cluster --fsid $FSID --force
$SUDO vgchange -an $OSD_VG_NAME || true
loopdev=$($SUDO losetup -a | grep $(basename $OSD_IMAGE_NAME) | awk -F : '{print $1}')
if ! [ "$loopdev" = "" ]; then
    $SUDO losetup -d $loopdev
fi
function cleanup()
{
    if [ $CLEANUP = false ]; then
        # preserve the TMPDIR state
        echo "========================"
        echo "!!! CLEANUP=$CLEANUP !!!"
        echo
        echo "TMPDIR=$TMPDIR"
        echo "========================"
        return
    fi

    dump_all_logs $FSID
    rm -rf $TMPDIR
}
trap cleanup EXIT

function expect_false()
{
    set -x
    if eval "$@"; then return 1; else return 0; fi
}
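# Hypothetical usage: expect_false succeeds only when its command fails, e.g.
#   expect_false "ls /nonexistent-path"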

# expect_return_code $expected_code $command ...
function expect_return_code()
{
    set -x
    local expected_code="$1"
    shift
    local command="$@"

    set +e
    eval "$command"
    local return_code="$?"
    set -e

    if [ ! "$return_code" -eq "$expected_code" ]; then return 1; else return 0; fi
}
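# Hypothetical usage: systemctl status exits 3 for a stopped unit (see the
# mon.a stop/status check near the end of this script), e.g.
#   expect_return_code 3 systemctl status some-stopped.service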

function is_available()
{
    local name="$1"
    local condition="$2"
    local tries="$3"

    local num=0
    while ! eval "$condition"; do
        num=$(($num + 1))
        if [ "$num" -ge $tries ]; then
            echo "$name is not available"
            false  # the script runs with -e, so this aborts the whole test
        fi
        sleep 5
    done

    echo "$name is available"
    true
}
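# Hypothetical usage: wait up to 10 tries (5s apart) for a local HTTP endpoint:
#   is_available "some-daemon" "curl -s 'http://localhost:1234'" 10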

function dump_log()
{
    local fsid="$1"
    local name="$2"
    local num_lines="$3"

    if [ -z "$num_lines" ]; then
        num_lines=100
    fi

    echo '-------------------------'
    echo 'dump daemon log:' $name
    echo '-------------------------'

    $CEPHADM logs --fsid $fsid --name $name -- --no-pager -n $num_lines
}

function dump_all_logs()
{
    local fsid="$1"
    local names=$($CEPHADM ls | jq -r '.[] | select(.fsid == "'$fsid'").name')

    echo 'dumping logs for daemons: ' $names
    for name in $names; do
        dump_log $fsid $name
    done
}

function nfs_stop()
{
    # stop the running nfs server
    local units="nfs-server nfs-kernel-server"
    for unit in $units; do
        if systemctl --no-pager status $unit > /dev/null; then
            $SUDO systemctl stop $unit
        fi
    done

    # ensure the NFS port is no longer in use
    expect_false "$SUDO ss -tlnp '( sport = :nfs )' | grep LISTEN"
}

## prepare + check host
$SUDO $CEPHADM check-host

## run a gather-facts (output to stdout)
$SUDO $CEPHADM gather-facts

## version + --image
$SUDO CEPHADM_IMAGE=$IMAGE_PACIFIC $CEPHADM_BIN version
$SUDO CEPHADM_IMAGE=$IMAGE_PACIFIC $CEPHADM_BIN version \
    | grep 'ceph version 16'
#$SUDO CEPHADM_IMAGE=$IMAGE_OCTOPUS $CEPHADM_BIN version
#$SUDO CEPHADM_IMAGE=$IMAGE_OCTOPUS $CEPHADM_BIN version \
#    | grep 'ceph version 15'
$SUDO $CEPHADM_BIN --image $IMAGE_MAIN version | grep 'ceph version'

# try forcing docker; this won't work if docker isn't installed
systemctl status docker > /dev/null && ( $CEPHADM --docker version | grep 'ceph version' ) || echo "docker not installed"

## test shell before bootstrap, when crash dir isn't (yet) present on this host
$CEPHADM shell --fsid $FSID -- ceph -v | grep 'ceph version'
$CEPHADM shell --fsid $FSID -e FOO=BAR -- printenv | grep FOO=BAR

# test stdin
echo foo | $CEPHADM shell -- cat | grep -q foo

# The shell commands above cause the /var/lib/ceph/<fsid> directory to be
# created. Since bootstrap now checks that no cluster with the same fsid
# exists (based on that directory existing), make sure the directory is gone
# before bootstrapping. Another rm-cluster accomplishes this.
$CEPHADM rm-cluster --fsid $FSID --force

## bootstrap
ORIG_CONFIG=`mktemp -p $TMPDIR`
CONFIG=`mktemp -p $TMPDIR`
MONCONFIG=`mktemp -p $TMPDIR`
KEYRING=`mktemp -p $TMPDIR`
IP=127.0.0.1
cat <<EOF > $ORIG_CONFIG
[global]
log to file = true
osd crush chooseleaf type = 0
EOF
$CEPHADM bootstrap \
    --mon-id a \
    --mgr-id x \
    --mon-ip $IP \
    --fsid $FSID \
    --config $ORIG_CONFIG \
    --output-config $CONFIG \
    --output-keyring $KEYRING \
    --output-pub-ssh-key $TMPDIR/ceph.pub \
    --allow-overwrite \
    --skip-mon-network \
    --skip-monitoring-stack
test -e $CONFIG
test -e $KEYRING
rm -f $ORIG_CONFIG

$SUDO test -e /var/log/ceph/$FSID/ceph-mon.a.log
$SUDO test -e /var/log/ceph/$FSID/ceph-mgr.x.log

for u in ceph.target \
         ceph-$FSID.target \
         ceph-$FSID@mon.a \
         ceph-$FSID@mgr.x; do
    systemctl is-enabled $u
    systemctl is-active $u
done
systemctl | grep system-ceph | grep -q .slice  # naming is escaped and annoying
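# (systemd escapes the '-' characters in the fsid as \x2d, so the slice shows
#  up as something like system-ceph\x2d<escaped-fsid>.slice; grepping for the
#  prefix is enough)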

# check ceph -s works (via shell w/ passed config/keyring)
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph -s | grep $FSID

for t in mon mgr node-exporter prometheus grafana; do
    $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
        ceph orch apply $t --unmanaged
done

## ls
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").fsid' \
    | grep $FSID
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mgr.x").fsid' \
    | grep $FSID

# make sure the version is returned correctly
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").version' | grep -q \\.
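# (hypothetical output: "version" would be a string like "18.0.0-1234-gabcdef0";
#  the grep only asserts that it contains a dot, i.e. looks like a real version)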

## deploy
# add mon.b
cp $CONFIG $MONCONFIG
echo "public addrv = [v2:$IP:3301,v1:$IP:6790]" >> $MONCONFIG
$CEPHADM deploy --name mon.b \
    --fsid $FSID \
    --keyring /var/lib/ceph/$FSID/mon.a/keyring \
    --config $MONCONFIG
for u in ceph-$FSID@mon.b; do
    systemctl is-enabled $u
    systemctl is-active $u
done
cond="$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph mon stat | grep '2 mons'"
is_available "mon.b" "$cond" 30
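# (`ceph mon stat` prints something like "e2: 2 mons at {a=...,b=...}, ...",
#  so the grep above succeeds once mon.b has joined the monmap)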

# add mgr.y
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph auth get-or-create mgr.y \
        mon 'allow profile mgr' \
        osd 'allow *' \
        mds 'allow *' > $TMPDIR/keyring.mgr.y
$CEPHADM deploy --name mgr.y \
    --fsid $FSID \
    --keyring $TMPDIR/keyring.mgr.y \
    --config $CONFIG
for u in ceph-$FSID@mgr.y; do
    systemctl is-enabled $u
    systemctl is-active $u
done

for f in `seq 1 30`; do
    if $CEPHADM shell --fsid $FSID \
            --config $CONFIG --keyring $KEYRING -- \
        ceph -s -f json-pretty \
        | jq '.mgrmap.num_standbys' | grep -q 1 ; then break; fi
    sleep 1
done
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph -s -f json-pretty \
    | jq '.mgrmap.num_standbys' | grep -q 1

# add osd.{1,2,..}
# (seek with count=0 creates a sparse $OSD_IMAGE_SIZE backing file)
dd if=/dev/zero of=$TMPDIR/$OSD_IMAGE_NAME bs=1 count=0 seek=$OSD_IMAGE_SIZE
loop_dev=$($SUDO losetup -f)
$SUDO vgremove -f $OSD_VG_NAME || true
$SUDO losetup $loop_dev $TMPDIR/$OSD_IMAGE_NAME
$SUDO pvcreate $loop_dev && $SUDO vgcreate $OSD_VG_NAME $loop_dev
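# At this point the storage layout (with the defaults above) is:
#   $TMPDIR/test_cephadm_osd.img (6G, sparse) -> /dev/loopN -> PV -> VG test_cephadm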

# osd bootstrap keyring
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph auth get client.bootstrap-osd > $TMPDIR/keyring.bootstrap.osd

# create lvs first so ceph-volume doesn't overlap with lv creation
for id in `seq 0 $((OSD_TO_CREATE-1))`; do
    $SUDO lvcreate -l $((100/$OSD_TO_CREATE))%VG -n $OSD_LV_NAME.$id $OSD_VG_NAME
done

for id in `seq 0 $((OSD_TO_CREATE-1))`; do
    device_name=/dev/$OSD_VG_NAME/$OSD_LV_NAME.$id
    CEPH_VOLUME="$CEPHADM ceph-volume \
        --fsid $FSID \
        --config $CONFIG \
        --keyring $TMPDIR/keyring.bootstrap.osd --"

    # prepare the osd
    $CEPH_VOLUME lvm prepare --bluestore --data $device_name --no-systemd
    $CEPH_VOLUME lvm batch --no-auto $device_name --yes --no-systemd

    # osd id and osd fsid
    $CEPH_VOLUME lvm list --format json $device_name > $TMPDIR/osd.map
    osd_id=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_id"? | select(.)')
    osd_fsid=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_fsid"? | select(.)')
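    # (`lvm list --format json` reports the LV's tags, including "ceph.osd_id"
    #  and "ceph.osd_fsid"; the recursive-descent jq filter above pulls those
    #  two values out regardless of where they are nested)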

    # deploy the osd
    $CEPHADM deploy --name osd.$osd_id \
        --fsid $FSID \
        --keyring $TMPDIR/keyring.bootstrap.osd \
        --config $CONFIG \
        --osd-fsid $osd_fsid
done

# add node-exporter
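# (the ${CEPHADM//--image $IMAGE_DEFAULT/} expansion below strips the default
#  --image argument, so cephadm falls back to its own default image for this
#  daemon type instead of the ceph image)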
${CEPHADM//--image $IMAGE_DEFAULT/} deploy \
    --name node-exporter.a --fsid $FSID
cond="curl 'http://localhost:9100' | grep -q 'Node Exporter'"
is_available "node-exporter" "$cond" 10

# add prometheus
cat ${CEPHADM_SAMPLES_DIR}/prometheus.json | \
    ${CEPHADM//--image $IMAGE_DEFAULT/} deploy \
        --name prometheus.a --fsid $FSID --config-json -
cond="curl 'localhost:9095/api/v1/query?query=up'"
is_available "prometheus" "$cond" 10

# add grafana
cat ${CEPHADM_SAMPLES_DIR}/grafana.json | \
    ${CEPHADM//--image $IMAGE_DEFAULT/} deploy \
        --name grafana.a --fsid $FSID --config-json -
cond="curl --insecure 'https://localhost:3000' | grep -q 'grafana'"
is_available "grafana" "$cond" 50

# add nfs-ganesha
nfs_stop
nfs_rados_pool=$(cat ${CEPHADM_SAMPLES_DIR}/nfs.json | jq -r '.["pool"]')
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph osd pool create $nfs_rados_pool 64
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    rados --pool nfs-ganesha --namespace nfs-ns create conf-nfs.a
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph orch pause
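# (the orchestrator is paused, presumably so it does not try to reconcile the
#  manually deployed nfs daemon; it is resumed once the daemon is confirmed up)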
$CEPHADM deploy --name nfs.a \
    --fsid $FSID \
    --keyring $KEYRING \
    --config $CONFIG \
    --config-json ${CEPHADM_SAMPLES_DIR}/nfs.json
cond="$SUDO ss -tlnp '( sport = :nfs )' | grep 'ganesha.nfsd'"
is_available "nfs" "$cond" 10
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph orch resume

# add alertmanager via custom container
alertmanager_image=$(cat ${CEPHADM_SAMPLES_DIR}/custom_container.json | jq -r '.image')
tcp_ports=$(cat ${CEPHADM_SAMPLES_DIR}/custom_container.json | jq -r '.ports | map_values(.|tostring) | join(" ")')
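# (.ports is a JSON array of numbers; the jq above turns it into the
#  space-separated string that --tcp-ports expects, e.g. "9093 9094")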
cat ${CEPHADM_SAMPLES_DIR}/custom_container.json | \
    ${CEPHADM//--image $IMAGE_DEFAULT/} \
        --image $alertmanager_image \
        deploy \
        --tcp-ports "$tcp_ports" \
        --name container.alertmanager.a \
        --fsid $FSID \
        --config-json -
cond="$CEPHADM enter --fsid $FSID --name container.alertmanager.a -- test -f \
    /etc/alertmanager/alertmanager.yml"
is_available "alertmanager.yml" "$cond" 10
cond="curl 'http://localhost:9093' | grep -q 'Alertmanager'"
is_available "alertmanager" "$cond" 10

## run
# WRITE ME

## unit
$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- is-active
expect_false $CEPHADM unit --fsid $FSID --name mon.xyz -- is-active
$CEPHADM unit --fsid $FSID --name mon.a -- disable
expect_false $CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- enable
$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- status
$CEPHADM unit --fsid $FSID --name mon.a -- stop
expect_return_code 3 $CEPHADM unit --fsid $FSID --name mon.a -- status  # 3 = unit not running
$CEPHADM unit --fsid $FSID --name mon.a -- start

## shell
$CEPHADM shell --fsid $FSID -- true
$CEPHADM shell --fsid $FSID -- test -d /var/log/ceph
expect_false $CEPHADM --timeout 10 shell --fsid $FSID -- sleep 60
$CEPHADM --timeout 60 shell --fsid $FSID -- sleep 10
$CEPHADM shell --fsid $FSID --mount $TMPDIR $TMPDIR_TEST_MULTIPLE_MOUNTS -- stat /mnt/$(basename $TMPDIR)
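# (each --mount argument is mounted inside the container under
#  /mnt/<basename of the host directory>, hence the stat path above)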

## enter
expect_false $CEPHADM enter
$CEPHADM enter --fsid $FSID --name mon.a -- test -d /var/lib/ceph/mon/ceph-a
$CEPHADM enter --fsid $FSID --name mgr.x -- test -d /var/lib/ceph/mgr/ceph-x
$CEPHADM enter --fsid $FSID --name mon.a -- pidof ceph-mon
expect_false $CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mon
$CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mgr
# this triggers a bug in older versions of podman, including 18.04's 1.6.2
#expect_false $CEPHADM --timeout 5 enter --fsid $FSID --name mon.a -- sleep 30
$CEPHADM --timeout 60 enter --fsid $FSID --name mon.a -- sleep 10

## ceph-volume
$CEPHADM ceph-volume --fsid $FSID -- inventory --format=json \
    | jq '.[]'

## preserve test state
[ $CLEANUP = false ] && exit 0

## rm-daemon
# mon and osd require --force
expect_false $CEPHADM rm-daemon --fsid $FSID --name mon.a
# mgr does not
$CEPHADM rm-daemon --fsid $FSID --name mgr.x

expect_false $CEPHADM zap-osds --fsid $FSID
$CEPHADM zap-osds --fsid $FSID --force

## rm-cluster
expect_false $CEPHADM rm-cluster --fsid $FSID --zap-osds
$CEPHADM rm-cluster --fsid $FSID --force --zap-osds

echo PASS