#!/bin/bash -ex

SCRIPT_NAME=$(basename ${BASH_SOURCE[0]})
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# cleanup during exit
[ -z "$CLEANUP" ] && CLEANUP=true

FSID='00000000-0000-0000-0000-0000deadbeef'

# images that are used
IMAGE_MASTER=${IMAGE_MASTER:-'quay.ceph.io/ceph-ci/ceph:master'}
IMAGE_PACIFIC=${IMAGE_PACIFIC:-'quay.ceph.io/ceph-ci/ceph:pacific'}
#IMAGE_OCTOPUS=${IMAGE_OCTOPUS:-'quay.ceph.io/ceph-ci/ceph:octopus'}
IMAGE_DEFAULT=${IMAGE_MASTER}

OSD_IMAGE_NAME="${SCRIPT_NAME%.*}_osd.img"
OSD_IMAGE_SIZE='6G'
OSD_TO_CREATE=2
OSD_VG_NAME=${SCRIPT_NAME%.*}
OSD_LV_NAME=${SCRIPT_NAME%.*}

CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm
CEPHADM_SAMPLES_DIR=${CEPHADM_SRC_DIR}/samples

[ -z "$SUDO" ] && SUDO=sudo

if [ -z "$CEPHADM" ]; then
    CEPHADM=${CEPHADM_SRC_DIR}/cephadm
fi

# at this point, we need $CEPHADM set
if ! [ -x "$CEPHADM" ]; then
    echo "cephadm not found. Please set \$CEPHADM"
    exit 1
fi

# add image to args
CEPHADM_ARGS="$CEPHADM_ARGS --image $IMAGE_DEFAULT"

# combine into a single var
CEPHADM_BIN="$CEPHADM"
CEPHADM="$SUDO $CEPHADM_BIN $CEPHADM_ARGS"
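
# NOTE: $CEPHADM_BIN stays the bare path so individual tests can pick a
# different image (via CEPHADM_IMAGE or --image), while $CEPHADM is the
# sudo'd invocation pinned to $IMAGE_DEFAULT that most tests below use.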

# clean up previous run(s)?
$CEPHADM rm-cluster --fsid $FSID --force
$SUDO vgchange -an $OSD_VG_NAME || true
loopdev=$($SUDO losetup -a | grep $(basename $OSD_IMAGE_NAME) | awk -F : '{print $1}')
if ! [ "$loopdev" = "" ]; then
    $SUDO losetup -d $loopdev
fi

# TMPDIR for test data
[ -d "$TMPDIR" ] || TMPDIR=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX)
[ -d "$TMPDIR_TEST_MULTIPLE_MOUNTS" ] || TMPDIR_TEST_MULTIPLE_MOUNTS=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX)

function cleanup()
{
    if [ $CLEANUP = false ]; then
        # preserve the TMPDIR state
        echo "========================"
        echo "!!! CLEANUP=$CLEANUP !!!"
        echo
        echo "TMPDIR=$TMPDIR"
        echo "========================"
        return
    fi

    dump_all_logs $FSID
    rm -rf $TMPDIR
}
trap cleanup EXIT

function expect_false()
{
    set -x
    if eval "$@"; then return 1; else return 0; fi
}
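
# expect_false eval's its arguments, so a compound command (pipes, redirects)
# can be passed as a single quoted string; inverting the exit status also
# keeps 'bash -e' from aborting on the expected failure.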

# expect_return_code $expected_code $command ...
function expect_return_code()
{
    set -x
    local expected_code="$1"
    shift
    local command="$@"

    set +e
    eval "$command"
    local return_code="$?"
    set -e

    if [ ! "$return_code" -eq "$expected_code" ]; then return 1; else return 0; fi
}

function is_available()
{
    local name="$1"
    local condition="$2"
    local tries="$3"

    local num=0
    while ! eval "$condition"; do
        num=$(($num + 1))
        if [ "$num" -ge $tries ]; then
            echo "$name is not available"
            false
        fi
        sleep 5
    done

    echo "$name is available"
    true
}
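
# is_available re-evals $condition every 5 seconds, up to $tries attempts.
# On timeout the bare 'false' aborts the whole run, since the script runs
# under 'bash -ex' and the function is called outside any conditional.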

function dump_log()
{
    local fsid="$1"
    local name="$2"
    local num_lines="$3"

    if [ -z "$num_lines" ]; then
        num_lines=100
    fi

    echo '-------------------------'
    echo 'dump daemon log:' $name
    echo '-------------------------'

    $CEPHADM logs --fsid $fsid --name $name -- --no-pager -n $num_lines
}

function dump_all_logs()
{
    local fsid="$1"
    local names=$($CEPHADM ls | jq -r '.[] | select(.fsid == "'$fsid'").name')

    echo 'dumping logs for daemons: ' $names
    for name in $names; do
        dump_log $fsid $name
    done
}

function nfs_stop()
{
    # stop the running nfs server
    local units="nfs-server nfs-kernel-server"
    for unit in $units; do
        if systemctl status $unit < /dev/null; then
            $SUDO systemctl stop $unit
        fi
    done

    # ensure the NFS port is no longer in use
    expect_false "$SUDO ss -tlnp '( sport = :nfs )' | grep LISTEN"
}
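
# A kernel NFS server would already hold port 2049 and the nfs-ganesha daemon
# deployed later could not listen; nfs_stop frees the port first, and the ss
# check proves nothing is still bound to it.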

## prepare + check host
$SUDO $CEPHADM check-host

## run a gather-facts (output to stdout)
$SUDO $CEPHADM gather-facts

## version + --image
$SUDO CEPHADM_IMAGE=$IMAGE_PACIFIC $CEPHADM_BIN version
$SUDO CEPHADM_IMAGE=$IMAGE_PACIFIC $CEPHADM_BIN version \
    | grep 'ceph version 16'
#$SUDO CEPHADM_IMAGE=$IMAGE_OCTOPUS $CEPHADM_BIN version
#$SUDO CEPHADM_IMAGE=$IMAGE_OCTOPUS $CEPHADM_BIN version \
#    | grep 'ceph version 15'
$SUDO $CEPHADM_BIN --image $IMAGE_MASTER version | grep 'ceph version'

# try forcing docker; this won't work if docker isn't installed
systemctl status docker > /dev/null && ( $CEPHADM --docker version | grep 'ceph version' ) || echo "docker not installed"

## test shell before bootstrap, when the crash dir isn't (yet) present on this host
$CEPHADM shell --fsid $FSID -- ceph -v | grep 'ceph version'
$CEPHADM shell --fsid $FSID -e FOO=BAR -- printenv | grep FOO=BAR

# test stdin
echo foo | $CEPHADM shell -- cat | grep -q foo

## bootstrap
ORIG_CONFIG=`mktemp -p $TMPDIR`
CONFIG=`mktemp -p $TMPDIR`
MONCONFIG=`mktemp -p $TMPDIR`
KEYRING=`mktemp -p $TMPDIR`
IP=127.0.0.1
cat <<EOF > $ORIG_CONFIG
[global]
        log to file = true
        osd crush chooseleaf type = 0
EOF
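
# Bootstrap a minimal single-host cluster: --skip-mon-network avoids deriving
# public_network from --mon-ip (we are on loopback), --skip-monitoring-stack
# leaves prometheus/grafana/etc. to the manual deploy tests below, and
# --allow-overwrite permits clobbering --output-* files left by a prior run.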
$CEPHADM bootstrap \
    --mon-id a \
    --mgr-id x \
    --mon-ip $IP \
    --fsid $FSID \
    --config $ORIG_CONFIG \
    --output-config $CONFIG \
    --output-keyring $KEYRING \
    --output-pub-ssh-key $TMPDIR/ceph.pub \
    --allow-overwrite \
    --skip-mon-network \
    --skip-monitoring-stack
test -e $CONFIG
test -e $KEYRING
rm -f $ORIG_CONFIG

$SUDO test -e /var/log/ceph/$FSID/ceph-mon.a.log
$SUDO test -e /var/log/ceph/$FSID/ceph-mgr.x.log

for u in ceph.target \
         ceph-$FSID.target \
         ceph-$FSID@mon.a \
         ceph-$FSID@mgr.x; do
    systemctl is-enabled $u
    systemctl is-active $u
done
systemctl | grep system-ceph | grep -q .slice  # naming is escaped and annoying

# check ceph -s works (via shell w/ passed config/keyring)
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph -s | grep $FSID

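# The daemons in this test are placed by hand with 'cephadm deploy', so mark
# these service types unmanaged to keep the orchestrator (the mgr) from
# scheduling or removing daemons behind the test's back.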
for t in mon mgr node-exporter prometheus grafana; do
    $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
        ceph orch apply $t --unmanaged
done

## ls
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").fsid' \
    | grep $FSID
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mgr.x").fsid' \
    | grep $FSID

# make sure the version is returned correctly
$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").version' | grep -q \\.

## deploy
# add mon.b
cp $CONFIG $MONCONFIG
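# mon.b gets a custom addrv on non-default ports (v2 default is 3300, v1 is
# 6789) so it can coexist with mon.a on the same loopback IP.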
243 echo "public addrv = [v2:$IP:3301,v1:$IP:6790]" >> $MONCONFIG
244 $CEPHADM deploy --name mon.b \
245 --fsid $FSID \
246 --keyring /var/lib/ceph/$FSID/mon.a/keyring \
247 --config $MONCONFIG
248 for u in ceph-$FSID@mon.b; do
249 systemctl is-enabled $u
250 systemctl is-active $u
251 done
252 cond="$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
253 ceph mon stat | grep '2 mons'"
254 is_available "mon.b" "$cond" 30
255
256 # add mgr.y
257 $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
258 ceph auth get-or-create mgr.y \
259 mon 'allow profile mgr' \
260 osd 'allow *' \
261 mds 'allow *' > $TMPDIR/keyring.mgr.y
262 $CEPHADM deploy --name mgr.y \
263 --fsid $FSID \
264 --keyring $TMPDIR/keyring.mgr.y \
265 --config $CONFIG
266 for u in ceph-$FSID@mgr.y; do
267 systemctl is-enabled $u
268 systemctl is-active $u
269 done
270
271 for f in `seq 1 30`; do
272 if $CEPHADM shell --fsid $FSID \
273 --config $CONFIG --keyring $KEYRING -- \
274 ceph -s -f json-pretty \
275 | jq '.mgrmap.num_standbys' | grep -q 1 ; then break; fi
276 sleep 1
277 done
278 $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
279 ceph -s -f json-pretty \
280 | jq '.mgrmap.num_standbys' | grep -q 1

# add osd.{1,2,..}
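# Back the OSDs with a sparse file attached to a loop device; the device
# becomes an LVM VG that is carved into one LV per OSD below.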
dd if=/dev/zero of=$TMPDIR/$OSD_IMAGE_NAME bs=1 count=0 seek=$OSD_IMAGE_SIZE
loop_dev=$($SUDO losetup -f)
$SUDO vgremove -f $OSD_VG_NAME || true
$SUDO losetup $loop_dev $TMPDIR/$OSD_IMAGE_NAME
$SUDO pvcreate $loop_dev && $SUDO vgcreate $OSD_VG_NAME $loop_dev

# osd bootstrap keyring
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph auth get client.bootstrap-osd > $TMPDIR/keyring.bootstrap.osd

# create lvs first so ceph-volume doesn't overlap with lv creation
for id in `seq 0 $((OSD_TO_CREATE-1))`; do
    $SUDO lvcreate -l $((100/$OSD_TO_CREATE))%VG -n $OSD_LV_NAME.$id $OSD_VG_NAME
done
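
# For each LV: create the OSD with ceph-volume (--no-systemd, since cephadm
# lays down its own systemd unit at deploy time), read the osd id and fsid
# back out of 'ceph-volume lvm list', then hand both to 'cephadm deploy'.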

for id in `seq 0 $((OSD_TO_CREATE-1))`; do
    device_name=/dev/$OSD_VG_NAME/$OSD_LV_NAME.$id
    CEPH_VOLUME="$CEPHADM ceph-volume \
        --fsid $FSID \
        --config $CONFIG \
        --keyring $TMPDIR/keyring.bootstrap.osd --"

    # prepare the osd
    $CEPH_VOLUME lvm prepare --bluestore --data $device_name --no-systemd
    $CEPH_VOLUME lvm batch --no-auto $device_name --yes --no-systemd

    # osd id and osd fsid
    $CEPH_VOLUME lvm list --format json $device_name > $TMPDIR/osd.map
    osd_id=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_id"? | select(.)')
    osd_fsid=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_fsid"? | select(.)')

    # deploy the osd
    $CEPHADM deploy --name osd.$osd_id \
        --fsid $FSID \
        --keyring $TMPDIR/keyring.bootstrap.osd \
        --config $CONFIG \
        --osd-fsid $osd_fsid
done

# add node-exporter
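# ${CEPHADM//--image $IMAGE_DEFAULT/} is bash pattern substitution that strips
# the pinned --image argument, letting cephadm fall back to its per-daemon
# default image for non-ceph daemons like node-exporter.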
${CEPHADM//--image $IMAGE_DEFAULT/} deploy \
    --name node-exporter.a --fsid $FSID
cond="curl 'http://localhost:9100' | grep -q 'Node Exporter'"
is_available "node-exporter" "$cond" 10

# add prometheus
cat ${CEPHADM_SAMPLES_DIR}/prometheus.json | \
    ${CEPHADM//--image $IMAGE_DEFAULT/} deploy \
        --name prometheus.a --fsid $FSID --config-json -
cond="curl 'localhost:9095/api/v1/query?query=up'"
is_available "prometheus" "$cond" 10

# add grafana
cat ${CEPHADM_SAMPLES_DIR}/grafana.json | \
    ${CEPHADM//--image $IMAGE_DEFAULT/} deploy \
        --name grafana.a --fsid $FSID --config-json -
cond="curl --insecure 'https://localhost:3000' | grep -q 'grafana'"
is_available "grafana" "$cond" 50

# add nfs-ganesha
nfs_stop
nfs_rados_pool=$(cat ${CEPHADM_SAMPLES_DIR}/nfs.json | jq -r '.["pool"]')
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph osd pool create $nfs_rados_pool 64
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    rados --pool nfs-ganesha --namespace nfs-ns create conf-nfs.a
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph orch pause
$CEPHADM deploy --name nfs.a \
    --fsid $FSID \
    --keyring $KEYRING \
    --config $CONFIG \
    --config-json ${CEPHADM_SAMPLES_DIR}/nfs.json
cond="$SUDO ss -tlnp '( sport = :nfs )' | grep 'ganesha.nfsd'"
is_available "nfs" "$cond" 10
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph orch resume
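
# Note: the orchestrator is paused around the manual nfs.a deploy above,
# presumably so the mgr does not act on a daemon it did not schedule, and is
# resumed only once ganesha is confirmed listening.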

# add alertmanager via custom container
alertmanager_image=$(cat ${CEPHADM_SAMPLES_DIR}/custom_container.json | jq -r '.image')
tcp_ports=$(cat ${CEPHADM_SAMPLES_DIR}/custom_container.json | jq -r '.ports | map_values(.|tostring) | join(" ")')
cat ${CEPHADM_SAMPLES_DIR}/custom_container.json | \
    ${CEPHADM//--image $IMAGE_DEFAULT/} \
        --image $alertmanager_image \
        deploy \
        --tcp-ports "$tcp_ports" \
        --name container.alertmanager.a \
        --fsid $FSID \
        --config-json -
cond="$CEPHADM enter --fsid $FSID --name container.alertmanager.a -- test -f \
      /etc/alertmanager/alertmanager.yml"
is_available "alertmanager.yml" "$cond" 10
cond="curl 'http://localhost:9093' | grep -q 'Alertmanager'"
is_available "alertmanager" "$cond" 10
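
# custom_container.json drives a generic container deployment here: the image
# and TCP ports are read out of the sample JSON, and the same JSON is fed to
# --config-json on stdin to populate files like alertmanager.yml inside the
# container.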

## run
# WRITE ME

## unit
$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- is-active
expect_false $CEPHADM unit --fsid $FSID --name mon.xyz -- is-active
$CEPHADM unit --fsid $FSID --name mon.a -- disable
expect_false $CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- enable
$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- status
$CEPHADM unit --fsid $FSID --name mon.a -- stop
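# per the LSB convention, systemctl status exits 3 when a unit is stopped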
expect_return_code 3 $CEPHADM unit --fsid $FSID --name mon.a -- status
$CEPHADM unit --fsid $FSID --name mon.a -- start

## shell
$CEPHADM shell --fsid $FSID -- true
$CEPHADM shell --fsid $FSID -- test -d /var/log/ceph
expect_false $CEPHADM --timeout 10 shell --fsid $FSID -- sleep 60
$CEPHADM --timeout 60 shell --fsid $FSID -- sleep 10
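# --mount takes one or more host paths; each is exposed in the container under
# /mnt/<basename>, which the stat below verifies for $TMPDIR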
$CEPHADM shell --fsid $FSID --mount $TMPDIR $TMPDIR_TEST_MULTIPLE_MOUNTS -- stat /mnt/$(basename $TMPDIR)

## enter
expect_false $CEPHADM enter
$CEPHADM enter --fsid $FSID --name mon.a -- test -d /var/lib/ceph/mon/ceph-a
$CEPHADM enter --fsid $FSID --name mgr.x -- test -d /var/lib/ceph/mgr/ceph-x
$CEPHADM enter --fsid $FSID --name mon.a -- pidof ceph-mon
expect_false $CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mon
$CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mgr
# this triggers a bug in older versions of podman, including 18.04's 1.6.2
#expect_false $CEPHADM --timeout 5 enter --fsid $FSID --name mon.a -- sleep 30
$CEPHADM --timeout 60 enter --fsid $FSID --name mon.a -- sleep 10

## ceph-volume
$CEPHADM ceph-volume --fsid $FSID -- inventory --format=json \
    | jq '.[]'

## preserve test state
[ $CLEANUP = false ] && exit 0

## rm-daemon
# mon and osd require --force
expect_false $CEPHADM rm-daemon --fsid $FSID --name mon.a
# mgr does not
$CEPHADM rm-daemon --fsid $FSID --name mgr.x

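## zap-osds
# zapping wipes the cluster's OSD devices, so it refuses to run without --force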
expect_false $CEPHADM zap-osds --fsid $FSID
$CEPHADM zap-osds --fsid $FSID --force

## rm-cluster
expect_false $CEPHADM rm-cluster --fsid $FSID --zap-osds
$CEPHADM rm-cluster --fsid $FSID --force --zap-osds

echo PASS