# --- test harness identity and defaults -------------------------------------
SCRIPT_NAME=$(basename "${BASH_SOURCE[0]}")
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# allow the caller to opt out of cleanup by exporting CLEANUP=false
[ -z "$CLEANUP" ] && CLEANUP=true
# fixed, recognizable fsid so reruns can find and remove a previous cluster
FSID='00000000-0000-0000-0000-0000deadbeef'

# images that are used
IMAGE_MAIN=${IMAGE_MAIN:-'quay.ceph.io/ceph-ci/ceph:main'}
IMAGE_PACIFIC=${IMAGE_PACIFIC:-'quay.ceph.io/ceph-ci/ceph:pacific'}
#IMAGE_OCTOPUS=${IMAGE_OCTOPUS:-'quay.ceph.io/ceph-ci/ceph:octopus'}
IMAGE_DEFAULT=${IMAGE_MAIN}

# names for the loopback-file-backed OSD image / VG / LVs, derived from the
# script name so concurrent tests don't collide
OSD_IMAGE_NAME="${SCRIPT_NAME%.*}_osd.img"
OSD_VG_NAME=${SCRIPT_NAME%.*}
OSD_LV_NAME=${SCRIPT_NAME%.*}

# TMPDIR for test data
[ -d "$TMPDIR" ] || TMPDIR=$(mktemp -d "tmp.$SCRIPT_NAME.XXXXXX")
[ -d "$TMPDIR_TEST_MULTIPLE_MOUNTS" ] || TMPDIR_TEST_MULTIPLE_MOUNTS=$(mktemp -d "tmp.$SCRIPT_NAME.XXXXXX")

# cephadm sources live relative to this script inside the ceph tree
CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm
CEPHADM_SAMPLES_DIR=${CEPHADM_SRC_DIR}/samples

[ -z "$SUDO" ] && SUDO=sudo
# If cephadm is already installed on the system, use that one, avoid building
if [ -z "$CEPHADM" ] && command -v cephadm >/dev/null; then
    CEPHADM="$(command -v cephadm)"
fi

# otherwise build a standalone cephadm "binary" from the source tree
if [ -z "$CEPHADM" ]; then
    CEPHADM=$(mktemp -p "$TMPDIR" tmp.cephadm.XXXXXX)
    ${CEPHADM_SRC_DIR}/build.sh "$CEPHADM"
fi

# at this point, we need $CEPHADM set
if ! [ -x "$CEPHADM" ]; then
    echo "cephadm not found. Please set \$CEPHADM"
    exit 1
fi
# always pass the default image to cephadm invocations
CEPHADM_ARGS="$CEPHADM_ARGS --image $IMAGE_DEFAULT"

# combine into a single var: CEPHADM_BIN keeps the bare binary path while
# CEPHADM becomes the full "sudo <binary> <args>" command used below
CEPHADM_BIN="$CEPHADM"
CEPHADM="$SUDO $CEPHADM_BIN $CEPHADM_ARGS"
# clean up previous run(s)?
$CEPHADM rm-cluster --fsid $FSID --force
# deactivate any leftover test VG and detach the loop device that backed it
$SUDO vgchange -an $OSD_VG_NAME || true
loopdev=$($SUDO losetup -a | grep "$(basename "$OSD_IMAGE_NAME")" | awk -F : '{print $1}')
if ! [ "$loopdev" = "" ]; then
    $SUDO losetup -d "$loopdev"
fi

# when cleanup is disabled, make it loud so the operator knows state remains
if [ "$CLEANUP" = false ]; then
    # preserve the TMPDIR state
    echo "========================"
    echo "!!! CLEANUP=$CLEANUP !!!"
    echo "========================"
fi
# expect_false <command...>
# Succeed (return 0) only when the given command fails; used to assert that
# an operation is rejected.
function expect_false()
{
    if eval "$@"; then return 1; else return 0; fi
}
# expect_return_code $expected_code $command ...
# Run the command and succeed only if its exit status equals $expected_code.
function expect_return_code()
{
    local expected_code="$1"
    shift
    local command="$@"

    eval "$command"
    local return_code="$?"

    if [ ! "$return_code" -eq "$expected_code" ]; then return 1; else return 0; fi
}
# is_available $name $condition $tries
# Poll until $condition (an eval-able command string) succeeds, sleeping
# between attempts, for at most $tries attempts. Reports availability on
# stdout; fails if the condition never becomes true.
function is_available()
{
    local name="$1"
    local condition="$2"
    local tries="$3"

    local num=1
    while ! eval "$condition"; do
        if [ "$num" -ge "$tries" ]; then
            echo "$name is not available"
            return 1
        fi
        sleep 5
        num=$((num + 1))
    done

    echo "$name is available"
}
# NOTE(review): mangled fragment of a per-daemon log dump helper — the
# enclosing function header, the $num_lines default assignment and the 'fi'
# are not present in this chunk, so the code is left byte-identical rather
# than guessed at. It appears to default $num_lines when empty, print a
# banner, then tail the daemon's journal via 'cephadm logs' — TODO confirm
# against the full file.
130 if [ -z $num_lines ]; then
134 echo '-------------------------'
135 echo 'dump daemon log:' $name
136 echo '-------------------------'
138 $CEPHADM logs
--fsid $fsid --name $name -- --no-pager -n $num_lines
# NOTE(review): mangled fragment of dump_all_logs() — the 'local fsid'
# declaration, the loop body (presumably a call into the per-daemon log dump
# helper above) and the closing 'done'/'}' are missing from this chunk; left
# byte-identical. It lists daemon names for $fsid via 'cephadm ls' + jq and
# iterates over them.
141 function dump_all_logs
()
144 local names
=$
($CEPHADM ls | jq
-r '.[] | select(.fsid == "'$fsid'").name')
146 echo 'dumping logs for daemons: ' $names
147 for name
in $names; do
# NOTE(review): mangled fragment — the 'local' keyword implies an enclosing
# function whose header is outside this chunk, and the for/if terminators
# are missing; left byte-identical. It stops any host NFS server units so
# the port is free for the cephadm-managed ganesha daemon, then asserts
# nothing is listening on the NFS port.
154 # stop the running nfs server
155 local units
="nfs-server nfs-kernel-server"
156 for unit
in $units; do
157 if systemctl
--no-pager status
$unit > /dev
/null
; then
158 $SUDO systemctl stop
$unit
162 # ensure the NFS port is no longer in use
163 expect_false
"$SUDO ss -tlnp '( sport = :nfs )' | grep LISTEN"
## prepare + check host
$SUDO $CEPHADM check-host

## run a gather-facts (output to stdout)
$SUDO $CEPHADM gather-facts

## NOTE: cephadm version is, as of around May 2023, no longer basing the
## output for `cephadm version` on the version of the containers. The version
## reported is that of the "binary" and is determined during the ceph build.
## `cephadm version` should NOT require sudo/root.
$CEPHADM_BIN version | grep 'cephadm version'
# Typically cmake should be running the cephadm build script with CLI arguments
# that embed version info into the "binary". If not using a cephadm build via
# cmake you can set `NO_BUILD_INFO` to skip this check.
if [ -z "$NO_BUILD_INFO" ]; then
    # a properly built binary reports real values, never the placeholders
    $CEPHADM_BIN version | grep -v 'UNSET'
    $CEPHADM_BIN version | grep -v 'UNKNOWN'
fi
## test shell before bootstrap, when crash dir isn't (yet) present on this host
$CEPHADM shell --fsid $FSID -- ceph -v | grep 'ceph version'
# -e passes environment variables through to the container
$CEPHADM shell --fsid $FSID -e FOO=BAR -- printenv | grep FOO=BAR

# stdin should be piped through the shell container
echo foo | $CEPHADM shell -- cat | grep -q foo

# the shell commands a bit above this seems to cause the
# /var/lib/ceph/<fsid> directory to be made. Since we now
# check in bootstrap that there are no clusters with the same
# fsid based on the directory existing, we need to make sure
# this directory is gone before bootstrapping. We can
# accomplish this with another rm-cluster
$CEPHADM rm-cluster --fsid $FSID --force
# scratch config/keyring files consumed and produced by bootstrap below
ORIG_CONFIG=$(mktemp -p "$TMPDIR")
CONFIG=$(mktemp -p "$TMPDIR")
MONCONFIG=$(mktemp -p "$TMPDIR")
KEYRING=$(mktemp -p "$TMPDIR")
# NOTE(review): mangled fragment — a here-doc writing a minimal ceph.conf to
# $ORIG_CONFIG (its 'EOF' terminator is missing from this chunk) followed by
# trailing arguments of what is presumably a '$CEPHADM bootstrap' invocation
# whose leading lines are also missing. Left byte-identical; comments are
# placed only above the here-doc start so its content is not altered.
208 cat <<EOF > $ORIG_CONFIG
211 osd crush chooseleaf type = 0
218 --config $ORIG_CONFIG \
219 --output-config $CONFIG \
220 --output-keyring $KEYRING \
221 --output-pub-ssh-key $TMPDIR/ceph.pub \
224 --skip-monitoring-stack
# verify bootstrap produced per-daemon log files on the host
$SUDO test -e /var/log/ceph/$FSID/ceph-mon.a.log
$SUDO test -e /var/log/ceph/$FSID/ceph-mgr.x.log
# NOTE(review): mangled fragment — checks the systemd units created by
# bootstrap are enabled/active, verifies a system-ceph .slice exists, runs
# 'ceph -s' through the shell, sets several orch services unmanaged, and
# checks 'cephadm ls' reports the expected fsid/version for mon.a / mgr.x.
# The 'for u in ...' list continuation, the 'done' terminators, the command
# after the first shell's trailing '\', and the grep targets after the
# 'select(...).fsid' continuations are all missing from this chunk; left
# byte-identical rather than guessed at.
232 for u
in ceph.target \
236 systemctl is-enabled
$u
237 systemctl is-active
$u
239 systemctl |
grep system-ceph |
grep -q .slice
# naming is escaped and annoying
241 # check ceph -s works (via shell w/ passed config/keyring)
242 $CEPHADM shell
--fsid $FSID --config $CONFIG --keyring $KEYRING -- \
245 for t
in mon mgr node-exporter prometheus grafana
; do
246 $CEPHADM shell
--fsid $FSID --config $CONFIG --keyring $KEYRING -- \
247 ceph orch apply
$t --unmanaged
251 $CEPHADM ls | jq
'.[]' | jq
'select(.name == "mon.a").fsid' \
253 $CEPHADM ls | jq
'.[]' | jq
'select(.name == "mgr.x").fsid' \
256 # make sure the version is returned correctly
257 $CEPHADM ls | jq
'.[]' | jq
'select(.name == "mon.a").version' |
grep -q \\.
# NOTE(review): mangled fragment — deploys a second monitor (mon.b) with an
# explicit public addrv via 'cephadm _orch deploy', checks its systemd unit,
# then polls until 'ceph mon stat' reports 2 mons. The leading 'jq -n
# --arg fsid/--arg name mon.b' lines and the loop 'done' are missing from
# this chunk; $IP is referenced but defined outside this chunk — TODO
# confirm. Left byte-identical.
261 cp $CONFIG $MONCONFIG
262 echo "public addrv = [v2:$IP:3301,v1:$IP:6790]" >> $MONCONFIG
266 --arg keyring
/var
/lib
/ceph
/$FSID/mon.a
/keyring \
267 --arg config
"$MONCONFIG" \
268 '{"fsid": $fsid, "name": $name, "params":{"keyring": $keyring, "config": $config}}' | \
269 $CEPHADM _orch deploy
270 for u
in ceph-
$FSID@mon.b
; do
271 systemctl is-enabled
$u
272 systemctl is-active
$u
274 cond
="$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
275 ceph mon stat | grep '2 mons'"
276 is_available
"mon.b" "$cond" 30
# NOTE(review): mangled fragment — creates auth for a second mgr (mgr.y),
# deploys it via '_orch deploy', checks its systemd unit, then polls
# 'ceph -s' (up to 30 iterations) until one standby mgr is reported and
# asserts it one final time. The 'jq -n' header lines, loop terminators and
# the sleep between retries are missing from this chunk; left byte-identical
# rather than guessed at.
279 $CEPHADM shell
--fsid $FSID --config $CONFIG --keyring $KEYRING -- \
280 ceph auth get-or-create mgr.y \
281 mon
'allow profile mgr' \
283 mds
'allow *' > $TMPDIR/keyring.mgr.y
287 --arg keyring
$TMPDIR/keyring.mgr.y \
288 --arg config
"$CONFIG" \
289 '{"fsid": $fsid, "name": $name, "params":{"keyring": $keyring, "config": $config}}' | \
290 $CEPHADM _orch deploy
291 for u
in ceph-
$FSID@mgr.y
; do
292 systemctl is-enabled
$u
293 systemctl is-active
$u
296 for f
in `seq 1 30`; do
297 if $CEPHADM shell
--fsid $FSID \
298 --config $CONFIG --keyring $KEYRING -- \
299 ceph
-s -f json-pretty \
300 | jq
'.mgrmap.num_standbys' |
grep -q 1 ; then break; fi
303 $CEPHADM shell
--fsid $FSID --config $CONFIG --keyring $KEYRING -- \
304 ceph
-s -f json-pretty \
305 | jq
'.mgrmap.num_standbys' |
grep -q 1
## osd backing store: a sparse loopback file carved into an LVM VG
# NOTE(review): $OSD_IMAGE_SIZE is defined outside this chunk — confirm.
# bs=1 count=0 seek=SIZE creates a sparse file of exactly SIZE bytes.
dd if=/dev/zero of=$TMPDIR/$OSD_IMAGE_NAME bs=1 count=0 seek=$OSD_IMAGE_SIZE
loop_dev=$($SUDO losetup -f)
$SUDO vgremove -f $OSD_VG_NAME || true
$SUDO losetup $loop_dev $TMPDIR/$OSD_IMAGE_NAME
$SUDO pvcreate $loop_dev && $SUDO vgcreate $OSD_VG_NAME $loop_dev

# osd bootstrap keyring
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph auth get client.bootstrap-osd > $TMPDIR/keyring.bootstrap.osd
# NOTE(review): mangled fragment — pre-creates one LV per OSD, then for each
# LV runs ceph-volume prepare (plus a 'lvm batch' call), extracts the osd id
# and osd fsid from 'lvm list --format json', and deploys each OSD via
# '_orch deploy'. The loop 'done' terminators, the continuation between the
# CEPH_VOLUME assignment and its '--keyring' line, and the 'jq -n' header of
# the deploy are missing from this chunk; left byte-identical.
# NOTE(review): both loops use `seq 0 $((--OSD_TO_CREATE))`; in bash
# arithmetic '--VAR' is a pre-decrement that MUTATES OSD_TO_CREATE, so the
# second loop would iterate over one fewer id than the first — verify
# against the full file (likely intended: $((OSD_TO_CREATE - 1))).
318 # create lvs first so ceph-volume doesn't overlap with lv creation
319 for id
in `seq 0 $((--OSD_TO_CREATE))`; do
320 $SUDO lvcreate
-l $
((100/$OSD_TO_CREATE))%VG
-n $OSD_LV_NAME.
$id $OSD_VG_NAME
323 for id
in `seq 0 $((--OSD_TO_CREATE))`; do
324 device_name
=/dev
/$OSD_VG_NAME/$OSD_LV_NAME.
$id
325 CEPH_VOLUME
="$CEPHADM ceph-volume \
328 --keyring $TMPDIR/keyring.bootstrap.osd --"
331 $CEPH_VOLUME lvm prepare
--bluestore --data $device_name --no-systemd
332 $CEPH_VOLUME lvm
batch --no-auto $device_name --yes --no-systemd
334 # osd id and osd fsid
335 $CEPH_VOLUME lvm list
--format json
$device_name > $TMPDIR/osd.map
336 osd_id
=$
($SUDO cat $TMPDIR/osd.map | jq
-cr '.. | ."ceph.osd_id"? | select(.)')
337 osd_fsid
=$
($SUDO cat $TMPDIR/osd.map | jq
-cr '.. | ."ceph.osd_fsid"? | select(.)')
342 --arg name osd.
$osd_id \
343 --arg keyring
$TMPDIR/keyring.bootstrap.osd \
344 --arg config
"$CONFIG" \
345 --arg osd_fsid
$osd_fsid \
346 '{"fsid": $fsid, "name": $name, "params":{"keyring": $keyring, "config": $config, "osd_fsid": $osd_fsid}}' | \
347 $CEPHADM _orch deploy
# NOTE(review): mangled fragment — deploys node-exporter, prometheus and
# grafana via '_orch deploy' (with '--image $IMAGE_DEFAULT' stripped from
# $CEPHADM so the default monitoring images are pulled instead of the ceph
# image) and then curl-polls each daemon's endpoint until it answers. The
# leading 'jq -n --arg fsid ...' line of each deploy is missing from this
# chunk; left byte-identical.
353 --arg name node-exporter.a \
354 '{"fsid": $fsid, "name": $name}' | \
355 ${CEPHADM//--image $IMAGE_DEFAULT/} _orch deploy
356 cond
="curl 'http://localhost:9100' | grep -q 'Node Exporter'"
357 is_available
"node-exporter" "$cond" 10
362 --arg name prometheus.a \
363 --argjson config_blobs
"$(cat ${CEPHADM_SAMPLES_DIR}/prometheus.json)" \
364 '{"fsid": $fsid, "name": $name, "config_blobs": $config_blobs}' | \
365 ${CEPHADM//--image $IMAGE_DEFAULT/} _orch deploy
366 cond
="curl 'localhost:9095/api/v1/query?query=up'"
367 is_available
"prometheus" "$cond" 10
372 --arg name grafana.a \
373 --argjson config_blobs
"$(cat ${CEPHADM_SAMPLES_DIR}/grafana.json)" \
374 '{"fsid": $fsid, "name": $name, "config_blobs": $config_blobs}' | \
375 ${CEPHADM//--image $IMAGE_DEFAULT/} _orch deploy
376 cond
="curl --insecure 'https://localhost:3000' | grep -q 'grafana'"
377 is_available
"grafana" "$cond" 50
## nfs: pre-create the rados pool and the (empty) ganesha config object
# read the pool name straight from the sample spec (no need for cat | jq)
nfs_rados_pool=$(jq -r '.["pool"]' ${CEPHADM_SAMPLES_DIR}/nfs.json)
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    ceph osd pool create $nfs_rados_pool 64
$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \
    rados --pool nfs-ganesha --namespace nfs-ns create conf-nfs.a
# NOTE(review): mangled fragment — a shell command whose body is missing
# (original lines 387-390 absent, including the 'jq -n' header), then the
# nfs daemon deploy from the sample spec, a poll until ganesha listens on
# the NFS port, and a final shell command whose continuation is also
# missing. Left byte-identical rather than guessed at.
386 $CEPHADM shell
--fsid $FSID --config $CONFIG --keyring $KEYRING -- \
391 --arg keyring
"$KEYRING" \
392 --arg config
"$CONFIG" \
393 --argjson config_blobs
"$(cat ${CEPHADM_SAMPLES_DIR}/nfs.json)" \
394 '{"fsid": $fsid, "name": $name, "params": {"keyring": $keyring, "config": $config}, "config_blobs": $config_blobs}' | \
395 ${CEPHADM} _orch deploy
396 cond
="$SUDO ss -tlnp '( sport = :nfs )' | grep 'ganesha.nfsd'"
397 is_available
"nfs" "$cond" 10
398 $CEPHADM shell
--fsid $FSID --config $CONFIG --keyring $KEYRING -- \
# add alertmanager via custom container
# pull the image name and port list from the sample spec (jq reads the file
# directly; no need for cat | jq)
alertmanager_image=$(jq -r '.image' ${CEPHADM_SAMPLES_DIR}/custom_container.json)
tcp_ports=$(jq .ports ${CEPHADM_SAMPLES_DIR}/custom_container.json)
# NOTE(review): mangled fragment — deploys alertmanager as a custom
# container from the sample spec (again with '--image $IMAGE_DEFAULT'
# stripped so the custom image is used), then polls for its config file
# inside the container and for the web UI on port 9093. The leading
# 'jq -n --arg fsid ...' line is missing from this chunk; left
# byte-identical.
406 --arg name container.alertmanager.a \
407 --arg keyring
$TMPDIR/keyring.bootstrap.osd \
408 --arg config
"$CONFIG" \
409 --arg image
"$alertmanager_image" \
410 --argjson tcp_ports
"${tcp_ports}" \
411 --argjson config_blobs
"$(cat ${CEPHADM_SAMPLES_DIR}/custom_container.json)" \
412 '{"fsid": $fsid, "name": $name, "image": $image, "params": {"keyring": $keyring, "config": $config, "tcp_ports": $tcp_ports}, "config_blobs": $config_blobs}' | \
413 ${CEPHADM//--image $IMAGE_DEFAULT/} _orch deploy
414 cond
="$CEPHADM enter --fsid $FSID --name container.alertmanager.a -- test -f \
415 /etc/alertmanager/alertmanager.yml"
416 is_available
"alertmanager.yml" "$cond" 10
417 cond
="curl 'http://localhost:9093' | grep -q 'Alertmanager'"
418 is_available
"alertmanager" "$cond" 10
## exercise 'cephadm unit' systemd pass-through against mon.a
$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- is-active
# a daemon that does not exist must fail
expect_false $CEPHADM unit --fsid $FSID --name mon.xyz -- is-active
# disable/enable round-trip
$CEPHADM unit --fsid $FSID --name mon.a -- disable
expect_false $CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
$CEPHADM unit --fsid $FSID --name mon.a -- enable
$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled
# stop/start round-trip; systemctl status exits 3 for a stopped unit
$CEPHADM unit --fsid $FSID --name mon.a -- status
$CEPHADM unit --fsid $FSID --name mon.a -- stop
expect_return_code 3 $CEPHADM unit --fsid $FSID --name mon.a -- status
$CEPHADM unit --fsid $FSID --name mon.a -- start
## shell
$CEPHADM shell --fsid $FSID -- true
$CEPHADM shell --fsid $FSID -- test -d /var/log/ceph
# --timeout should kill a long-running command, and not a short one
expect_false $CEPHADM --timeout 10 shell --fsid $FSID -- sleep 60
$CEPHADM --timeout 60 shell --fsid $FSID -- sleep 10
# multiple --mount sources must all appear under /mnt in the container
$CEPHADM shell --fsid $FSID --mount $TMPDIR $TMPDIR_TEST_MULTIPLE_MOUNTS -- stat /mnt/$(basename $TMPDIR)

## enter
expect_false $CEPHADM enter
$CEPHADM enter --fsid $FSID --name mon.a -- test -d /var/lib/ceph/mon/ceph-a
$CEPHADM enter --fsid $FSID --name mgr.x -- test -d /var/lib/ceph/mgr/ceph-x
# each daemon container only runs its own process
$CEPHADM enter --fsid $FSID --name mon.a -- pidof ceph-mon
expect_false $CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mon
$CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mgr
# this triggers a bug in older versions of podman, including 18.04's 1.6.2
#expect_false $CEPHADM --timeout 5 enter --fsid $FSID --name mon.a -- sleep 30
$CEPHADM --timeout 60 enter --fsid $FSID --name mon.a -- sleep 10
# NOTE(review): mangled fragment — 'cephadm ceph-volume inventory' smoke
# test in JSON format; the pipeline continuation after the trailing '\'
# (presumably a jq filter) is missing from this chunk; left byte-identical.
455 $CEPHADM ceph-volume
--fsid $FSID -- inventory
--format=json \
## preserve test state: skip all teardown below when CLEANUP=false
# quote $CLEANUP so '[' does not break if the variable is ever unset
[ "$CLEANUP" = false ] && exit 0
## rm-daemon
# mon and osd require --force
expect_false $CEPHADM rm-daemon --fsid $FSID --name mon.a
# mgr.x can be removed without --force
$CEPHADM rm-daemon --fsid $FSID --name mgr.x

## zap-osds requires --force too
expect_false $CEPHADM zap-osds --fsid $FSID
$CEPHADM zap-osds --fsid $FSID --force

## rm-cluster: --zap-osds is only honored together with --force
expect_false $CEPHADM rm-cluster --fsid $FSID --zap-osds
$CEPHADM rm-cluster --fsid $FSID --force --zap-osds