3 # rbd_mirror_helpers.sh - shared rbd-mirror daemon helper functions
5 # The script starts two ("local" and "remote") clusters using the mstart.sh script,
6 # creates a temporary directory used for cluster configs, daemon logs, admin
7 # sockets and temporary files, and launches the rbd-mirror daemon.
9 # There are several env variables useful when troubleshooting a test failure:
11 # RBD_MIRROR_NOCLEANUP - if not empty, don't run the cleanup (stop processes,
12 # destroy the clusters and remove the temp directory)
13 # on exit, so it is possible to check the test state
15 # RBD_MIRROR_TEMDIR - use this path when creating the temporary directory
16 # (should not exist) instead of running mktemp(1).
17 # RBD_MIRROR_ARGS - use this to pass additional arguments to the started rbd-mirror daemons.
19 # RBD_MIRROR_VARGS - use this to pass additional arguments to vstart.sh
20 # when starting clusters.
22 # The cleanup can be done as a separate step, running the script with
23 # `cleanup ${RBD_MIRROR_TEMDIR}' arguments.
25 # Note: like other workunit tests, rbd_mirror.sh expects to find the ceph binaries in PATH.
28 # Thus a typical troubleshooting session:
30 # From Ceph src dir (CEPH_SRC_PATH), start the test in NOCLEANUP mode and with
31 # TEMPDIR pointing to a known location:
34 # PATH=$CEPH_SRC_PATH:$PATH
35 # RBD_MIRROR_NOCLEANUP=1 RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \
36 # ../qa/workunits/rbd/rbd_mirror.sh
38 # After the test failure cd to TEMPDIR and check the current state:
40 # cd /tmp/tmp.rbd_mirror
42 # less rbd-mirror.cluster1_daemon.$pid.log
43 # ceph --cluster cluster1 -s
44 # ceph --cluster cluster2 -s
45 # rbd --cluster cluster2 -p mirror ls
46 # rbd --cluster cluster2 -p mirror journal status --image test
47 # ceph --admin-daemon rbd-mirror.cluster1_daemon.cluster1.$pid.asok help
50 # You can also execute commands (functions) from the script:
53 # export RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror
54 # ../qa/workunits/rbd/rbd_mirror.sh status
55 # ../qa/workunits/rbd/rbd_mirror.sh stop_mirror cluster1
56 # ../qa/workunits/rbd/rbd_mirror.sh start_mirror cluster2
57 # ../qa/workunits/rbd/rbd_mirror.sh flush cluster2
60 # Eventually, run the cleanup:
63 # RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \
64 # ../qa/workunits/rbd/rbd_mirror.sh cleanup
70 PARENT_POOL
=mirror_parent
73 export CEPH_ARGS
="--id ${USER_ID}"
75 CEPH_ROOT
=$
(readlink
-f $
(dirname $0)/..
/..
/..
/src
)
78 if [ -e CMakeCache.txt
]; then
83 # needed for ceph CLI under cmake
84 export LD_LIBRARY_PATH
=${CEPH_ROOT}/lib
:${LD_LIBRARY_PATH}
85 export PYTHONPATH
=${PYTHONPATH}:${CEPH_SRC}/pybind
86 for x
in ${CEPH_ROOT}/lib
/cython_modules
/lib
* ; do
87 export PYTHONPATH
="${PYTHONPATH}:${x}"
91 # These vars facilitate running this script in an environment with
92 # ceph installed from packages, like teuthology. These are not defined by default.
95 # RBD_MIRROR_USE_EXISTING_CLUSTER - if set, do not start and stop ceph clusters
96 # RBD_MIRROR_USE_RBD_MIRROR - if set, use an existing instance of rbd-mirror
97 # running as ceph client $CEPH_ID. If empty,
98 # this script will start and stop rbd-mirror
104 # Parse a value in format cluster[:instance] and set cluster and instance vars.
105 set_cluster_instance
()
108 local cluster_var_name
=$2
109 local instance_var_name
=$3
114 if [ "${instance}" = "${val}" ]; then
115 # instance was not specified, use default
119 eval ${cluster_var_name}=${cluster}
120 eval ${instance_var_name}=${instance}
125 local local_cluster
=$1
129 set_cluster_instance
"${local_cluster}" local_cluster instance
131 if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
132 echo $
(ceph-conf
--cluster $local_cluster --name "client.${CEPH_ID}" 'admin socket')
134 echo "${TEMPDIR}/rbd-mirror.${local_cluster}_daemon.${instance}.${cluster}.asok"
143 set_cluster_instance
"${cluster}" cluster instance
145 if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
146 echo $
(ceph-conf
--cluster $cluster --name "client.${CEPH_ID}" 'pid file')
148 echo "${TEMPDIR}/rbd-mirror.${cluster}_daemon.${instance}.pid"
154 echo $
(date '+%F %T') $@ |
tee -a "${TEMPDIR}/rbd-mirror.test.log" >&2
159 local expected
="$1" ; shift
160 local out
=${TEMPDIR}/expect_failure.out
162 if "$@" > ${out} 2>&1 ; then
167 if [ -z "${expected}" ]; then
171 if ! grep -q "${expected}" ${out} ; then
182 trap cleanup INT TERM EXIT
184 if [ -n "${RBD_MIRROR_TEMDIR}" ]; then
185 test -d "${RBD_MIRROR_TEMDIR}" ||
186 mkdir
"${RBD_MIRROR_TEMDIR}"
187 TEMPDIR
="${RBD_MIRROR_TEMDIR}"
193 if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
195 CEPH_ARGS
='' ${CEPH_SRC}/mstart.sh ${CLUSTER1} -n ${RBD_MIRROR_VARGS}
196 CEPH_ARGS
='' ${CEPH_SRC}/mstart.sh ${CLUSTER2} -n ${RBD_MIRROR_VARGS}
198 CEPH_ARGS
='' ceph
--conf run
/${CLUSTER1}/ceph.conf \
199 auth get-or-create client.
${USER_ID} mon
'profile rbd' osd
'profile rbd' >> \
200 run
/${CLUSTER1}/keyring
201 CEPH_ARGS
='' ceph
--conf run
/${CLUSTER2}/ceph.conf \
202 auth get-or-create client.
${USER_ID} mon
'profile rbd' osd
'profile rbd' >> \
203 run
/${CLUSTER2}/keyring
205 rm -f ${TEMPDIR}/${CLUSTER1}.conf
206 ln -s $
(readlink
-f run
/${CLUSTER1}/ceph.conf
) \
207 ${TEMPDIR}/${CLUSTER1}.conf
208 rm -f ${TEMPDIR}/${CLUSTER2}.conf
209 ln -s $
(readlink
-f run
/${CLUSTER2}/ceph.conf
) \
210 ${TEMPDIR}/${CLUSTER2}.conf
215 CEPH_ARGS
='' ceph
--cluster ${CLUSTER1} osd pool create
${POOL} 64 64
216 CEPH_ARGS
='' ceph
--cluster ${CLUSTER1} osd pool create
${PARENT_POOL} 64 64
217 CEPH_ARGS
='' ceph
--cluster ${CLUSTER2} osd pool create
${PARENT_POOL} 64 64
218 CEPH_ARGS
='' ceph
--cluster ${CLUSTER2} osd pool create
${POOL} 64 64
220 CEPH_ARGS
='' rbd
--cluster ${CLUSTER1} pool init
${POOL}
221 CEPH_ARGS
='' rbd
--cluster ${CLUSTER2} pool init
${POOL}
222 CEPH_ARGS
='' rbd
--cluster ${CLUSTER1} pool init
${PARENT_POOL}
223 CEPH_ARGS
='' rbd
--cluster ${CLUSTER2} pool init
${PARENT_POOL}
225 rbd
--cluster ${CLUSTER1} mirror pool
enable ${POOL} pool
226 rbd
--cluster ${CLUSTER2} mirror pool
enable ${POOL} pool
227 rbd
--cluster ${CLUSTER1} mirror pool
enable ${PARENT_POOL} image
228 rbd
--cluster ${CLUSTER2} mirror pool
enable ${PARENT_POOL} image
230 rbd
--cluster ${CLUSTER1} mirror pool peer add ${POOL} ${CLUSTER2}
231 rbd
--cluster ${CLUSTER2} mirror pool peer add ${POOL} ${CLUSTER1}
232 rbd
--cluster ${CLUSTER1} mirror pool peer add ${PARENT_POOL} ${CLUSTER2}
233 rbd
--cluster ${CLUSTER2} mirror pool peer add ${PARENT_POOL} ${CLUSTER1}
238 test -n "${RBD_MIRROR_NOCLEANUP}" && return
239 local cluster instance
243 for cluster
in "${CLUSTER1}" "${CLUSTER2}"; do
244 for instance
in `seq 0 9`; do
245 stop_mirror
"${cluster}:${instance}"
249 if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
251 CEPH_ARGS
='' ${CEPH_SRC}/mstop.sh
${CLUSTER1}
252 CEPH_ARGS
='' ${CEPH_SRC}/mstop.sh
${CLUSTER2}
254 CEPH_ARGS
='' ceph
--cluster ${CLUSTER1} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
255 CEPH_ARGS
='' ceph
--cluster ${CLUSTER2} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
256 CEPH_ARGS
='' ceph
--cluster ${CLUSTER1} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it
257 CEPH_ARGS
='' ceph
--cluster ${CLUSTER2} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it
259 test "${RBD_MIRROR_TEMDIR}" = "${TEMPDIR}" ||
268 set_cluster_instance
"${cluster}" cluster instance
270 test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return
273 --cluster ${cluster} \
275 --pid-file=$
(daemon_pid_file
"${cluster}:${instance}") \
276 --log-file=${TEMPDIR}/rbd-mirror.${cluster}_daemon.${instance}.log \
277 --admin-socket=${TEMPDIR}/rbd-mirror.${cluster}_daemon.${instance}.\
$cluster.asok \
278 --rbd-mirror-delete-retry-interval=5 \
279 --rbd-mirror-image-state-check-interval=5 \
280 --rbd-mirror-journal-poll-age=1 \
281 --rbd-mirror-pool-replayers-refresh-interval=5 \
282 --debug-rbd=30 --debug-journaler=30 \
283 --debug-rbd_mirror=30 \
293 test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return
296 pid
=$
(cat $
(daemon_pid_file
"${cluster}") 2>/dev
/null
) ||
:
300 for s
in 1 2 4 8 16 32; do
302 ps auxww |
awk -v pid
=${pid} '$2 == pid {print; exit 1}' && break
304 ps auxww |
awk -v pid
=${pid} '$2 == pid {print; exit 1}'
306 rm -f $
(daemon_asok_file
"${cluster}" "${CLUSTER1}")
307 rm -f $
(daemon_asok_file
"${cluster}" "${CLUSTER2}")
308 rm -f $
(daemon_pid_file
"${cluster}")
313 local cluster
=$1 ; shift
316 set_cluster_instance
"${cluster}" cluster instance
318 local asok_file
=$
(daemon_asok_file
"${cluster}:${instance}" "${cluster}")
319 test -S "${asok_file}"
321 ceph
--admin-daemon ${asok_file} $@
326 local cluster daemon image_pool image
328 for cluster
in ${CLUSTER1} ${CLUSTER2}
330 echo "${cluster} status"
331 ceph
--cluster ${cluster} -s
334 for image_pool
in ${POOL} ${PARENT_POOL}
336 echo "${cluster} ${image_pool} images"
337 rbd
--cluster ${cluster} -p ${image_pool} ls
340 echo "${cluster} ${image_pool} mirror pool status"
341 rbd
--cluster ${cluster} -p ${image_pool} mirror pool status
--verbose
344 for image
in `rbd --cluster ${cluster} -p ${image_pool} ls 2>/dev/null`
346 echo "image ${image} info"
347 rbd
--cluster ${cluster} -p ${image_pool} info ${image}
349 echo "image ${image} journal status"
350 rbd
--cluster ${cluster} -p ${image_pool} journal status --image ${image}
358 for cluster
in "${CLUSTER1}" "${CLUSTER2}"
360 local pid_file
=$
(daemon_pid_file
${cluster} )
361 if [ ! -e ${pid_file} ]
363 echo "${cluster} rbd-mirror not running or unknown" \
364 "(${pid_file} not exist)"
369 pid
=$
(cat ${pid_file} 2>/dev
/null
) ||
:
372 echo "${cluster} rbd-mirror not running or unknown" \
373 "(can't find pid using ${pid_file})"
378 echo "${daemon} rbd-mirror process in ps output:"
380 awk -v pid
=${pid} 'NR == 1 {print} $2 == pid {print; exit 1}'
383 echo "${cluster} rbd-mirror not running" \
384 "(can't find pid $pid in ps output)"
390 local asok_file
=$
(daemon_asok_file
${cluster} ${cluster})
391 if [ ! -S "${asok_file}" ]
393 echo "${cluster} rbd-mirror asok is unknown (${asok_file} not exits)"
398 echo "${cluster} rbd-mirror status"
399 ceph
--admin-daemon ${asok_file} rbd mirror status
411 local cmd
="rbd mirror flush"
415 cmd
="${cmd} ${pool}/${image}"
418 admin_daemon
"${cluster}" ${cmd}
421 test_image_replay_state
()
427 local current_state
=stopped
429 admin_daemon
"${cluster}" help |
430 fgrep
"\"rbd mirror status ${pool}/${image}\"" &&
431 admin_daemon
"${cluster}" rbd mirror status ${pool}/${image} |
432 grep -i 'state.*Replaying' &&
433 current_state=started
435 test "${test_state}" = "${current_state}"
438 wait_for_image_replay_state()
446 # TODO: add a way to force rbd-mirror to update replayers
447 for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
449 test_image_replay_state "${cluster}" "${pool}" "${image}" "${state}" && return 0
454 wait_for_image_replay_started()
460 wait_for_image_replay_state "${cluster}" "${pool}" "${image}" started
463 wait_for_image_replay_stopped()
469 wait_for_image_replay_state "${cluster}" "${pool}" "${image}" stopped
479 # Parse line like below, looking for the first position
480 # [id=, commit_position=[positions=[[object_number=1, tag_tid=3, entry_tid=9], [object_number=0, tag_tid=3, entry_tid=8], [object_number=3, tag_tid=3, entry_tid=7], [object_number=2, tag_tid=3, entry_tid=6]]]]
482 local status_log=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.status
483 rbd --cluster ${cluster} -p ${pool} journal status --image ${image} |
484 tee ${status_log} >&2
485 sed -nEe 's/^.*\[id='"${id_regexp}"',.*positions=\[\[([^]]*)\],.*state=connected.*$/\1/p' \
489 get_master_position()
495 get_position "${cluster}" "${pool}" "${image}" ''
498 get_mirror_position()
504 get_position "${cluster}" "${pool}" "${image}" '..*'
507 wait_for_replay_complete()
509 local local_cluster=$1
513 local s master_pos mirror_pos last_mirror_pos
514 local master_tag master_entry mirror_tag mirror_entry
517 for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16 32 32; do
519 flush "${local_cluster}" "${pool}" "${image}"
520 master_pos=$(get_master_position "${cluster}" "${pool}" "${image}")
521 mirror_pos=$(get_mirror_position "${cluster}" "${pool}" "${image}")
522 test -n "${master_pos}" -a "${master_pos}" = "${mirror_pos}" && return 0
523 test "${mirror_pos}" != "${last_mirror_pos}" && break
526 test "${mirror_pos}" = "${last_mirror_pos}" && return 1
527 last_mirror_pos="${mirror_pos}"
529 # handle the case where the mirror is ahead of the master
530 master_tag=$(echo "${master_pos}" | grep -Eo "tag_tid
=[0-9]*" | cut -d'=' -f 2)
531 mirror_tag=$(echo "${mirror_pos}" | grep -Eo "tag_tid
=[0-9]*" | cut -d'=' -f 2)
532 master_entry=$(echo "${master_pos}" | grep -Eo "entry_tid
=[0-9]*" | cut -d'=' -f 2)
533 mirror_entry=$(echo "${mirror_pos}" | grep -Eo "entry_tid
=[0-9]*" | cut -d'=' -f 2)
534 test "${master_tag}" = "${mirror_tag}" -a ${master_entry} -le ${mirror_entry} && return 0
539 test_status_in_pool_dir
()
544 local state_pattern
=$4
545 local description_pattern
=$5
547 local status_log
=${TEMPDIR}/${cluster}-${image}.mirror_status
548 rbd
--cluster ${cluster} -p ${pool} mirror image status ${image} |
549 tee ${status_log} >&2
550 grep "state: .*${state_pattern}" ${status_log} ||
return 1
551 grep "description: .*${description_pattern}" ${status_log} ||
return 1
554 wait_for_status_in_pool_dir
()
559 local state_pattern
=$4
560 local description_pattern
=$5
562 for s
in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
564 test_status_in_pool_dir
${cluster} ${pool} ${image} ${state_pattern} ${description_pattern} && return 0
571 local cluster
=$1 ; shift
572 local pool
=$1 ; shift
573 local image
=$1 ; shift
581 rbd
--cluster ${cluster} -p ${pool} create --size ${size} \
582 --image-feature layering
,exclusive-lock
,journaling $@
${image}
593 rbd
--cluster ${cluster} -p ${pool} image-meta set ${image} $key $val
602 rbd
--cluster=${cluster} -p ${pool} snap purge ${image}
603 rbd
--cluster=${cluster} -p ${pool} rm ${image}
612 for s
in 1 2 4 8 16 32; do
613 remove_image
${cluster} ${pool} ${image} && return 0
623 local parent_image
=$3
628 rbd
--cluster ${cluster} clone ${parent_pool}/${parent_image}@${parent_snap} \
629 ${clone_pool}/${clone_image} --image-feature layering
,exclusive-lock
,journaling
638 rbd
--cluster ${cluster} -p ${pool} journal client disconnect \
649 rbd
--cluster ${cluster} -p ${pool} snap create ${image}@${snap}
659 rbd
--cluster ${cluster} -p ${pool} snap rm ${image}@${snap}
670 rbd
--cluster ${cluster} -p ${pool} snap rename ${image}@${snap} ${image}@${new_snap}
679 rbd
--cluster ${cluster} -p ${pool} snap purge ${image}
689 rbd
--cluster ${cluster} -p ${pool} snap protect ${image}@${snap}
699 rbd
--cluster ${cluster} -p ${pool} snap unprotect ${image}@${snap}
702 wait_for_snap_present
()
710 for s
in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do
712 rbd
--cluster ${cluster} -p ${pool} info ${image}@${snap_name} ||
continue
726 test -n "${size}" || size
=4096
728 rbd
--cluster ${cluster} -p ${pool} bench ${image} --io-type write \
729 --io-size ${size} --io-threads 1 --io-total $
((size
* count
)) \
738 local duration
=$
(awk 'BEGIN {srand(); print int(10 * rand()) + 5}')
740 timeout
${duration}s ceph_test_rbd_mirror_random_write \
741 --cluster ${cluster} ${pool} ${image} \
742 --debug-rbd=20 --debug-journaler=20 \
743 2> ${TEMPDIR}/rbd-mirror-random-write.log || true
751 local rmt_export
=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.
export
752 local loc_export
=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.
export
754 rm -f ${rmt_export} ${loc_export}
755 rbd
--cluster ${CLUSTER2} -p ${pool} export ${image} ${rmt_export}
756 rbd
--cluster ${CLUSTER1} -p ${pool} export ${image} ${loc_export}
757 cmp ${rmt_export} ${loc_export}
758 rm -f ${rmt_export} ${loc_export}
767 rbd
--cluster=${cluster} mirror image demote ${pool}/${image}
777 rbd
--cluster=${cluster} mirror image promote ${pool}/${image} ${force}
780 set_pool_mirror_mode
()
786 rbd
--cluster=${cluster} -p ${pool} mirror pool enable ${mode}
795 rbd
--cluster=${cluster} mirror image disable ${pool}/${image}
804 rbd
--cluster=${cluster} mirror image enable ${pool}/${image}
814 local current_state
=deleted
815 local current_image_id
817 current_image_id
=$
(get_image_id
${cluster} ${pool} ${image})
818 test -n "${current_image_id}" &&
819 test -z "${image_id}" -o "${image_id}" = "${current_image_id}" &&
820 current_state
=present
822 test "${test_state}" = "${current_state}"
825 wait_for_image_present
()
834 test -n "${image_id}" ||
835 image_id
=$
(get_image_id
${cluster} ${pool} ${image})
837 # TODO: add a way to force rbd-mirror to update replayers
838 for s
in 0.1 1 2 4 8 8 8 8 8 8 8 8 16 16 32 32; do
841 "${cluster}" "${pool}" "${image}" "${state}" "${image_id}" &&
853 rbd
--cluster=${cluster} -p ${pool} info ${image} |
854 sed -ne 's/^.*block_name_prefix: rbd_data\.//p'
857 request_resync_image
()
862 local image_id_var_name
=$1
864 eval "${image_id_var_name}='$(get_image_id ${cluster} ${pool} ${image})'"
865 eval 'test -n "$'${image_id_var_name}'"'
867 rbd
--cluster=${cluster} -p ${pool} mirror image resync ${image}
876 if [ -z "${RBD_MIRROR_TEMDIR}" ]
878 echo "RBD_MIRROR_TEMDIR is not set" >&2
882 TEMPDIR
="${RBD_MIRROR_TEMDIR}"