]> git.proxmox.com Git - ceph.git/blob - ceph/qa/workunits/rbd/rbd_mirror_helpers.sh
5a214689d244447fba9d4721cb38c6abab752dc4
[ceph.git] / ceph / qa / workunits / rbd / rbd_mirror_helpers.sh
1 #!/bin/sh
2 #
3 # rbd_mirror_helpers.sh - shared rbd-mirror daemon helper functions
4 #
5 # The script starts two ("local" and "remote") clusters using the mstart.sh script,
6 # creates a temporary directory, used for cluster configs, daemon logs, admin
7 # socket, temporary files, and launches rbd-mirror daemon.
8 #
9 # There are several env variables useful when troubleshooting a test failure:
10 #
11 # RBD_MIRROR_NOCLEANUP - if not empty, don't run the cleanup (stop processes,
12 # destroy the clusters and remove the temp directory)
13 # on exit, so it is possible to check the test state
14 # after failure.
15 # RBD_MIRROR_TEMDIR - use this path when creating the temporary directory
16 # (should not exist) instead of running mktemp(1).
17 # RBD_MIRROR_ARGS - use this to pass additional arguments to started
18 # rbd-mirror daemons.
19 # RBD_MIRROR_VARGS - use this to pass additional arguments to vstart.sh
20 # when starting clusters.
21 # RBD_MIRROR_INSTANCES - number of daemons to start per cluster
22 # RBD_MIRROR_CONFIG_KEY - if not empty, use config-key for remote cluster
23 # secrets
24 # The cleanup can be done as a separate step, running the script with
25 # `cleanup ${RBD_MIRROR_TEMDIR}' arguments.
26 #
27 # Note: like other workunit tests, rbd_mirror_journal.sh expects to find the ceph binaries
28 # in PATH.
29 #
30 # Thus a typical troubleshooting session:
31 #
32 # From Ceph src dir (CEPH_SRC_PATH), start the test in NOCLEANUP mode and with
33 # TEMPDIR pointing to a known location:
34 #
35 # cd $CEPH_SRC_PATH
36 # PATH=$CEPH_SRC_PATH:$PATH
37 # RBD_MIRROR_NOCLEANUP=1 RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \
38 # ../qa/workunits/rbd/rbd_mirror_journal.sh
39 #
40 # After the test failure cd to TEMPDIR and check the current state:
41 #
42 # cd /tmp/tmp.rbd_mirror
43 # ls
44 # less rbd-mirror.cluster1_daemon.$instance.log
45 # ceph --cluster cluster1 -s
46 # ceph --cluster cluster2 -s
47 # rbd --cluster cluster2 -p mirror ls
48 # rbd --cluster cluster2 -p mirror journal status --image test
49 # ceph --admin-daemon rbd-mirror.cluster1_daemon.cluster1.$pid.asok help
50 # ...
51 #
52 # Also you can execute commands (functions) from the script:
53 #
54 # cd $CEPH_SRC_PATH
55 # export RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror
56 # ../qa/workunits/rbd/rbd_mirror_journal.sh status
57 # ../qa/workunits/rbd/rbd_mirror_journal.sh stop_mirror cluster1
58 # ../qa/workunits/rbd/rbd_mirror_journal.sh start_mirror cluster2
59 # ../qa/workunits/rbd/rbd_mirror_journal.sh flush cluster2
60 # ...
61 #
62 # Eventually, run the cleanup:
63 #
64 # cd $CEPH_SRC_PATH
65 # RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \
66 # ../qa/workunits/rbd/rbd_mirror_journal.sh cleanup
67 #
68
# Locate the xmlstarlet binary: it is installed either as "xmlstarlet" or as
# "xml". The name found is stored in XMLSTARLET for the helpers below.
if command -v xmlstarlet > /dev/null 2>&1; then
    XMLSTARLET=xmlstarlet
elif command -v xml > /dev/null 2>&1; then
    XMLSTARLET=xml
else
    # diagnostics belong on stderr so they are not captured by $(...)
    echo "Missing xmlstarlet binary!" >&2
    exit 1
fi
77
# Number of rbd-mirror daemons per cluster; may be preset in the environment.
: "${RBD_MIRROR_INSTANCES:=2}"

# Fixed names used by the tests.
CLUSTER1=cluster1
CLUSTER2=cluster2
POOL=mirror
PARENT_POOL=mirror_parent
NS1=ns1
NS2=ns2

# Filled in later by setup_pools / setup_tempdir.
PEER_CLUSTER_SUFFIX=
TEMPDIR=

# Settings that may be preset in the environment. MIRROR_USER_ID_PREFIX
# defaults to "<CEPH_ID>." and therefore must follow CEPH_ID.
: "${CEPH_ID:=mirror}"
: "${RBD_IMAGE_FEATURES:=layering,exclusive-lock,journaling}"
: "${MIRROR_USER_ID_PREFIX:=${CEPH_ID}.}"
: "${MIRROR_POOL_MODE:=pool}"
: "${MIRROR_IMAGE_MODE:=journal}"

# Default arguments picked up by the ceph command line tools.
CEPH_ARGS="--id ${CEPH_ID}"
export CEPH_ARGS

# Highest daemon instance index (instances are numbered from 0).
LAST_MIRROR_INSTANCE=$((${RBD_MIRROR_INSTANCES} - 1))
97
# Locate the source tree relative to this script. When the current directory
# is a cmake build directory (it contains CMakeCache.txt), binaries are taken
# from ./bin and CEPH_ROOT becomes the build directory.
CEPH_ROOT=$(readlink -f "$(dirname "$0")/../../../src")
CEPH_BIN=.
CEPH_SRC=.
if [ -e CMakeCache.txt ]; then
    CEPH_SRC=${CEPH_ROOT}
    CEPH_ROOT=${PWD}
    CEPH_BIN=./bin

    # needed for ceph CLI under cmake. The existing value is joined in only
    # when it is non-empty, so no empty path element (which means "current
    # directory") is introduced.
    export LD_LIBRARY_PATH="${CEPH_ROOT}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
    export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${CEPH_SRC}/pybind:${CEPH_ROOT}/lib/cython_modules/lib.3"
fi
110
111 # These vars facilitate running this script in an environment with
112 # ceph installed from packages, like teuthology. These are not defined
113 # by default.
114 #
115 # RBD_MIRROR_USE_EXISTING_CLUSTER - if set, do not start and stop ceph clusters
116 # RBD_MIRROR_USE_RBD_MIRROR - if set, use an existing instance of rbd-mirror
117 # running as ceph client $CEPH_ID. If empty,
118 # this script will start and stop rbd-mirror
119
120 #
121 # Functions
122 #
123
# Parse a value in format cluster[:instance] and set cluster and instance vars.
#
# Arguments:
#   $1 - value to parse: "cluster" or "cluster:instance"
#   $2 - name of the variable that receives the cluster part
#   $3 - name of the variable that receives the instance part ("0" when the
#        value has no ":instance" suffix)
set_cluster_instance()
{
    # Locals carry a prefix so they can never shadow the output variables
    # named by the caller (callers usually ask for "cluster" and "instance").
    local _sci_val="$1"
    local _sci_cluster_var="$2"
    local _sci_instance_var="$3"
    local _sci_cluster="${_sci_val%:*}"
    local _sci_instance="${_sci_val##*:}"

    if [ "${_sci_instance}" = "${_sci_val}" ]; then
        # instance was not specified, use default
        _sci_instance=0
    fi

    # Only the variable names are expanded before eval runs; the values are
    # expanded by eval itself and are therefore never re-parsed as shell code.
    eval "${_sci_cluster_var}=\${_sci_cluster}"
    eval "${_sci_instance_var}=\${_sci_instance}"
}
142
# Print the 'admin socket' path that ceph-conf reports for an rbd-mirror
# daemon.
#
# Arguments:
#   $1 - local cluster, optionally with an instance: cluster[:instance]
#   $2 - peer cluster (stored but not used by this function)
daemon_asok_file()
{
    local local_cluster="$1"
    local cluster="$2"
    local instance

    set_cluster_instance "${local_cluster}" local_cluster instance

    # echo keeps the function's exit status at 0 even when ceph-conf fails
    echo "$(ceph-conf --cluster "${local_cluster}" --name "client.${MIRROR_USER_ID_PREFIX}${instance}" 'admin socket')"
}
153
# Print the 'pid file' path that ceph-conf reports for an rbd-mirror daemon.
#
# Arguments:
#   $1 - cluster, optionally with an instance: cluster[:instance]
daemon_pid_file()
{
    local cluster="$1"
    local instance

    set_cluster_instance "${cluster}" cluster instance

    # echo keeps the function's exit status at 0 even when ceph-conf fails
    echo "$(ceph-conf --cluster "${cluster}" --name "client.${MIRROR_USER_ID_PREFIX}${instance}" 'pid file')"
}
163
# Write a timestamped message to stderr and append it to
# ${TEMPDIR}/rbd-mirror.test.log.
#
# Arguments: the words of the message (joined with spaces)
testlog()
{
    # "$*" keeps the message verbatim: no glob expansion, no whitespace folding
    printf '%s %s\n' "$(date '+%F %T')" "$*" |
        tee -a "${TEMPDIR}/rbd-mirror.test.log" >&2
}
168
# Run a command that is expected to fail.
#
# Arguments:
#   $1   - grep pattern the command output must contain ('' to skip the check)
#   $2.. - the command and its arguments
# Returns: 0 when the command failed (and its output matched the pattern),
#          1 otherwise; in that case the captured output is copied to stderr.
expect_failure()
{
    local expected="$1" ; shift
    local out="${TEMPDIR}/expect_failure.out"

    if "$@" > "${out}" 2>&1 ; then
        cat "${out}" >&2
        return 1
    fi

    if [ -z "${expected}" ]; then
        return 0
    fi

    # '--' stops option parsing so a pattern may start with '-'
    if ! grep -q -- "${expected}" "${out}" ; then
        cat "${out}" >&2
        return 1
    fi

    return 0
}
190
# Print the arguments joined by spaces, with every '/' and ' ' replaced by
# '_', so the result can be used as a single file name.
mkfname()
{
    # printf, unlike echo, never treats an argument such as -n as an option
    printf '%s\n' "$*" | sed -e 's|[/ ]|_|g'
}
195
# Create the cephx users used by the tests and append their keys to the
# cluster keyring at ${CEPH_ROOT}/run/<cluster>/keyring: client.${CEPH_ID}
# plus one rbd-mirror user per daemon instance.
#
# Arguments:
#   $1 - cluster name
create_users()
{
    local cluster="$1"
    local keyring="${CEPH_ROOT}/run/${cluster}/keyring"
    local instance

    CEPH_ARGS='' ceph --cluster "${cluster}" \
        auth get-or-create "client.${CEPH_ID}" \
        mon 'profile rbd' osd 'profile rbd' mgr 'profile rbd' >> \
        "${keyring}"
    for instance in $(seq 0 "${LAST_MIRROR_INSTANCE}"); do
        CEPH_ARGS='' ceph --cluster "${cluster}" \
            auth get-or-create "client.${MIRROR_USER_ID_PREFIX}${instance}" \
            mon 'profile rbd-mirror' osd 'profile rbd' mgr 'profile rbd' >> \
            "${keyring}"
    done
}
211
# Start a cluster with mstart.sh, link its ceph.conf into TEMPDIR as
# <cluster>.conf, create the test users and append a [client.*] section for
# every rbd-mirror daemon instance.
#
# Arguments:
#   $1 - cluster name
# Side effect: leaves the current directory set to ${TEMPDIR}.
setup_cluster()
{
    local cluster="$1"
    local instance

    # RBD_MIRROR_VARGS is left unquoted on purpose: it may hold several options
    CEPH_ARGS='' "${CEPH_SRC}/mstart.sh" "${cluster}" -n ${RBD_MIRROR_VARGS}

    cd "${CEPH_ROOT}" || return 1
    rm -f "${TEMPDIR}/${cluster}.conf"
    ln -s "$(readlink -f "run/${cluster}/ceph.conf")" \
        "${TEMPDIR}/${cluster}.conf"

    cd "${TEMPDIR}" || return 1
    create_users "${cluster}"

    # \$cluster and \$name are written literally for ceph to expand
    for instance in $(seq 0 "${LAST_MIRROR_INSTANCE}"); do
        cat<<EOF >> "${TEMPDIR}/${cluster}.conf"
[client.${MIRROR_USER_ID_PREFIX}${instance}]
    admin socket = ${TEMPDIR}/rbd-mirror.\$cluster-\$name.asok
    pid file = ${TEMPDIR}/rbd-mirror.\$cluster-\$name.pid
    log file = ${TEMPDIR}/rbd-mirror.${cluster}_daemon.${instance}.log
EOF
    done
}
235
# Add a mirroring peer to a pool. When the add fails with exit status 17 the
# existing peer record for the remote site is removed and the add is retried
# after a growing delay.
#
# Arguments:
#   $1   - local cluster
#   $2   - pool
#   $3   - peer spec: [client.<id>@]<remote cluster>
#   $4   - optional name of a variable that receives the new peer uuid; pass
#          '' to skip it when extra arguments follow
#   $5.. - extra arguments for "rbd mirror pool peer add"
# Returns: 0 once the peer was added; the rbd exit status when the add fails
#          with anything other than 17; 1 when every attempt returned 17.
peer_add()
{
    local cluster="$1" ; shift
    local pool="$1" ; shift
    local client_cluster="$1" ; shift
    local remote_cluster="${client_cluster##*@}"

    local uuid_var_name
    if [ $# -gt 0 ]; then
        # consume the slot even when it is the '' placeholder, so that an
        # empty word is never forwarded to rbd through "$@"
        uuid_var_name="$1" ; shift
    fi

    local error_code
    local peer_uuid
    local s

    for s in 1 2 4 8 16 32; do
        # capture the status explicitly instead of toggling the caller's
        # errexit setting
        error_code=0
        peer_uuid=$(rbd --cluster "${cluster}" mirror pool peer add \
            "${pool}" "${client_cluster}" "$@") || error_code=$?

        if [ "${error_code}" -eq 17 ]; then
            # raced with a remote heartbeat ping -- remove and retry
            sleep "${s}"
            peer_uuid=$(rbd mirror pool info --cluster "${cluster}" --pool "${pool}" --format xml | \
                "${XMLSTARLET:-xmlstarlet}" sel -t -v "//peers/peer[site_name='${remote_cluster}']/uuid")

            CEPH_ARGS='' rbd --cluster "${cluster}" --pool "${pool}" mirror pool peer remove "${peer_uuid}"
        elif [ "${error_code}" -ne 0 ]; then
            return "${error_code}"
        else
            if [ -n "${uuid_var_name}" ]; then
                # the value is expanded by eval itself, never re-parsed
                eval "${uuid_var_name}=\${peer_uuid}"
            fi
            return 0
        fi
    done

    return 1
}
276
# Create and initialise the test pools and namespaces on a cluster, enable
# mirroring on them and, unless RBD_MIRROR_MANUAL_PEERS is set, register the
# remote cluster as peer of both pools.
#
# Arguments:
#   $1 - cluster to set up
#   $2 - remote (peer) cluster
# Globals: when RBD_MIRROR_CONFIG_KEY is set, PEER_CLUSTER_SUFFIX is set to
#          -DNE and the peers are added with an explicit mon address and key
#          file fetched from the remote cluster.
setup_pools()
{
    local cluster="$1"
    local remote_cluster="$2"
    local mon_map_file
    local mon_addr
    local admin_key_file
    local uuid

    CEPH_ARGS='' ceph --cluster "${cluster}" osd pool create "${POOL}" 64 64
    CEPH_ARGS='' ceph --cluster "${cluster}" osd pool create "${PARENT_POOL}" 64 64

    CEPH_ARGS='' rbd --cluster "${cluster}" pool init "${POOL}"
    CEPH_ARGS='' rbd --cluster "${cluster}" pool init "${PARENT_POOL}"

    if [ -n "${RBD_MIRROR_CONFIG_KEY}" ]; then
        PEER_CLUSTER_SUFFIX=-DNE
    fi

    CEPH_ARGS='' rbd --cluster "${cluster}" mirror pool enable \
        --site-name "${cluster}${PEER_CLUSTER_SUFFIX}" "${POOL}" "${MIRROR_POOL_MODE}"
    rbd --cluster "${cluster}" mirror pool enable "${PARENT_POOL}" image

    rbd --cluster "${cluster}" namespace create "${POOL}/${NS1}"
    rbd --cluster "${cluster}" namespace create "${POOL}/${NS2}"

    rbd --cluster "${cluster}" mirror pool enable "${POOL}/${NS1}" "${MIRROR_POOL_MODE}"
    rbd --cluster "${cluster}" mirror pool enable "${POOL}/${NS2}" image

    if [ -z "${RBD_MIRROR_MANUAL_PEERS}" ]; then
        if [ -z "${RBD_MIRROR_CONFIG_KEY}" ]; then
            peer_add "${cluster}" "${POOL}" "${remote_cluster}"
            peer_add "${cluster}" "${PARENT_POOL}" "${remote_cluster}"
        else
            mon_map_file="${TEMPDIR}/${remote_cluster}.monmap"
            CEPH_ARGS='' ceph --cluster "${remote_cluster}" mon getmap > "${mon_map_file}"
            # first "mon." line of the map: keep its address field and drop
            # every "/<digits>" part from it
            mon_addr=$(monmaptool --print "${mon_map_file}" | grep -E 'mon\.' |
                head -n 1 | sed -E 's/^[0-9]+: ([^ ]+).+$/\1/' | sed -E 's/\/[0-9]+//g')

            admin_key_file="${TEMPDIR}/${remote_cluster}.client.${CEPH_ID}.key"
            CEPH_ARGS='' ceph --cluster "${remote_cluster}" auth get-key "client.${CEPH_ID}" > "${admin_key_file}"

            # POOL: mon host and key are passed when the peer is added
            CEPH_ARGS='' peer_add "${cluster}" "${POOL}" \
                "client.${CEPH_ID}@${remote_cluster}${PEER_CLUSTER_SUFFIX}" '' \
                --remote-mon-host "${mon_addr}" --remote-key-file "${admin_key_file}"

            # PARENT_POOL: the same attributes are set afterwards, using the
            # uuid that peer_add stores in ${uuid}
            peer_add "${cluster}" "${PARENT_POOL}" "client.${CEPH_ID}@${remote_cluster}${PEER_CLUSTER_SUFFIX}" uuid
            CEPH_ARGS='' rbd --cluster "${cluster}" mirror pool peer set "${PARENT_POOL}" "${uuid}" mon-host "${mon_addr}"
            CEPH_ARGS='' rbd --cluster "${cluster}" mirror pool peer set "${PARENT_POOL}" "${uuid}" key-file "${admin_key_file}"
        fi
    fi
}
329
# Select the temporary directory and store its path in TEMPDIR.
#
# With RBD_MIRROR_TEMDIR set, that directory is used (created when missing)
# and becomes the current directory; otherwise a fresh directory is created
# with mktemp(1).
# Returns: non-zero when the directory cannot be created or entered.
setup_tempdir()
{
    if [ -n "${RBD_MIRROR_TEMDIR}" ]; then
        test -d "${RBD_MIRROR_TEMDIR}" ||
            mkdir "${RBD_MIRROR_TEMDIR}" || return 1
        TEMPDIR="${RBD_MIRROR_TEMDIR}"
        cd "${TEMPDIR}" || return 1
    else
        TEMPDIR=$(mktemp -d) || return 1
    fi
}
341
# Prepare the whole test environment: install the cleanup trap, select the
# temporary directory, start both clusters (unless
# RBD_MIRROR_USE_EXISTING_CLUSTER is set) and set up the pools on each of
# them with the other cluster as peer. When RBD_MIRROR_MIN_COMPAT_CLIENT is
# set it is applied to both clusters.
setup()
{
    # $? is expanded when the trap fires, so cleanup sees the exit status
    trap 'cleanup $?' INT TERM EXIT

    setup_tempdir
    if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
        setup_cluster "${CLUSTER1}"
        setup_cluster "${CLUSTER2}"
    fi

    setup_pools "${CLUSTER1}" "${CLUSTER2}"
    setup_pools "${CLUSTER2}" "${CLUSTER1}"

    if [ -n "${RBD_MIRROR_MIN_COMPAT_CLIENT}" ]; then
        CEPH_ARGS='' ceph --cluster "${CLUSTER1}" osd \
            set-require-min-compat-client "${RBD_MIRROR_MIN_COMPAT_CLIENT}"
        CEPH_ARGS='' ceph --cluster "${CLUSTER2}" osd \
            set-require-min-compat-client "${RBD_MIRROR_MIN_COMPAT_CLIENT}"
    fi
}
363
# Tear down the test environment and exit the shell.
#
# Arguments:
#   $1 - exit status to report (defaults to 0 when omitted)
# On a non-zero status the cluster state is dumped with status first. Unless
# RBD_MIRROR_NOCLEANUP is set, the pools are removed, the rbd-mirror daemons
# are stopped, the clusters are stopped (unless
# RBD_MIRROR_USE_EXISTING_CLUSTER is set) and TEMPDIR is removed, except when
# it is the user supplied RBD_MIRROR_TEMDIR.
# Prints "OK" or "FAIL" and exits with the given status.
cleanup()
{
    local error_code="${1:-0}"

    # Drop the traps first: the exit below would otherwise fire the EXIT trap
    # and run the cleanup a second time after INT/TERM.
    trap - INT TERM EXIT

    # best effort from here on
    set +e

    if [ "${error_code}" -ne 0 ]; then
        status
    fi

    if [ -z "${RBD_MIRROR_NOCLEANUP}" ]; then
        local cluster instance

        CEPH_ARGS='' ceph --cluster "${CLUSTER1}" osd pool rm "${POOL}" "${POOL}" --yes-i-really-really-mean-it
        CEPH_ARGS='' ceph --cluster "${CLUSTER2}" osd pool rm "${POOL}" "${POOL}" --yes-i-really-really-mean-it
        CEPH_ARGS='' ceph --cluster "${CLUSTER1}" osd pool rm "${PARENT_POOL}" "${PARENT_POOL}" --yes-i-really-really-mean-it
        CEPH_ARGS='' ceph --cluster "${CLUSTER2}" osd pool rm "${PARENT_POOL}" "${PARENT_POOL}" --yes-i-really-really-mean-it

        for cluster in "${CLUSTER1}" "${CLUSTER2}"; do
            stop_mirrors "${cluster}"
        done

        if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
            cd "${CEPH_ROOT}"
            CEPH_ARGS='' "${CEPH_SRC}/mstop.sh" "${CLUSTER1}"
            CEPH_ARGS='' "${CEPH_SRC}/mstop.sh" "${CLUSTER2}"
        fi
        # never remove a user supplied directory, and never run rm -Rf on an
        # empty or word-split path
        if [ -n "${TEMPDIR}" ] && [ "${RBD_MIRROR_TEMDIR}" != "${TEMPDIR}" ]; then
            rm -Rf -- "${TEMPDIR}"
        fi
    fi

    if [ "${error_code}" -eq 0 ]; then
        echo "OK"
    else
        echo "FAIL"
    fi

    exit "${error_code}"
}
402
# Start one rbd-mirror daemon (daemonized) with short polling intervals and
# verbose debug logging. Does nothing when RBD_MIRROR_USE_RBD_MIRROR is set.
#
# Arguments:
#   $1 - cluster, optionally with an instance: cluster[:instance]
start_mirror()
{
    local cluster="$1"
    local instance

    set_cluster_instance "${cluster}" cluster instance

    if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
        return 0
    fi

    # RBD_MIRROR_ARGS is left unquoted on purpose: it may hold several options
    rbd-mirror \
        --cluster "${cluster}" \
        --id "${MIRROR_USER_ID_PREFIX}${instance}" \
        --rbd-mirror-delete-retry-interval=5 \
        --rbd-mirror-image-state-check-interval=5 \
        --rbd-mirror-journal-poll-age=1 \
        --rbd-mirror-pool-replayers-refresh-interval=5 \
        --debug-rbd=30 --debug-journaler=30 \
        --debug-rbd_mirror=30 \
        --daemonize=true \
        ${RBD_MIRROR_ARGS}
}
424
# Start every rbd-mirror daemon instance (0..LAST_MIRROR_INSTANCE) of a
# cluster.
#
# Arguments:
#   $1 - cluster name
start_mirrors()
{
    local cluster="$1"
    local instance

    for instance in $(seq 0 "${LAST_MIRROR_INSTANCE}"); do
        start_mirror "${cluster}:${instance}"
    done
}
433
# Stop one rbd-mirror daemon and remove its admin socket and pid files.
# Does nothing when RBD_MIRROR_USE_RBD_MIRROR is set.
#
# Arguments:
#   $1 - cluster, optionally with an instance: cluster[:instance]
#   $2 - optional signal option for kill (for example -9)
# After the kill the process table is polled with growing delays until the
# pid is gone; a process still present after the last poll is printed.
stop_mirror()
{
    local cluster="$1"
    local sig="$2"
    local pid
    local s

    if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
        return 0
    fi

    # quoting the path keeps cat from reading stdin when the path is empty
    pid=$(cat "$(daemon_pid_file "${cluster}")" 2>/dev/null) || :
    if [ -n "${pid}" ]
    then
        # sig is unquoted on purpose: an empty value must vanish
        kill ${sig} "${pid}"
        for s in 1 2 4 8 16 32; do
            sleep "${s}"
            # awk exits 1 while the pid is still listed
            ps auxww | awk -v pid="${pid}" '$2 == pid {print; exit 1}' && break
        done
        ps auxww | awk -v pid="${pid}" '$2 == pid {print; exit 1}'
    fi
    rm -f "$(daemon_asok_file "${cluster}" "${CLUSTER1}")"
    rm -f "$(daemon_asok_file "${cluster}" "${CLUSTER2}")"
    rm -f "$(daemon_pid_file "${cluster}")"
}
456
# Stop every rbd-mirror daemon instance (0..LAST_MIRROR_INSTANCE) of a
# cluster.
#
# Arguments:
#   $1 - cluster name
#   $2 - optional signal option handed to stop_mirror
stop_mirrors()
{
    local cluster="$1"
    local sig="$2"
    local instance

    for instance in $(seq 0 "${LAST_MIRROR_INSTANCE}"); do
        stop_mirror "${cluster}:${instance}" "${sig}"
    done
}
466
# Send an admin socket command to one rbd-mirror daemon.
#
# Arguments:
#   $1   - cluster, optionally with an instance: cluster[:instance]
#   $2.. - the command words for "ceph --admin-daemon <socket>"
# Returns: 1 when the daemon's admin socket does not exist, otherwise the
#          exit status of ceph.
admin_daemon()
{
    local cluster="$1" ; shift
    local instance
    local asok_file

    set_cluster_instance "${cluster}" cluster instance

    asok_file=$(daemon_asok_file "${cluster}:${instance}" "${cluster}") || :
    # explicit return: the check must also hold when the caller runs this
    # function in a context where errexit is ignored (cmd && ...)
    test -S "${asok_file}" || return 1

    ceph --admin-daemon "${asok_file}" "$@"
}
479
# Send an admin socket command until one daemon accepts it, retrying with
# growing delays.
#
# Arguments:
#   $1   - cluster[:instance]; with an instance only that daemon is tried,
#          without one every instance 0..LAST_MIRROR_INSTANCE is tried in turn
#   $2.. - the command words handed to admin_daemon
# Returns: 0 as soon as one admin_daemon call succeeds, 1 when all attempts
#          failed.
admin_daemons()
{
    local cluster_instance="$1" ; shift
    local cluster="${cluster_instance%:*}"
    local instance="${cluster_instance##*:}"
    local loop_instance
    local s

    for s in 0 1 2 4 8 8 8 8 8 8 8 8 16 16; do
        sleep "${s}"
        if [ "${instance}" != "${cluster_instance}" ]; then
            # an explicit instance was given
            admin_daemon "${cluster}:${instance}" "$@" && return 0
        else
            for loop_instance in $(seq 0 "${LAST_MIRROR_INSTANCE}"); do
                admin_daemon "${cluster}:${loop_instance}" "$@" && return 0
            done
        fi
    done
    return 1
}
499
# Send an admin socket command to every rbd-mirror daemon instance
# (0..LAST_MIRROR_INSTANCE) of a cluster.
#
# Arguments:
#   $1   - cluster name
#   $2.. - the command words handed to admin_daemon
all_admin_daemons()
{
    local cluster="$1" ; shift
    local instance

    for instance in $(seq 0 "${LAST_MIRROR_INSTANCE}"); do
        admin_daemon "${cluster}:${instance}" "$@"
    done
}
508
# Dump the state of both clusters for troubleshooting: cluster and service
# status, then for every pool/namespace the images, mirror pool info and
# status, per-image info and journal status and the rbd_mirroring omap
# values; finally the state of every rbd-mirror daemon.
#
# Returns: 1 when a daemon has an empty pid file, is missing from the ps
#          output or has no admin socket; 0 otherwise. A missing pid file is
#          reported but does not change the result.
status()
{
    local cluster image_pool image_ns image instance
    local pid_file pid asok_file
    local ret=0

    for cluster in "${CLUSTER1}" "${CLUSTER2}"
    do
        echo "${cluster} status"
        CEPH_ARGS='' ceph --cluster "${cluster}" -s
        CEPH_ARGS='' ceph --cluster "${cluster}" service dump
        CEPH_ARGS='' ceph --cluster "${cluster}" service status
        echo

        for image_pool in "${POOL}" "${PARENT_POOL}"
        do
            # "" selects the default namespace
            for image_ns in "" "${NS1}" "${NS2}"
            do
                echo "${cluster} ${image_pool} ${image_ns} images"
                rbd --cluster "${cluster}" -p "${image_pool}" --namespace "${image_ns}" ls -l
                echo

                echo "${cluster} ${image_pool}${image_ns} mirror pool info"
                rbd --cluster "${cluster}" -p "${image_pool}" --namespace "${image_ns}" mirror pool info
                echo

                echo "${cluster} ${image_pool}${image_ns} mirror pool status"
                CEPH_ARGS='' rbd --cluster "${cluster}" -p "${image_pool}" --namespace "${image_ns}" mirror pool status --verbose
                echo

                for image in $(rbd --cluster "${cluster}" -p "${image_pool}" --namespace "${image_ns}" ls 2>/dev/null)
                do
                    echo "image ${image} info"
                    rbd --cluster "${cluster}" -p "${image_pool}" --namespace "${image_ns}" info "${image}"
                    echo
                    echo "image ${image} journal status"
                    rbd --cluster "${cluster}" -p "${image_pool}" --namespace "${image_ns}" journal status --image "${image}"
                    echo
                done

                echo "${cluster} ${image_pool} ${image_ns} rbd_mirroring omap vals"
                rados --cluster "${cluster}" -p "${image_pool}" --namespace "${image_ns}" listomapvals rbd_mirroring
                echo
            done
        done
    done

    for cluster in "${CLUSTER1}" "${CLUSTER2}"
    do
        for instance in $(seq 0 "${LAST_MIRROR_INSTANCE}"); do
            pid_file=$(daemon_pid_file "${cluster}:${instance}") || :
            if [ ! -e "${pid_file}" ]
            then
                echo "${cluster} rbd-mirror not running or unknown" \
                     "(${pid_file} not exist)"
                continue
            fi

            pid=$(cat "${pid_file}" 2>/dev/null) || :
            if [ -z "${pid}" ]
            then
                echo "${cluster} rbd-mirror not running or unknown" \
                     "(can't find pid using ${pid_file})"
                ret=1
                continue
            fi

            echo "${cluster} rbd-mirror process in ps output:"
            # awk prints the header line and exits 1 when the pid is listed,
            # so a successful pipeline means the process was not found
            if ps auxww |
                awk -v pid="${pid}" 'NR == 1 {print} $2 == pid {print; exit 1}'
            then
                echo
                echo "${cluster} rbd-mirror not running" \
                     "(can't find pid $pid in ps output)"
                ret=1
                continue
            fi
            echo

            asok_file=$(daemon_asok_file "${cluster}:${instance}" "${cluster}") || :
            if [ ! -S "${asok_file}" ]
            then
                echo "${cluster} rbd-mirror asok is unknown (${asok_file} not exits)"
                ret=1
                continue
            fi

            echo "${cluster} rbd-mirror status"
            ceph --admin-daemon "${asok_file}" rbd mirror status
            echo
        done
    done

    return ${ret}
}
605
# Ask an rbd-mirror daemon of a cluster to flush, either everything or a
# single image.
#
# Arguments:
#   $1 - cluster[:instance], handed to admin_daemons
#   $2 - pool (only used together with $3)
#   $3 - optional image; when given only <pool>/<image> is flushed
flush()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"

    # the command is passed word by word, so an image spec stays one word
    if [ -n "${image}" ]
    then
        admin_daemons "${cluster}" rbd mirror flush "${pool}/${image}"
    else
        admin_daemons "${cluster}" rbd mirror flush
    fi
}
620
# Check whether the replay of an image is in the expected state.
#
# Arguments:
#   $1 - cluster[:instance], handed to admin_daemons
#   $2 - pool
#   $3 - image
#   $4 - expected state: "started" or "stopped"
# The state counts as "started" when the 'state' line of the daemon's
# "rbd mirror status" output contains "Replaying", as "stopped" otherwise.
# Returns: 0 when the state matches; 1 when it differs or no state line
#          could be obtained.
test_image_replay_state()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local test_state="$4"
    local status_result
    local current_state=stopped

    status_result=$(admin_daemons "${cluster}" rbd mirror status "${pool}/${image}" | grep -i 'state') || return 1
    # grep also echoes the matching line, which ends up in the test output
    echo "${status_result}" | grep -i 'Replaying' && current_state=started
    test "${test_state}" = "${current_state}"
}
634
# Poll, with growing delays, until the replay of an image reaches the wanted
# state.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
#   $4 - wanted state ("started" or "stopped")
# Returns: 0 once test_image_replay_state succeeds, 1 when it never does.
wait_for_image_replay_state()
{
    local cluster=$1
    local pool=$2
    local image=$3
    local state=$4
    local s

    # TODO: add a way to force rbd-mirror to update replayers
    for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
        sleep ${s}
        if test_image_replay_state "${cluster}" "${pool}" "${image}" "${state}"; then
            return 0
        fi
    done

    return 1
}
650
# Wait until the replay of an image is running.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
wait_for_image_replay_started()
{
    local cluster=$1 pool=$2 image=$3

    wait_for_image_replay_state "${cluster}" "${pool}" "${image}" started
}
659
# Wait until the replay of an image is no longer running.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
wait_for_image_replay_stopped()
{
    local cluster=$1 pool=$2 image=$3

    wait_for_image_replay_state "${cluster}" "${pool}" "${image}" stopped
}
668
# Print the first commit position of a connected journal client.
#
# Arguments:
#   $1 - cluster
#   $2 - pool
#   $3 - image
#   $4 - regular expression the client id must match ('' selects the client
#        with the empty id)
# The "rbd journal status" output is copied to stderr and stored in
# ${TEMPDIR}/<cluster>-<pool>-<image>.status; the position goes to stdout.
get_journal_position()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local id_regexp="$4"
    local status_log

    # Parse line like below, looking for the first position
    # [id=, commit_position=[positions=[[object_number=1, tag_tid=3, entry_tid=9], [object_number=0, tag_tid=3, entry_tid=8], [object_number=3, tag_tid=3, entry_tid=7], [object_number=2, tag_tid=3, entry_tid=6]]]]

    # the log is named after the cluster actually queried
    status_log="${TEMPDIR}/$(mkfname "${cluster}-${pool}-${image}.status")"
    rbd --cluster "${cluster}" journal status --image "${pool}/${image}" |
        tee "${status_log}" >&2
    sed -nEe 's/^.*\[id='"${id_regexp}"',.*positions=\[\[([^]]*)\],.*state=connected.*$/\1/p' \
        "${status_log}"
}
685
# Print the commit position of the journal client with the empty id.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
get_master_journal_position()
{
    local cluster=$1 pool=$2 image=$3
    local id_regexp=''

    get_journal_position "${cluster}" "${pool}" "${image}" "${id_regexp}"
}
694
# Print the commit position of a journal client with a non-empty id.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
get_mirror_journal_position()
{
    local cluster=$1 pool=$2 image=$3
    # at least one character
    local id_regexp='..*'

    get_journal_position "${cluster}" "${pool}" "${image}" "${id_regexp}"
}
703
# Wait until the mirror journal client has caught up with the master.
#
# Arguments:
#   $1 - local cluster (its rbd-mirror daemon is asked to flush)
#   $2 - cluster holding the journal
#   $3 - pool
#   $4 - image
# Returns: 0 when both positions are equal, or when they share the tag and
#          the mirror entry is not behind the master entry; 1 when the mirror
#          position stops changing for a whole round of polls.
wait_for_journal_replay_complete()
{
    local local_cluster="$1"
    local cluster="$2"
    local pool="$3"
    local image="$4"
    local s master_pos mirror_pos last_mirror_pos
    local master_tag master_entry mirror_tag mirror_entry

    while true; do
        for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16 32 32; do
            sleep "${s}"
            flush "${local_cluster}" "${pool}" "${image}"
            master_pos=$(get_master_journal_position "${cluster}" "${pool}" "${image}")
            mirror_pos=$(get_mirror_journal_position "${cluster}" "${pool}" "${image}")
            [ -n "${master_pos}" ] && [ "${master_pos}" = "${mirror_pos}" ] && return 0
            # progress was made: restart the backoff
            [ "${mirror_pos}" != "${last_mirror_pos}" ] && break
        done

        [ "${mirror_pos}" = "${last_mirror_pos}" ] && return 1
        last_mirror_pos="${mirror_pos}"

        # handle the case where the mirror is ahead of the master
        master_tag=$(echo "${master_pos}" | grep -Eo "tag_tid=[0-9]*" | cut -d'=' -f 2)
        mirror_tag=$(echo "${mirror_pos}" | grep -Eo "tag_tid=[0-9]*" | cut -d'=' -f 2)
        master_entry=$(echo "${master_pos}" | grep -Eo "entry_tid=[0-9]*" | cut -d'=' -f 2)
        mirror_entry=$(echo "${mirror_pos}" | grep -Eo "entry_tid=[0-9]*" | cut -d'=' -f 2)
        # compare the entries only when both were found
        [ "${master_tag}" = "${mirror_tag}" ] &&
            [ -n "${master_entry}" ] && [ -n "${mirror_entry}" ] &&
            [ "${master_entry}" -le "${mirror_entry}" ] && return 0
    done
    return 1
}
735
# Run "rbd mirror image snapshot" for an image.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
mirror_image_snapshot()
{
    local cluster=$1
    local image_spec="$2/$3"

    rbd --cluster "${cluster}" mirror image snapshot "${image_spec}"
}
744
# Store the XML of a snapshot of an image in a file: the last snapshot in
# "rbd snap list --all" order, provided its namespace is marked complete.
#
# Arguments:
#   $1 - cluster
#   $2 - pool
#   $3 - image
#   $4 - file that receives the XML (left empty when nothing matches)
# Always returns 0.
get_newest_mirror_snapshot()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local log="$4"

    # use the binary name detected at load time ("xmlstarlet" or "xml")
    rbd --cluster "${cluster}" snap list --all "${pool}/${image}" --format xml | \
        "${XMLSTARLET:-xmlstarlet}" sel -t -c "//snapshots/snapshot[namespace/complete='true' and position()=last()]" > \
        "${log}" || true
}
756
# Create a mirror snapshot of an image and wait until the local cluster has
# a snapshot whose primary_snap_id equals the new snapshot's id.
#
# Arguments:
#   $1 - local cluster (where the snapshot is expected to arrive)
#   $2 - cluster on which the mirror snapshot is created
#   $3 - pool
#   $4 - image
# Returns: 0 once the ids match, 1 when they never do.
wait_for_snapshot_sync_complete()
{
    local local_cluster="$1"
    local cluster="$2"
    local pool="$3"
    local image="$4"
    local status_log local_status_log
    local snapshot_id primary_snapshot_id
    local s

    status_log="${TEMPDIR}/$(mkfname "${cluster}-${pool}-${image}.status")"
    local_status_log="${TEMPDIR}/$(mkfname "${local_cluster}-${pool}-${image}.status")"

    mirror_image_snapshot "${cluster}" "${pool}" "${image}"
    get_newest_mirror_snapshot "${cluster}" "${pool}" "${image}" "${status_log}"
    # a failing XML query is tolerated and just leaves the id empty
    snapshot_id=$("${XMLSTARLET:-xmlstarlet}" sel -t -v "//snapshot/id" < "${status_log}") || :

    for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16 32 32; do
        sleep "${s}"

        get_newest_mirror_snapshot "${local_cluster}" "${pool}" "${image}" "${local_status_log}"
        primary_snapshot_id=$("${XMLSTARLET:-xmlstarlet}" sel -t -v "//snapshot/namespace/primary_snap_id" < "${local_status_log}") || :

        test "${snapshot_id}" = "${primary_snapshot_id}" && return 0
    done

    return 1
}
785
# Wait for the replay of an image to complete, using the method that matches
# MIRROR_IMAGE_MODE ("journal" or "snapshot").
#
# Arguments:
#   $1 - local cluster
#   $2 - remote cluster
#   $3 - pool
#   $4 - image
# Returns: the result of the mode specific wait, 1 for an unknown mode.
wait_for_replay_complete()
{
    local local_cluster="$1"
    local cluster="$2"
    local pool="$3"
    local image="$4"

    case "${MIRROR_IMAGE_MODE}" in
        journal)
            wait_for_journal_replay_complete "${local_cluster}" "${cluster}" "${pool}" "${image}"
            ;;
        snapshot)
            wait_for_snapshot_sync_complete "${local_cluster}" "${cluster}" "${pool}" "${image}"
            ;;
        *)
            return 1
            ;;
    esac
}
801
802
# Check the "rbd mirror image status" output of an image against patterns.
#
# Arguments:
#   $1 - cluster
#   $2 - pool
#   $3 - image
#   $4 - grep pattern for the state line
#   $5 - grep pattern for the description line
#   $6 - optional grep pattern for the service line; without it a service
#        line is required when the state pattern starts with "up+" and must
#        be absent otherwise
# Returns: 0 when everything matches, 1 on a mismatch.
# NOTE: when the patterns match but the follow-up "mirror pool status" check
# fails, the function exits the shell with status 1 instead of returning.
test_status_in_pool_dir()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local state_pattern="$4"
    local description_pattern="$5"
    local service_pattern="$6"
    local status_log last_update

    status_log="${TEMPDIR}/$(mkfname "${cluster}-${pool}-${image}.mirror_status")"
    CEPH_ARGS='' rbd --cluster "${cluster}" mirror image status "${pool}/${image}" |
        tee "${status_log}" >&2
    # NOTE(review): the leading whitespace of the anchored patterns below has
    # to match the indentation of the rbd output exactly -- confirm it
    # against a real "rbd mirror image status" run.
    grep "^ state: .*${state_pattern}" "${status_log}" || return 1
    grep "^ description: .*${description_pattern}" "${status_log}" || return 1

    if [ -n "${service_pattern}" ]; then
        grep "service: *${service_pattern}" "${status_log}" || return 1
    elif echo "${state_pattern}" | grep '^up+'; then
        grep "service: *${MIRROR_USER_ID_PREFIX}.* on " "${status_log}" || return 1
    else
        grep "service: " "${status_log}" && return 1
    fi

    # recheck using `mirror pool status` command to stress test it.

    last_update="$(sed -nEe 's/^ last_update: *(.*) *$/\1/p' "${status_log}")"
    test_mirror_pool_status_verbose \
        "${cluster}" "${pool}" "${image}" "${state_pattern}" "${last_update}" &&
        return 0

    echo "'mirror pool status' test failed" >&2
    exit 1
}
836
# Cross-check an image's state using "rbd mirror pool status --verbose".
#
# Arguments:
#   $1 - cluster
#   $2 - pool
#   $3 - image
#   $4 - grep pattern for the image state
#   $5 - last_update value seen earlier for the image
# Returns: 0 when the state matches the pattern, or when last_update sorts
#          after the earlier value (the status changed in between).
test_mirror_pool_status_verbose()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local state_pattern="$4"
    local prev_last_update="$5"
    local status_log
    local last_update state

    status_log="${TEMPDIR}/$(mkfname "${cluster}-${pool}.mirror_status")"

    rbd --cluster "${cluster}" mirror pool status "${pool}" --verbose --format xml \
        > "${status_log}"

    last_update=$("${XMLSTARLET}" sel -t -v \
        "//images/image[name='${image}']/last_update" < "${status_log}")
    state=$("${XMLSTARLET}" sel -t -v \
        "//images/image[name='${image}']/state" < "${status_log}")

    # '>' is a string comparison on the timestamps
    echo "${state}" | grep "${state_pattern}" ||
        test "${last_update}" '>' "${prev_last_update}"
}
859
# Poll, with growing delays, until test_status_in_pool_dir succeeds.
#
# Arguments: the same six as test_status_in_pool_dir
#   $1 - cluster, $2 - pool, $3 - image, $4 - state pattern,
#   $5 - description pattern, $6 - optional service pattern
# Returns: 0 on the first successful check, 1 when none succeeds.
wait_for_status_in_pool_dir()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local state_pattern="$4"
    local description_pattern="$5"
    local service_pattern="$6"
    local s

    for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
        sleep "${s}"
        test_status_in_pool_dir "${cluster}" "${pool}" "${image}" "${state_pattern}" \
            "${description_pattern}" "${service_pattern}" &&
            return 0
    done
    return 1
}
877
# Create an image with the RBD_IMAGE_FEATURES feature set.
#
# Arguments:
#   $1   - cluster
#   $2   - pool
#   $3   - image
#   $4   - optional size (default 128); '' keeps the default
#   $5.. - extra arguments for "rbd create"
create_image()
{
    local cluster="$1" ; shift
    local pool="$1" ; shift
    local image="$1" ; shift
    local size=128

    if [ $# -gt 0 ]; then
        # an empty size slot is consumed too, so it is not forwarded to rbd
        size="${1:-128}"
        shift
    fi

    rbd --cluster "${cluster}" create --size "${size}" \
        --image-feature "${RBD_IMAGE_FEATURES}" "$@" "${pool}/${image}"
}
893
# Create an image and enable mirroring on it.
#
# Arguments:
#   $1   - cluster
#   $2   - pool
#   $3   - image
#   $4   - optional mirror image mode (default MIRROR_IMAGE_MODE); '' keeps
#          the default
#   $5.. - extra arguments for create_image (size first)
create_image_and_enable_mirror()
{
    local cluster="$1" ; shift
    local pool="$1" ; shift
    local image="$1" ; shift
    local mode="${1:-${MIRROR_IMAGE_MODE}}"
    if [ $# -gt 0 ]; then
        # an empty mode slot is consumed too, so it is not forwarded
        shift
    fi

    create_image "${cluster}" "${pool}" "${image}" "$@"
    enable_mirror "${cluster}" "${pool}" "${image}" "${mode}"
}
907
# Enable the journaling feature on an image.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
enable_journaling()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"

    rbd --cluster "${cluster}" feature enable "${pool}/${image}" journaling
}
916
# Set a metadata key on an image.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
#   $4 - key
#   $5 - value (may contain whitespace)
set_image_meta()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local key="$4"
    local val="$5"

    rbd --cluster "${cluster}" image-meta set "${pool}/${image}" "${key}" "${val}"
}
927
# Check that a metadata key of an image has the expected value.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
#   $4 - key
#   $5 - expected value
# Returns: 0 when the stored value equals the expected one, non-zero
#          otherwise (including when the key cannot be read).
compare_image_meta()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local key="$4"
    local value="$5"

    # quoted, so an empty or multi-word value is compared as one string
    test "$(rbd --cluster "${cluster}" image-meta get "${pool}/${image}" "${key}")" = "${value}"
}
938
# Rename an image within its pool.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - current image name, $4 - new image name
rename_image()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local new_name="$4"

    rbd --cluster="${cluster}" rename "${pool}/${image}" "${pool}/${new_name}"
}
948
# Purge the snapshots of an image, then remove the image.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
# Returns: the exit status of "rbd rm".
remove_image()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"

    rbd --cluster="${cluster}" snap purge "${pool}/${image}"
    rbd --cluster="${cluster}" rm "${pool}/${image}"
}
958
# Remove an image, retrying with growing delays until remove_image succeeds.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
# Returns: 0 on the first successful removal, 1 when every attempt failed.
remove_image_retry()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local s

    for s in 0 1 2 4 8 16 32; do
        sleep "${s}"
        remove_image "${cluster}" "${pool}" "${image}" && return 0
    done
    return 1
}
971
# Move an image to the trash.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
trash_move() {
    local cluster="$1"
    local pool="$2"
    local image="$3"

    rbd --cluster="${cluster}" trash move "${pool}/${image}"
}
979
# Restore an image from the trash.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - id of the trashed image
trash_restore() {
    local cluster="$1"
    local pool="$2"
    local image_id="$3"

    rbd --cluster="${cluster}" trash restore "${pool}/${image_id}"
}
987
# Clone a parent snapshot into a new image with the RBD_IMAGE_FEATURES
# feature set.
#
# Arguments:
#   $1 - cluster
#   $2 - parent pool, $3 - parent image, $4 - parent snapshot
#   $5 - clone pool,  $6 - clone image
#   $7.. - extra arguments for "rbd clone"
clone_image()
{
    local cluster="$1"
    local parent_pool="$2"
    local parent_image="$3"
    local parent_snap="$4"
    local clone_pool="$5"
    local clone_image="$6"

    shift 6

    rbd --cluster "${cluster}" clone \
        "${parent_pool}/${parent_image}@${parent_snap}" \
        "${clone_pool}/${clone_image}" --image-feature "${RBD_IMAGE_FEATURES}" "$@"
}
1003
# Clone a parent snapshot into a new image and enable mirroring on the clone.
#
# Arguments:
#   $1 - cluster
#   $2 - parent pool, $3 - parent image, $4 - parent snapshot
#   $5 - clone pool,  $6 - clone image
#   $7 - optional mirror image mode (default MIRROR_IMAGE_MODE); '' keeps
#        the default
#   $8.. - extra arguments for clone_image
clone_image_and_enable_mirror()
{
    local cluster="$1"
    local parent_pool="$2"
    local parent_image="$3"
    local parent_snap="$4"
    local clone_pool="$5"
    local clone_image="$6"
    shift 6

    local mode="${1:-${MIRROR_IMAGE_MODE}}"
    if [ $# -gt 0 ]; then
        # an empty mode slot is consumed too, so it is not forwarded
        shift
    fi

    clone_image "${cluster}" "${parent_pool}" "${parent_image}" "${parent_snap}" "${clone_pool}" "${clone_image}" "$@"
    enable_mirror "${cluster}" "${clone_pool}" "${clone_image}" "${mode}"
}
1022
# Disconnect the journal client of an image.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
disconnect_image()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"

    rbd --cluster "${cluster}" journal client disconnect \
        --image "${pool}/${image}"
}
1032
# Create a snapshot of an image.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image, $4 - snapshot name
create_snapshot()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local snap="$4"

    rbd --cluster "${cluster}" snap create "${pool}/${image}@${snap}"
}
1042
# Remove a snapshot of an image.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image, $4 - snapshot name
remove_snapshot()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local snap="$4"

    rbd --cluster "${cluster}" snap rm "${pool}/${image}@${snap}"
}
1052
# Rename a snapshot of an image.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
#   $4 - current snapshot name, $5 - new snapshot name
rename_snapshot()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local snap="$4"
    local new_snap="$5"

    rbd --cluster "${cluster}" snap rename "${pool}/${image}@${snap}" \
        "${pool}/${image}@${new_snap}"
}
1064
# Remove all snapshots of an image.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
purge_snapshots()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"

    rbd --cluster "${cluster}" snap purge "${pool}/${image}"
}
1073
# Protect a snapshot of an image.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image, $4 - snapshot name
protect_snapshot()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local snap="$4"

    rbd --cluster "${cluster}" snap protect "${pool}/${image}@${snap}"
}
1083
# Unprotect a snapshot of an image.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image, $4 - snapshot name
unprotect_snapshot()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local snap="$4"

    rbd --cluster "${cluster}" snap unprotect "${pool}/${image}@${snap}"
}
1093
# Poll, with growing delays, until "rbd info" succeeds for a snapshot.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image, $4 - snapshot name
# Returns: 0 once the snapshot can be queried, 1 when it never can.
wait_for_snap_present()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local snap_name="$4"
    local s

    for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do
        sleep "${s}"
        rbd --cluster "${cluster}" info "${pool}/${image}@${snap_name}" || continue
        return 0
    done
    return 1
}
1109
1110 test_snap_moved_to_trash()
1111 {
1112 local cluster=$1
1113 local pool=$2
1114 local image=$3
1115 local snap_name=$4
1116
1117 rbd --cluster ${cluster} snap ls ${pool}/${image} --all |
1118 grep -F " trash (${snap_name})"
1119 }
1120
# Poll, with growing delays, until test_snap_moved_to_trash succeeds.
#
# Arguments: the same as test_snap_moved_to_trash
#   $1 - cluster, $2 - pool, $3 - image, $4 - snapshot name
# Returns: 0 on the first successful check, 1 when none succeeds.
wait_for_snap_moved_to_trash()
{
    local s

    for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do
        sleep "${s}"
        test_snap_moved_to_trash "$@" || continue
        return 0
    done
    return 1
}
1132
# Inverse of test_snap_moved_to_trash.
#
# Arguments: the same as test_snap_moved_to_trash
# Returns: 0 when the snapshot is not listed in the trash, 1 when it is.
test_snap_removed_from_trash()
{
    if test_snap_moved_to_trash "$@"; then
        return 1
    fi
    return 0
}
1138
# Poll, with growing delays, until test_snap_removed_from_trash succeeds.
#
# Arguments: the same as test_snap_moved_to_trash
#   $1 - cluster, $2 - pool, $3 - image, $4 - snapshot name
# Returns: 0 on the first successful check, 1 when none succeeds.
wait_for_snap_removed_from_trash()
{
    local s

    for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do
        sleep "${s}"
        test_snap_removed_from_trash "$@" || continue
        return 0
    done
    return 1
}
1150
# Write random data to an image with "rbd bench".
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
#   $4 - number of writes
#   $5 - optional size of each write in bytes (default 4096)
write_image()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local count="$4"
    local size="$5"

    test -n "${size}" || size=4096

    rbd --cluster "${cluster}" bench "${pool}/${image}" --io-type write \
        --io-size "${size}" --io-threads 1 --io-total $((size * count)) \
        --io-pattern rand
}
1165
# Run ceph_test_rbd_mirror_random_write against an image for a random
# duration of 5 to 14 seconds, enforced with timeout(1). Its stderr goes to
# ${TEMPDIR}/rbd-mirror-random-write.log.
#
# Arguments:
#   $1 - cluster, $2 - pool, $3 - image
# Always returns 0 (running into the timeout is the expected way to stop).
stress_write_image()
{
    local cluster="$1"
    local pool="$2"
    local image="$3"
    local duration
    duration=$(awk 'BEGIN {srand(); print int(10 * rand()) + 5}')

    timeout "${duration}s" ceph_test_rbd_mirror_random_write \
        --cluster "${cluster}" "${pool}" "${image}" \
        --debug-rbd=20 --debug-journaler=20 \
        2> "${TEMPDIR}/rbd-mirror-random-write.log" || true
}
1178
# Print a side-by-side hex diff of two files (at most 64 lines).
#   $1 - first file
#   $2 - second file
# Temporary hex dumps are written next to the inputs as <file>.xxd and
# removed again before returning.
show_diff()
{
    local file1=$1 file2=$2
    local hex1=${file1}.xxd
    local hex2=${file2}.xxd

    xxd ${file1} > ${hex1}
    xxd ${file2} > ${hex2}
    # -s: only show the lines that differ
    sdiff -s ${hex1} ${hex2} | head -n 64
    rm -f ${hex1} ${hex2}
}
1189
# Export an image from both clusters and compare the exports byte by byte.
#   $1 - pool
#   $2 - image
# Uses the globals TEMPDIR, CLUSTER1 and CLUSTER2; export file names are
# built with mkfname.  When the exports differ, show_diff prints the
# difference.  The export files are removed before returning.
# Returns 0 when the exports are identical, 1 otherwise.
compare_images()
{
    local pool=$1 image=$2
    local ret=0
    local remote_file=${TEMPDIR}/$(mkfname ${CLUSTER2}-${pool}-${image}.export)
    local local_file=${TEMPDIR}/$(mkfname ${CLUSTER1}-${pool}-${image}.export)

    # start from a clean state: rbd export would refuse to overwrite
    # leftovers -- NOTE(review): assumption about rbd export, confirm
    rm -f ${remote_file} ${local_file}

    rbd --cluster ${CLUSTER2} export ${pool}/${image} ${remote_file}
    rbd --cluster ${CLUSTER1} export ${pool}/${image} ${local_file}

    cmp ${remote_file} ${local_file} || {
        show_diff ${remote_file} ${local_file}
        ret=1
    }

    rm -f ${remote_file} ${local_file}
    return ${ret}
}
1210
# Export every user snapshot of an image from both clusters and compare
# the exports byte by byte.
#   $1 - pool
#   $2 - image
# Uses the globals TEMPDIR, CLUSTER1, CLUSTER2 and XMLSTARLET.  The list
# of snapshots is taken from CLUSTER1; snapshots whose name starts with
# ".rbd-mirror." are skipped.  For every mismatch show_diff prints the
# difference.  The export files are removed before returning.
# Returns 0 when all snapshot exports are identical, 1 otherwise.
compare_image_snapshots()
{
    local pool=$1
    local image=$2
    local ret=0
    # keep the loop variable out of the caller's scope
    local snap_name

    # Build the file names with mkfname, exactly as compare_images does,
    # so that a pool or image name that is not usable as a plain file
    # name (e.g. one containing "/") does not end up in the path as is.
    local rmt_export=${TEMPDIR}/$(mkfname ${CLUSTER2}-${pool}-${image}.export)
    local loc_export=${TEMPDIR}/$(mkfname ${CLUSTER1}-${pool}-${image}.export)

    # The command substitution is intentionally unquoted: it yields one
    # snapshot name per word.
    for snap_name in $(rbd --cluster ${CLUSTER1} --format xml \
                           snap list ${pool}/${image} | \
                           $XMLSTARLET sel -t -v "//snapshot/name" | \
                           grep -E -v "^\.rbd-mirror\."); do
        rm -f "${rmt_export}" "${loc_export}"
        rbd --cluster ${CLUSTER2} export ${pool}/${image}@${snap_name} "${rmt_export}"
        rbd --cluster ${CLUSTER1} export ${pool}/${image}@${snap_name} "${loc_export}"
        if ! cmp "${rmt_export}" "${loc_export}"
        then
            show_diff "${rmt_export}" "${loc_export}"
            ret=1
        fi
    done
    rm -f "${rmt_export}" "${loc_export}"
    return ${ret}
}
1236
# Demote a mirrored image.
#   $1 - cluster name
#   $2 - pool
#   $3 - image
# Returns the exit status of the rbd command.
demote_image()
{
    local cluster=$1 pool=$2 image=$3
    local image_spec=${pool}/${image}

    rbd --cluster=${cluster} mirror image demote ${image_spec}
}
1245
# Promote a mirrored image.
#   $1 - cluster name
#   $2 - pool
#   $3 - image
#   $4 - extra argument appended to the rbd command, e.g. a force flag
#        (optional)
# Returns the exit status of the rbd command.
promote_image()
{
    local cluster=$1 pool=$2 image=$3 force=$4
    local image_spec=${pool}/${image}

    # ${force} stays unquoted so that it vanishes when it is empty
    rbd --cluster=${cluster} mirror image promote ${image_spec} ${force}
}
1255
# Enable mirroring on a pool.
#   $1 - cluster name
#   $2 - pool
#   $3 - mirror mode (optional, falls back to ${MIRROR_POOL_MODE})
# Returns the exit status of the rbd command.
set_pool_mirror_mode()
{
    local cluster=$1 pool=$2
    local mode=${3:-${MIRROR_POOL_MODE}}

    rbd --cluster=${cluster} mirror pool enable \
        ${pool} ${mode}
}
1264
# Disable mirroring for a single image.
#   $1 - cluster name
#   $2 - pool
#   $3 - image
# Returns the exit status of the rbd command.
disable_mirror()
{
    local cluster=$1 pool=$2 image=$3
    local image_spec=${pool}/${image}

    rbd --cluster=${cluster} mirror image disable ${image_spec}
}
1273
# Enable mirroring for a single image.
#   $1 - cluster name
#   $2 - pool
#   $3 - image
#   $4 - mirror image mode (optional, falls back to ${MIRROR_IMAGE_MODE})
# Returns the exit status of the rbd command.
enable_mirror()
{
    local cluster=$1 pool=$2 image=$3
    local mode=${4:-${MIRROR_IMAGE_MODE}}
    local image_spec=${pool}/${image}

    # ${mode} stays unquoted so that it vanishes when it is empty
    rbd --cluster=${cluster} mirror image enable ${image_spec} ${mode}
}
1283
# Check whether an image is in the expected state ("present" or "deleted").
#   $1 - cluster name
#   $2 - pool
#   $3 - image
#   $4 - expected state: "present" or "deleted"
#   $5 - expected image id (optional)
# The image counts as "present" when get_image_id reports a non-empty id
# and, if an expected id was given, that id matches.  In every other case
# it counts as "deleted".
# Returns 0 when the current state equals the expected one.
test_image_present()
{
    local cluster=$1
    local pool=$2
    local image=$3
    local test_state=$4
    local image_id=$5
    local current_state=deleted
    local current_image_id

    current_image_id=$(get_image_id "${cluster}" "${pool}" "${image}")

    # Two separate tests joined by || instead of "test ... -o ...": the
    # -o operator is obsolescent in POSIX and gets misparsed when an
    # operand looks like an operator.
    if [ -n "${current_image_id}" ]; then
        if [ -z "${image_id}" ] || [ "${image_id}" = "${current_image_id}" ]; then
            current_state=present
        fi
    fi

    [ "${test_state}" = "${current_state}" ]
}
1301
# Poll until test_image_present reports the expected state.
#   $1 - cluster name
#   $2 - pool
#   $3 - image
#   $4 - expected state, passed on to test_image_present
#   $5 - expected image id (optional; when empty the current id is looked
#        up once with get_image_id before polling starts)
# Sleeps before every attempt, with growing delays.
# Returns 0 as soon as the check succeeds, 1 when all attempts failed.
wait_for_image_present()
{
    local cluster=$1 pool=$2 image=$3 state=$4 image_id=$5
    local delay

    if [ -z "${image_id}" ]; then
        image_id=$(get_image_id ${cluster} ${pool} ${image})
    fi

    # TODO: add a way to force rbd-mirror to update replayers
    for delay in 0.1 1 2 4 8 8 8 8 8 8 8 8 16 16 32 32; do
        sleep ${delay}
        if test_image_present \
               "${cluster}" "${pool}" "${image}" "${state}" "${image_id}"; then
            return 0
        fi
    done
    return 1
}
1323
# Print the id of an image.
#   $1 - cluster name
#   $2 - pool
#   $3 - image
# The id is whatever follows "block_name_prefix: rbd_data." in the output
# of "rbd info".  Nothing is printed when no such line exists (e.g. when
# rbd info fails).  The exit status is that of sed, not of rbd.
get_image_id()
{
    local cluster=$1 pool=$2 image=$3
    local image_spec=${pool}/${image}

    rbd --cluster=${cluster} info ${image_spec} |
        sed -n -e 's/^.*block_name_prefix: rbd_data\.//p'
}
1333
# Store the current image id in a caller-supplied variable and request a
# resync of the image.
#   $1 - cluster name
#   $2 - pool
#   $3 - image
#   $4 - NAME of the variable that receives the current image id
# The id is assigned via eval in this function's scope, so the variable
# name must differ from this function's own locals (cluster, pool, image,
# image_id_var_name) -- otherwise the local would be set instead of the
# caller's variable.  For the same reason no further locals are declared
# here.
# Returns the exit status of the rbd resync command.
request_resync_image()
{
    local cluster=$1 pool=$2 image=$3
    local image_id_var_name=$4

    # <var name>='<current image id>'
    eval "${image_id_var_name}='$(get_image_id ${cluster} ${pool} ${image})'"
    # sanity check: the id must not be empty (only fatal under "set -e")
    eval "test -n \"\${${image_id_var_name}}\""

    rbd --cluster=${cluster} mirror image resync ${pool}/${image}
}
1346
# Print the data pool of an image.
#   $1 - cluster name
#   $2 - pool
#   $3 - image
# Prints the second field of the "data_pool:" line of "rbd info"; prints
# nothing when the output has no such line.  The exit status is that of
# awk, not of rbd.
get_image_data_pool()
{
    local cluster=$1 pool=$2 image=$3
    local image_spec=${pool}/${image}

    rbd --cluster ${cluster} info ${image_spec} |
        awk '$1 == "data_pool:" {print $2}'
}
1356
# Print the clone format (1 or 2) of a cloned image.
#   $1 - cluster name
#   $2 - pool
#   $3 - image
# Parses the output of "rbd info":
#   - no "parent:" line (or an empty/zero value)  -> prints nothing, fails
#   - an "op_features:" line mentioning clone-child -> prints 2
#   - otherwise                                      -> prints 1
# The exit status is that of awk: 1 when no parent was found, else 0.
get_clone_format()
{
    local cluster=$1 pool=$2 image=$3
    local image_spec=${pool}/${image}

    rbd --cluster ${cluster} info ${image_spec} |
        awk 'BEGIN {
                 format = 1
             }
             $1 == "parent:" {
                 parent = $2
             }
             /op_features: .*clone-child/ {
                 format = 2
             }
             END {
                 if (!parent) exit 1
                 print format
             }'
}
1378
1379 #
1380 # Main
1381 #
1382
# When the file is run with arguments, treat them as a command (typically
# one of the functions of this file plus its arguments) to execute from
# inside the temporary directory named by RBD_MIRROR_TEMDIR, then exit
# with that command's status.  Without arguments nothing happens here.
if [ "$#" -gt 0 ]
then
    if [ -z "${RBD_MIRROR_TEMDIR}" ]
    then
        echo "RBD_MIRROR_TEMDIR is not set" >&2
        exit 1
    fi

    TEMPDIR="${RBD_MIRROR_TEMDIR}"
    # Never run the requested command from the wrong directory.
    cd "${TEMPDIR}" || exit 1
    # "$@" (quoted) passes the command and its arguments on word for word.
    "$@"
    exit $?
fi