3 # Copyright (C) 2016 Piotr Dałek <git@predictor.org.pl>
4 # Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
6 # Author: Piotr Dałek <git@predictor.org.pl>
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU Library Public License as published by
10 # the Free Software Foundation; either version 2, or (at your option)
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU Library Public License for more details.
# Load the shared standalone-test helper library from the Ceph source tree.
# CEPH_ROOT is quoted so a checkout path containing spaces or glob characters
# is still passed to `source` as a single word.
# NOTE(review): presumably this is where setup, run_mon, run_mgr, run_osd,
# create_rbd_pool, teardown and main come from -- confirm against the helper.
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# Upper bound on the number of polling iterations test_fast_kill spends
# waiting for a killed OSD to be reported as down (its time_left counter
# starts at this value and is decremented once per iteration).
MAX_PROPAGATION_TIME=30
# Driver body: points the test cluster at a private monitor address, builds
# up CEPH_ARGS, and then runs test_fast_kill three times:
#   1. with --osd-fast-fail-on-connection-refused=false, where test_fast_kill
#      is expected to FAIL (the killed OSDs must not be marked down quickly);
#   2. with the option enabled and --ms_type=simple;
#   3. with the option enabled and --ms_type=async.
# Any unexpected outcome ends the driver with status 1.
# NOTE(review): $dir and $OLD_ARGS are read below but are not assigned in the
# lines visible in this section -- confirm both are set before this point.
export CEPH_MON="127.0.0.1:7126" # git grep '\<7126\>' : there must be only one
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "

# Disable device auto class feature for this testing,
# as it will automatically make root clones based on new class types
# and hence affect the down osd counting.
# ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
# -4 3.00000 root default~hdd
# -3 3.00000 host gitbuilder-ceph-rpm-centos7-amd64-basic~hdd
# 0 1.00000 osd.0 down 1.00000 1.00000
# 1 1.00000 osd.1 up 1.00000 1.00000
# 2 1.00000 osd.2 up 1.00000 1.00000
# -1 3.00000 root default
# -2 3.00000 host gitbuilder-ceph-rpm-centos7-amd64-basic
# 0 1.00000 osd.0 down 1.00000 1.00000
# 1 1.00000 osd.1 up 1.00000 1.00000
# 2 1.00000 osd.2 up 1.00000 1.00000
CEPH_ARGS+="--osd-class-update-on-start=false "

# Phase 1: fast-fail disabled. Success of test_fast_kill here is an error.
CEPH_ARGS+="--osd-fast-fail-on-connection-refused=false "
echo "Ensuring old behavior is there..."
# The failure must be reported from the current shell: a `return` placed
# inside a ( ... ) subshell only terminates that subshell, so the caller
# would carry on as if the check had passed.
if test_fast_kill "$dir"; then
    echo "OSDs died too early! Old behavior doesn't work."
    return 1
fi

# Phase 2: fast-fail enabled, simple messenger.
CEPH_ARGS="${OLD_ARGS}--osd-fast-fail-on-connection-refused=true "

CEPH_ARGS+="--ms_type=simple"
echo "Testing simple msgr..."
test_fast_kill "$dir" || return 1

# Phase 3: same check with the async messenger.
CEPH_ARGS="${OLD_ARGS}--ms_type=async"
echo "Testing async msgr..."
test_fast_kill "$dir" || return 1
# test_fast_kill: bring up a small cluster under $dir and check how many OSDs
# `ceph osd tree` reports as down after OSD processes have been killed.
#
# Visible steps:
#   - setup, then start monitor "a" (with --osd_pool_default_size=3) and
#     manager "x";
#   - start OSD $oi and remember its pid, read from $dir/osd.$oi.pid, in the
#     pids array;
#   - create the rbd pool and write objects into it with `rados bench`;
#   - pick a pid from pids at random until it differs from $previd;
#   - poll while time_left (initialised from MAX_PROPAGATION_TIME) is positive:
#     look for "ms_handle_refused" in the OSD logs, then count the lines of
#     `ceph osd tree` containing "down" and compare that count with $i;
#   - tear the cluster down.
#
# Returns 1 as soon as setup, run_mon, run_mgr, run_osd, create_rbd_pool or
# teardown fails.
#
# NOTE(review): the loop headers that define $oi and $i, the initial values of
# killid/previd, the command that actually kills the chosen pid, the bodies of
# the if/elif branches and the closing done/fi/} are not visible in this
# section -- confirm them against the complete file before relying on this
# description.
70 function test_fast_kill
() {
71 # create cluster with 3 osds
72 setup
$dir ||
return 1
73 run_mon
$dir a
--osd_pool_default_size=3 ||
return 1
74 run_mgr
$dir x ||
return 1
76 run_osd
$dir $oi ||
return 1
77 pids
[$oi]=$
(cat $dir/osd.
$oi.pid
)
80 create_rbd_pool ||
return 1
82 # write some objects so that the osds establish connections with each other
83 rados
-p rbd bench
10 write -b 4096 --max-objects 128 --no-cleanup
89 # kill random osd and see if after max MAX_PROPAGATION_TIME, the osd count decreased.
# Keep drawing a random pid until it differs from the previously chosen one.
91 while [ $killid -eq $previd ]; do
92 killid
=${pids[$RANDOM%${#pids[@]}]}
97 time_left
=$MAX_PROPAGATION_TIME
100 while [ $time_left -gt 0 ]; do
102 time_left
=$
[$time_left - 1];
# Only grep's exit status is used: non-zero means no OSD log contains
# "ms_handle_refused" yet.
104 grep -m 1 -c -F "ms_handle_refused" $dir/osd.
*.log
> /dev
/null
105 if [ $?
-ne 0 ]; then
# Number of lines in the osd tree that contain the word "down".
109 down_osds
=$
(ceph osd tree |
grep -c down
)
110 if [ $down_osds -lt $i ]; then
111 # osds not marked down yet, try again in a second
113 elif [ $down_osds -gt $i ]; then
114 echo Too many \
($down_osds\
) osds died
!
121 if [ $down_osds -lt $i ]; then
122 echo Killed the OSD
, yet it is not marked down
128 teardown
$dir ||
return 1
# Script entry point: hand control to main, passing this test's name followed
# by every command-line argument as separate, unmodified words.
# NOTE(review): main is not defined in this file; presumably it is provided by
# the sourced ceph-helpers.sh -- confirm.
main osd-fast-mark-down "$@"
134 # compile-command: "cd ../.. ; make -j4 && test/osd/osd-fast-mark-down.sh"