#!/usr/bin/env bash
#
# Copyright (C) 2020 ZTE Corporation <contact@zte.com.cn>
#
# Author: xie xingguo <xie.xingguo@zte.com.cn>
# Author: Yan Jun <yan.jun8@zte.com.cn>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    export poolname=test
    export testobjects=100
    export loglen=12
    export trim=$(expr $loglen / 2)
    export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    # so we will not force auth_log_shard to be acting_primary
    CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
    # use small pg_log settings, so we always do backfill instead of recovery
    CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim "
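    # the high_recovery_ops mclock profile favors recovery/backfill ops over client I/O,
    # so the test converges faster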
    CEPH_ARGS+="--osd_mclock_profile=high_recovery_ops "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}


function TEST_repeer_on_down_acting_member_coming_back() {
    local dir=$1
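    # an arbitrary small, existing file used as the payload for rados put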
    local dummyfile='/etc/fstab'

    local num_osds=6
    local osds="$(seq 0 $(expr $num_osds - 1))"
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for i in $osds
    do
        run_osd $dir $i || return 1
    done

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3
    ceph osd pool set $poolname min_size 2
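    # size 3 / min_size 2 keeps the pg active (and writeable) while a single acting member is down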
    local poolid=$(ceph pg dump pools -f json | jq '.pool_stats' | jq '.[].poolid')
    local pgid=$poolid.0
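    # the pool was created with a single pg, so its only pg id is <poolid>.0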

    # enable required feature-bits for upmap
    ceph osd set-require-min-compat-client luminous
    # reset up to [1,2,3]
    ceph osd pg-upmap $pgid 1 2 3 || return 1

    flush_pg_stats || return 1
    wait_for_clean || return 1

    echo "writing initial objects"
    # write a bunch of objects
    for i in $(seq 1 $testobjects)
    do
        rados -p $poolname put existing_$i $dummyfile
    done

    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean

    # reset up to [1,4,5]
    ceph osd pg-upmap $pgid 1 4 5 || return 1
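    # osd.2 and osd.3 have left the up set, but should remain in the acting set
    # (via pg_temp) until backfill of osd.4 and osd.5 completes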

    # wait for peering to complete
    sleep 2

    # make sure osd.2 belongs to the current acting set
    ceph pg $pgid query | jq '.acting' | grep 2 || return 1

    # kill osd.2
    kill_daemons $dir KILL osd.2 || return 1
    ceph osd down osd.2
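    # mark it down in the osdmap right away instead of waiting for failure detection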

    # again, wait for peering to complete
    sleep 2

    # osd.2 should have been moved out of the acting set
    ceph pg $pgid query | jq '.acting' | grep 2 && return 1

    # bring up osd.2
    activate_osd $dir 2 || return 1
    wait_for_osd up 2

    # again, wait for peering to complete
    sleep 2

    # the primary should be able to re-add osd.2 to the acting set
    ceph pg $pgid query | jq '.acting' | grep 2 || return 1

    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean

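    # the actual check: the primary must have requested a pg_temp change when it
    # received the notify from the previously-down acting member (osd.2)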
    if ! grep -q "Active: got notify from previous acting member.*, requesting pg_temp change" $(find $dir -name '*osd*log')
    then
        echo failure
        return 1
    fi
    echo "success"

    delete_pool $poolname
    kill_daemons $dir || return 1
}

main repeer-on-acting-back "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh repeer-on-acting-back.sh"
# End: