]> git.proxmox.com Git - ceph.git/blob - ceph/qa/standalone/osd/osd-recovery-space.sh
07ed09b4380fa0ad41c350287ddbcf952a387bae
[ceph.git] / ceph / qa / standalone / osd / osd-recovery-space.sh
1 #!/usr/bin/env bash
2 #
3 # Copyright (C) 2018 Red Hat <contact@redhat.com>
4 #
5 # Author: David Zafman <dzafman@redhat.com>
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU Library Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
10 # any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Library Public License for more details.
16 #
17
18 source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
19
#######################################
# Test driver: prepares the cluster environment variables, then runs each
# selected test function between a setup and a teardown of the test
# directory.
# Globals:
#   CEPH_MON, CEPH_ARGS, objects, poolprefix (written and exported)
# Arguments:
#   $1   - test directory handed to setup, the test and teardown
#   $2.. - optional names of the test functions to run; when omitted,
#          every currently defined function named TEST_* is run
# Returns:
#   0 when all selected tests pass, 1 as soon as a setup, test or
#   teardown step fails (teardown is not run after a failing test)
#######################################
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7221" # git grep '\<7221\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd_max_backfills=10 "
    export objects=600
    export poolprefix=test

    # With no explicit list, discover the TEST_* functions from the shell's
    # own listing of definitions ("TEST_name () " lines in `set` output).
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    # $funcs is deliberately left unquoted: it is a space-separated list.
    for func in $funcs ; do
        # Quote the directory so a path with whitespace stays one argument.
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}
39
40
#######################################
# Count the placement groups whose state string contains a given substring.
# Arguments:
#   $1 - substring to look for in each PG's "state" field
# Outputs:
#   the number of matching PGs on stdout, computed from the "pg_stats"
#   array of `ceph --format json pg dump pgs`
#######################################
function get_num_in_state() {
    local state=$1

    # Hand the state to jq as data (--arg) rather than splicing it into the
    # filter text, so quotes or backslashes in it cannot change the program.
    ceph --format json pg dump pgs 2>/dev/null | \
        jq --arg state "$state" \
            '.pg_stats | [.[] | .state | select(contains($state))] | length'
}
48
49
#######################################
# Wait until at least one placement group reports the given state.
# Arguments:
#   $1 - state substring, as understood by get_num_in_state
#   $2 - timeout forwarded to get_timeout_delays (defined outside this
#        file; presumably yields the list of sleep intervals to use)
# Outputs:
#   on timeout, the output of `ceph pg dump pgs` for diagnosis
# Returns:
#   0 once a PG is in the state, 1 if flush_pg_stats fails or the
#   delays are exhausted first
#######################################
function wait_for_state() {
    local state=$1
    local cur_in_state
    # Word splitting is intentional here: one array element per delay.
    local -a delays=($(get_timeout_delays $2 5))
    local -i loop=0

    flush_pg_stats || return 1
    # Wait for PGs to exist at all. Empty output (helper failure) is not
    # "0", so it ends this wait and falls through to the timed loop.
    while [[ "$(get_num_pgs)" == 0 ]] ; do
        sleep 1
    done

    while true ; do
        cur_in_state=$(get_num_in_state "$state")
        # Treat missing output as zero instead of raising a test(1) error.
        (( ${cur_in_state:-0} > 0 )) && break
        if (( loop >= ${#delays[@]} )) ; then
            ceph pg dump pgs
            return 1
        fi
        sleep "${delays[loop]}"
        loop=$(( loop + 1 ))
    done
    return 0
}
73
74
#######################################
# Wait until at least one placement group is in the recovery_toofull state.
# Arguments:
#   $1 - timeout forwarded to wait_for_state
# Returns:
#   the exit status of wait_for_state
#######################################
function wait_for_recovery_toofull() {
    local timeout=$1
    wait_for_state recovery_toofull "$timeout"
}
79
80
# Create 1 pool with size 1
# Set the full ratio to 50%
# Write 600 objects of 5K each (3000K)
# Inject fake_statfs_for_testing of 3600K (83% full)
# Increase the pool size to 2
# The pool shouldn't have room to recover
#
# Globals:
#   objects, poolprefix (read); CEPH_ARGS (exported)
# Arguments:
#   $1 - test directory
# Returns:
#   0 when exactly one PG is reported as recovery_toofull and the
#   PG_RECOVERY_FULL health check has the expected severity and message,
#   1 on any setup failure or failed check
function TEST_recovery_test_simple() {
    local dir=$1
    local pools=1
    local OSDS=2
    local osd p o
    local -i errors=0
    local data_pool toofull_pgs severity message

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for (( osd = 0; osd < OSDS; osd++ )) ; do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd set-nearfull-ratio .40 || return 1
    ceph osd set-backfillfull-ratio .45 || return 1
    ceph osd set-full-ratio .50 || return 1

    for (( p = 1; p <= pools; p++ )) ; do
        create_pool "${poolprefix}$p" 1 1 || return 1
        ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it || return 1
    done

    wait_for_clean || return 1

    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=5 || return 1
    # Name the target pool explicitly (the last one created) instead of
    # depending on whatever value a previous loop left in its counter.
    data_pool="${poolprefix}${pools}"
    for (( o = 1; o <= objects; o++ )) ; do
        rados -p "$data_pool" put "obj$o" "$dir/datafile" || return 1
    done

    # 3686400 bytes = 3600K of fake capacity on every OSD.
    for (( osd = 0; osd < OSDS; osd++ )) ; do
        ceph tell "osd.$osd" injectargs '--fake_statfs_for_testing 3686400' || return 1
    done
    sleep 5

    ceph pg dump pgs

    for (( p = 1; p <= pools; p++ )) ; do
        ceph osd pool set "${poolprefix}$p" size 2 || return 1
    done

    # Deliberately unchecked: if this times out, the checks below report it.
    wait_for_recovery_toofull 30

    toofull_pgs=$(ceph pg dump pgs | grep -cF -- '+recovery_toofull')
    if [[ "$toofull_pgs" != "1" ]] ; then
        echo "One pool should have been in recovery_toofull"
        errors=$(( errors + 1 ))
    fi

    ceph pg dump pgs
    ceph status
    ceph status --format=json-pretty > "$dir/stat.json"

    # jq -r prints the bare string, so no eval is needed to strip the
    # JSON quotes.
    severity=$(jq -r '.health.checks.PG_RECOVERY_FULL.severity' "$dir/stat.json")
    if [[ "$severity" != "HEALTH_ERR" ]] ; then
        echo "PG_RECOVERY_FULL severity $severity not HEALTH_ERR"
        errors=$(( errors + 1 ))
    fi
    message=$(jq -r '.health.checks.PG_RECOVERY_FULL.summary.message' "$dir/stat.json")
    if [[ "$message" != "Full OSDs blocking recovery: 1 pg recovery_toofull" ]] ; then
        echo "PG_RECOVERY_FULL message '$message' mismatched"
        errors=$(( errors + 1 ))
    fi
    rm -f -- "$dir/stat.json"

    # On failure the pools and daemons are left in place, as before.
    if (( errors != 0 )) ; then
        return 1
    fi

    for (( p = 1; p <= pools; p++ )) ; do
        delete_pool "${poolprefix}$p"
    done
    kill_daemons "$dir" || return 1
}
169
170
171 main osd-recovery-space "$@"
172
173 # Local Variables:
174 # compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-space.sh"
175 # End: