]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | """ |
2 | Handle OSD failsafe configuration settings (nearfull ratio and full ratio) | |
3 | """ | |
f67539c2 | 4 | from io import StringIO |
7c673cae FG |
5 | import logging |
6 | import time | |
7 | ||
8 | from teuthology.orchestra import run | |
e306af50 | 9 | from tasks.util.rados import rados |
7c673cae FG |
10 | from teuthology import misc as teuthology |
11 | ||
12 | log = logging.getLogger(__name__) | |
13 | ||
def _count_matching(lines, needle):
    """
    Return how many entries of ``lines`` contain the substring ``needle``.

    Written as a generator-based sum because ``filter()`` returns an
    iterator on Python 3, so ``len(filter(...))`` raises ``TypeError``.
    """
    return sum(1 for line in lines if needle in line)


def _watch_cluster_log(mon, duration, trigger=None):
    """
    Capture the output of ``ceph -w`` run on ``mon`` for ``duration`` seconds.

    :param mon: remote on which ``ceph -w`` is started (via ``mon.run``)
    :param duration: number of seconds to keep the watcher running
    :param trigger: optional zero-argument callable invoked right after the
                    watcher has been started, so that whatever it provokes
                    is seen by the watcher
    :returns: the captured stdout split into a list of lines
    """
    proc = mon.run(
        args=[
            'sudo',
            'daemon-helper',
            'kill',
            'ceph', '-w'
        ],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )
    try:
        if trigger is not None:
            trigger()
        time.sleep(duration)
    finally:
        # Always close stdin, even if the trigger failed, so the watcher
        # is not left running on the monitor host.
        proc.stdin.close()  # causes daemon-helper send SIGKILL to ceph -w
    proc.wait()
    return proc.stdout.getvalue().split('\n')


def task(ctx, config):
    """
    Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
    configuration settings

    In order for test to pass must use log-ignorelist as follows

        tasks:
            - chef:
            - install:
            - ceph:
                log-ignorelist: ['OSD near full', 'OSD full dropping all updates']
            - osd_failsafe_enospc:

    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'osd_failsafe_enospc task only accepts a dict for configuration'

    # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding
    sleep_time = 50

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    # Cluster log fragments that are counted in the "ceph -w" output
    near_msg = '[WRN] OSD near full'
    full_msg = '[ERR] OSD full dropping all updates'

    manager = ctx.managers['ceph']

    def inject(option):
        """Apply a single injectargs option string to osd.0."""
        manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', option)

    # create 1 pg pool with 1 rep which can only be on osd.0
    osds = manager.get_osd_dump()
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_out_osd(osd['osd'])

    log.info('creating pool foo')
    manager.create_pool("foo")
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')

    # State NONE -> NEAR
    log.info('1. Verify warning messages when exceeding nearfull_ratio')

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    lines = _watch_cluster_log(
        mon, sleep_time,
        trigger=lambda: inject('--osd_failsafe_nearfull_ratio .00001'),
    )

    count = _count_matching(lines, near_msg)
    assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count
    count = _count_matching(lines, full_msg)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    # State NEAR -> FULL
    log.info('2. Verify error messages when exceeding full_ratio')

    lines = _watch_cluster_log(
        mon, sleep_time,
        trigger=lambda: inject('--osd_failsafe_full_ratio .00001'),
    )

    count = _count_matching(lines, full_msg)
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    log.info('3. Verify write failure when exceeding full_ratio')

    # Write data should fail
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
    assert ret != 0, 'Expected write failure but it succeeded with exit status 0'

    # Put back default
    inject('--osd_failsafe_full_ratio .97')
    time.sleep(10)

    # State FULL -> NEAR
    log.info('4. Verify write success when NOT exceeding full_ratio')

    # Write should succeed
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
    assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret

    log.info('5. Verify warning messages again when exceeding nearfull_ratio')

    # No trigger here: the nearfull ratio injected in step 1 is still active
    lines = _watch_cluster_log(mon, sleep_time)

    count = _count_matching(lines, near_msg)
    assert count in (1, 2), 'Incorrect number of warning messages expected 1 or 2 got %d' % count
    count = _count_matching(lines, full_msg)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    inject('--osd_failsafe_nearfull_ratio .90')
    time.sleep(10)

    # State NONE -> FULL
    log.info('6. Verify error messages again when exceeding full_ratio')

    lines = _watch_cluster_log(
        mon, sleep_time,
        trigger=lambda: inject('--osd_failsafe_full_ratio .00001'),
    )

    count = _count_matching(lines, near_msg)
    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
    count = _count_matching(lines, full_msg)
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    # State FULL -> NONE
    log.info('7. Verify no messages settings back to default')

    inject('--osd_failsafe_full_ratio .97')
    time.sleep(10)

    lines = _watch_cluster_log(mon, sleep_time)

    count = _count_matching(lines, near_msg)
    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
    count = _count_matching(lines, full_msg)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    log.info('Test Passed')

    # Bring all OSDs back in
    manager.remove_pool("foo")
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_in_osd(osd['osd'])