]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | """ |
2 | Handle osdfailsafe configuration settings (nearfull ratio and full ratio) | |
3 | """ | |
9f95a23c | 4 | from io import BytesIO |
7c673cae | 5 | import logging |
9f95a23c | 6 | import six |
7c673cae FG |
7 | import time |
8 | ||
9 | from teuthology.orchestra import run | |
e306af50 | 10 | from tasks.util.rados import rados |
7c673cae FG |
11 | from teuthology import misc as teuthology |
12 | ||
13 | log = logging.getLogger(__name__) | |
14 | ||
# Cluster-log markers emitted by an OSD that crossed a failsafe threshold.
_NEARFULL_WARNING = '[WRN] OSD near full'
_FULL_ERROR = '[ERR] OSD full dropping all updates'


def _count_lines(lines, needle):
    """
    Return how many entries of ``lines`` contain the substring ``needle``.

    Uses a generator with sum() rather than len(filter(...)): on Python 3
    filter() returns an iterator, which has no len().
    """
    return sum(1 for line in lines if needle in line)


def _watch_cluster_log(mon, manager, duration, injectargs=None):
    """
    Capture the output of ``ceph -w`` on ``mon`` for ``duration`` seconds.

    :param mon: remote on which ``ceph -w`` is started
    :param manager: ceph manager used to send the injectargs command
    :param duration: number of seconds to keep watching
    :param injectargs: optional argument string sent to osd.0 via
                       ``tell osd.0 injectargs`` once the watcher is running
    :returns: the captured output split into a list of lines
    """
    proc = mon.run(
        args=[
            'sudo',
            'daemon-helper',
            'kill',
            'ceph', '-w'
        ],
        stdin=run.PIPE,
        stdout=BytesIO(),
        wait=False,
    )
    try:
        if injectargs is not None:
            manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', injectargs)
        time.sleep(duration)
    finally:
        # Always stop the watcher, even if injectargs failed, so the
        # background process is not leaked.
        proc.stdin.close()  # causes daemon-helper send SIGKILL to ceph -w
        proc.wait()

    return six.ensure_str(proc.stdout.getvalue()).split('\n')


def task(ctx, config):
    """
    Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
    configuration settings

    In order for test to pass must use log-ignorelist as follows

        tasks:
            - chef:
            - install:
            - ceph:
                log-ignorelist: ['OSD near full', 'OSD full dropping all updates']
            - osd_failsafe_enospc:

    :param ctx: teuthology context; ``ctx.managers['ceph']`` and
                ``ctx.cluster`` are used
    :param config: task configuration, a dict or None (treated as ``{}``)
    :raises AssertionError: if config is not a dict, or if any of the
                            numbered verification steps sees an unexpected
                            message count or write result
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'osd_failsafe_enospc task only accepts a dict for configuration'

    # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding
    sleep_time = 50

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    manager = ctx.managers['ceph']

    # create 1 pg pool with 1 rep which can only be on osd.0
    osds = manager.get_osd_dump()
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_out_osd(osd['osd'])

    log.info('creating pool foo')
    manager.create_pool("foo")
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')

    # State NONE -> NEAR
    log.info('1. Verify warning messages when exceeding nearfull_ratio')

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    lines = _watch_cluster_log(
        mon, manager, sleep_time,
        injectargs='--osd_failsafe_nearfull_ratio .00001')

    count = _count_lines(lines, _NEARFULL_WARNING)
    assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count
    count = _count_lines(lines, _FULL_ERROR)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    # State NEAR -> FULL
    log.info('2. Verify error messages when exceeding full_ratio')

    lines = _watch_cluster_log(
        mon, manager, sleep_time,
        injectargs='--osd_failsafe_full_ratio .00001')

    count = _count_lines(lines, _FULL_ERROR)
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    log.info('3. Verify write failure when exceeding full_ratio')

    # Write data should fail
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
    assert ret != 0, 'Expected write failure but it succeeded with exit status 0'

    # Put back default
    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    # State FULL -> NEAR
    log.info('4. Verify write success when NOT exceeding full_ratio')

    # Write should succeed
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
    assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret

    log.info('5. Verify warning messages again when exceeding nearfull_ratio')

    # Nothing is injected here: the nearfull ratio set in step 1 is still active.
    lines = _watch_cluster_log(mon, manager, sleep_time)

    count = _count_lines(lines, _NEARFULL_WARNING)
    assert count in (1, 2), 'Incorrect number of warning messages expected 1 or 2 got %d' % count
    count = _count_lines(lines, _FULL_ERROR)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90')
    time.sleep(10)

    # State NONE -> FULL
    log.info('6. Verify error messages again when exceeding full_ratio')

    lines = _watch_cluster_log(
        mon, manager, sleep_time,
        injectargs='--osd_failsafe_full_ratio .00001')

    count = _count_lines(lines, _NEARFULL_WARNING)
    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
    count = _count_lines(lines, _FULL_ERROR)
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    # State FULL -> NONE
    log.info('7. Verify no messages settings back to default')

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    lines = _watch_cluster_log(mon, manager, sleep_time)

    count = _count_lines(lines, _NEARFULL_WARNING)
    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
    count = _count_lines(lines, _FULL_ERROR)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    log.info('Test Passed')

    # Bring all OSDs back in
    manager.remove_pool("foo")
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_in_osd(osd['osd'])