]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/osd_failsafe_enospc.py
bump version to 18.2.2-pve1
[ceph.git] / ceph / qa / tasks / osd_failsafe_enospc.py
CommitLineData
7c673cae
FG
1"""
2Handle osdfailsafe configuration settings (nearfull ratio and full ratio)
3"""
f67539c2 4from io import StringIO
7c673cae
FG
5import logging
6import time
7
8from teuthology.orchestra import run
e306af50 9from tasks.util.rados import rados
7c673cae
FG
10from teuthology import misc as teuthology
11
12log = logging.getLogger(__name__)
13
def _watch_cluster_log(mon, manager, duration, injectargs=None):
    """
    Capture the output of ``ceph -w`` on ``mon`` for ``duration`` seconds.

    :param mon: remote on which ``ceph -w`` is run
    :param manager: ceph manager used to send the optional injectargs
    :param duration: number of seconds to sleep while output is collected
    :param injectargs: optional argument string sent to osd.0 via
                       ``tell osd.0 injectargs`` once the watcher has been
                       started; nothing is injected when None
    :returns: the captured stdout split into a list of lines
    """
    proc = mon.run(
        args=[
            'sudo',
            'daemon-helper',
            'kill',
            'ceph', '-w'
        ],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    # Inject only after the watcher is started, so the messages triggered by
    # the new setting are emitted while output is being captured.
    if injectargs is not None:
        manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', injectargs)

    time.sleep(duration)
    proc.stdin.close()  # causes daemon-helper send SIGKILL to ceph -w
    proc.wait()

    return proc.stdout.getvalue().split('\n')


def _count_lines_containing(lines, needle):
    """
    Return how many entries of ``lines`` contain the substring ``needle``.

    Counts with a generator instead of ``len(filter(...))``: on Python 3
    ``filter()`` returns an iterator, which has no length.
    """
    return sum(1 for line in lines if needle in line)


def task(ctx, config):
    """
    Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
    configuration settings

    In order for test to pass must use log-ignorelist as follows

        tasks:
            - chef:
            - install:
            - ceph:
                log-ignorelist: ['OSD near full', 'OSD full dropping all updates']
            - osd_failsafe_enospc:

    :param ctx: test context; ``ctx.managers['ceph']`` and ``ctx.cluster``
                are used
    :param config: task configuration, a dict or None (treated as ``{}``)
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'osd_failsafe_enospc task only accepts a dict for configuration'

    near_full_msg = '[WRN] OSD near full'
    full_msg = '[ERR] OSD full dropping all updates'

    # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding
    sleep_time = 50

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    manager = ctx.managers['ceph']

    # create 1 pg pool with 1 rep which can only be on osd.0
    osds = manager.get_osd_dump()
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_out_osd(osd['osd'])

    log.info('creating pool foo')
    manager.create_pool("foo")
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')

    # State NONE -> NEAR
    log.info('1. Verify warning messages when exceeding nearfull_ratio')

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    lines = _watch_cluster_log(mon, manager, sleep_time,
                               '--osd_failsafe_nearfull_ratio .00001')

    count = _count_lines_containing(lines, near_full_msg)
    assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count
    count = _count_lines_containing(lines, full_msg)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    # State NEAR -> FULL
    log.info('2. Verify error messages when exceeding full_ratio')

    lines = _watch_cluster_log(mon, manager, sleep_time,
                               '--osd_failsafe_full_ratio .00001')

    count = _count_lines_containing(lines, full_msg)
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    log.info('3. Verify write failure when exceeding full_ratio')

    # Write data should fail
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
    assert ret != 0, 'Expected write failure but it succeeded with exit status 0'

    # Put back default
    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    # State FULL -> NEAR
    log.info('4. Verify write success when NOT exceeding full_ratio')

    # Write should succeed
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
    assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret

    log.info('5. Verify warning messages again when exceeding nearfull_ratio')

    # Nothing is injected here: the nearfull ratio set in step 1 is still in
    # effect.
    lines = _watch_cluster_log(mon, manager, sleep_time)

    count = _count_lines_containing(lines, near_full_msg)
    assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count
    count = _count_lines_containing(lines, full_msg)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90')
    time.sleep(10)

    # State NONE -> FULL
    log.info('6. Verify error messages again when exceeding full_ratio')

    lines = _watch_cluster_log(mon, manager, sleep_time,
                               '--osd_failsafe_full_ratio .00001')

    count = _count_lines_containing(lines, near_full_msg)
    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
    count = _count_lines_containing(lines, full_msg)
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    # State FULL -> NONE
    log.info('7. Verify no messages settings back to default')

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    lines = _watch_cluster_log(mon, manager, sleep_time)

    count = _count_lines_containing(lines, near_full_msg)
    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
    count = _count_lines_containing(lines, full_msg)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    log.info('Test Passed')

    # Bring all OSDs back in
    manager.remove_pool("foo")
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_in_osd(osd['osd'])