# ceph/qa/tasks/osd_failsafe_enospc.py (ceph.git, as shown on git.proxmox.com)
"""
Handle OSD failsafe configuration settings (nearfull ratio and full ratio).
"""
import logging
import time
from io import BytesIO

import six

from teuthology import misc as teuthology
from teuthology.orchestra import run
from util.rados import rados
# Module-level logger named after this module; the task below reports its
# progress through it.
log = logging.getLogger(__name__)
# NOTE(review): the reviewed copy of this file was a web extraction that had
# lost its indentation and several source lines.  The pieces marked
# "NOTE(review)" below were not visible and are reconstructed from the
# surrounding comments and call sites -- confirm them against the original.

# Cluster-log fragments the test looks for.
_NEARFULL_WARNING = '[WRN] OSD near full'
_FULL_ERROR = '[ERR] OSD full dropping all updates'


def _count_lines_containing(lines, needle):
    """
    Return how many entries of ``lines`` contain the substring ``needle``.

    Counts with a generator instead of ``len(filter(...))``: on Python 3
    ``filter()`` returns an iterator, which has no ``len()``.
    """
    return sum(1 for line in lines if needle in line)


def _start_cluster_log_watch(mon):
    """
    Start ``ceph -w`` on the remote ``mon`` and return the running process.

    The process is started without waiting so the caller can change OSD
    settings while the cluster log is being captured.
    """
    # NOTE(review): this launch was not visible in the reviewed copy.  It is
    # inferred from the "daemon-helper send SIGKILL to ceph -w" comment and
    # from proc.stdin / proc.stdout.getvalue() being used afterwards; confirm
    # the exact argument list and the buffer type.
    return mon.run(
        args=['sudo', 'daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=BytesIO(),
        wait=False,
    )


def _collect_cluster_log(proc):
    """
    Stop the ``ceph -w`` watcher ``proc`` and return its output as lines.
    """
    proc.stdin.close()  # causes daemon-helper send SIGKILL to ceph -w
    # NOTE(review): waiting for the watcher to exit before reading its output
    # is reconstructed, not visible in the reviewed copy -- confirm.
    proc.wait()
    return six.ensure_str(proc.stdout.getvalue()).split('\n')


def task(ctx, config):
    """
    Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
    configuration settings

    In order for test to pass must use log-whitelist as follows

        tasks:
            - ceph:
                log-whitelist: ['OSD near full', 'OSD full dropping all updates']
            - osd_failsafe_enospc:

    The test creates a size-1 pool "foo", then lowers and restores the two
    failsafe ratios on osd.0 while watching ``ceph -w``, asserting on the
    number of warning/error lines seen and on whether ``rados put`` succeeds.

    :param ctx: teuthology context; ``ctx.managers['ceph']`` and
                ``ctx.cluster`` are used.
    :param config: task configuration, a dict (or None when the task is
                   listed without settings, as in the example above).
    :raises AssertionError: if ``config`` is not a dict or any of the
                            checks on messages / write results fails.
    """
    # NOTE(review): reconstructed -- the YAML example above passes no mapping,
    # which reaches the task as None.
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'osd_failsafe_enospc task only accepts a dict for configuration'

    # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding
    # NOTE(review): value reconstructed as the sum spelled out above.
    sleep_time = 50
    # NOTE(review): short pause after putting a ratio back to its default;
    # reconstructed, not visible in the reviewed copy -- confirm.
    settle_time = 10

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    manager = ctx.managers['ceph']

    # create 1 pg pool with 1 rep which can only be on osd.0
    osds = manager.get_osd_dump()
    # NOTE(review): loop and condition reconstructed from the comment above.
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_out_osd(osd['osd'])

    log.info('creating pool foo')
    manager.create_pool("foo")
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')

    log.info('1. Verify warning messages when exceeding nearfull_ratio')

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    proc = _start_cluster_log_watch(mon)
    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .00001')
    time.sleep(sleep_time)
    lines = _collect_cluster_log(proc)

    count = _count_lines_containing(lines, _NEARFULL_WARNING)
    assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count
    count = _count_lines_containing(lines, _FULL_ERROR)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    log.info('2. Verify error messages when exceeding full_ratio')

    proc = _start_cluster_log_watch(mon)
    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001')
    time.sleep(sleep_time)
    lines = _collect_cluster_log(proc)

    count = _count_lines_containing(lines, _FULL_ERROR)
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    log.info('3. Verify write failure when exceeding full_ratio')

    # Write data should fail
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
    assert ret != 0, 'Expected write failure but it succeeded with exit status 0'

    # Put the full ratio back to its default
    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
    time.sleep(settle_time)

    log.info('4. Verify write success when NOT exceeding full_ratio')

    # Write should succeed
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
    assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret

    log.info('5. Verify warning messages again when exceeding nearfull_ratio')

    # The nearfull ratio is still the tiny value injected in step 1, so no
    # new injectargs is needed here.
    proc = _start_cluster_log_watch(mon)
    time.sleep(sleep_time)
    lines = _collect_cluster_log(proc)

    count = _count_lines_containing(lines, _NEARFULL_WARNING)
    assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count
    count = _count_lines_containing(lines, _FULL_ERROR)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90')
    time.sleep(settle_time)

    log.info('6. Verify error messages again when exceeding full_ratio')

    proc = _start_cluster_log_watch(mon)
    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001')
    time.sleep(sleep_time)
    lines = _collect_cluster_log(proc)

    count = _count_lines_containing(lines, _NEARFULL_WARNING)
    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
    count = _count_lines_containing(lines, _FULL_ERROR)
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    log.info('7. Verify no messages settings back to default')

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
    time.sleep(settle_time)

    proc = _start_cluster_log_watch(mon)
    time.sleep(sleep_time)
    lines = _collect_cluster_log(proc)

    count = _count_lines_containing(lines, _NEARFULL_WARNING)
    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
    count = _count_lines_containing(lines, _FULL_ERROR)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    log.info('Test Passed')

    # Bring all OSDs back in
    manager.remove_pool("foo")
    # NOTE(review): loop and condition reconstructed to mirror the mark-out
    # loop at the start of the task.
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_in_osd(osd['osd'])