]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | """ |
2 | Monitor thrash | |
3 | """ | |
4 | import logging | |
5 | import contextlib | |
7c673cae FG |
6 | import random |
7 | import time | |
8 | import gevent | |
9 | import json | |
10 | import math | |
11 | from teuthology import misc as teuthology | |
9f95a23c TL |
12 | from tasks import ceph_manager |
13 | from tasks.cephfs.filesystem import MDSCluster | |
14 | from tasks.thrasher import Thrasher | |
7c673cae FG |
15 | |
16 | log = logging.getLogger(__name__) | |
17 | ||
def _get_mons(ctx):
    """
    Return the names of all monitors known to the run context, with the
    'mon.' role prefix stripped off.
    """
    prefix = 'mon.'
    names = []
    for role in teuthology.get_mon_names(ctx):
        names.append(role[len(prefix):])
    return names
24 | ||
class MonitorThrasher(Thrasher):
    """
    How it works::

    - pick a monitor
    - kill it
    - wait for quorum to be formed
    - sleep for 'revive_delay' seconds
    - revive monitor
    - wait for quorum to be formed
    - sleep for 'thrash_delay' seconds

    Options::

    seed                      Seed to use on the RNG to reproduce a previous
                              behaviour (default: None; i.e., not set)
    revive_delay              Number of seconds to wait before reviving
                              the monitor (default: 10)
    thrash_delay              Number of seconds to wait in-between
                              test iterations (default: 0)
    store_thrash              Thrash monitor store before killing the monitor
                              being thrashed (default: False)
    store_thrash_probability  Probability of thrashing a monitor's store
                              (default: 50)
    thrash_many               Thrash multiple monitors instead of just one. If
                              'maintain_quorum' is set to False, then we will
                              thrash up to as many monitors as there are
                              available. (default: False)
    maintain_quorum           Always maintain quorum, taking care on how many
                              monitors we kill during the thrashing. If we
                              happen to only have one or two monitors
                              configured, if this option is set to True, then
                              we won't run this task as we cannot guarantee
                              maintenance of quorum. Setting it to false
                              however would allow the task to run with as many
                              as just one single monitor. (default: True)
    freeze_mon_probability    how often to freeze the mon instead of killing
                              it, in % (default: 10)
    freeze_mon_duration       how many seconds to freeze the mon (default: 15)
    scrub                     Scrub after each iteration (default: True)
    check_mds_failover        Check if mds failover happened (default: False)

    Note: if 'store_thrash' is set to True, then 'maintain_quorum' must also
    be set to True.

    For example::

    tasks:
    - ceph:
    - mon_thrash:
        revive_delay: 20
        thrash_delay: 1
        store_thrash: true
        store_thrash_probability: 40
        seed: 31337
        maintain_quorum: true
        thrash_many: true
        check_mds_failover: True
    - ceph-fuse:
    - workunit:
        clients:
          all:
            - mon/workloadgen.sh
    """
    def __init__(self, ctx, manager, config, name, logger):
        """
        Initialize the thrasher and spawn the background thrashing greenlet.

        :param ctx: teuthology run context
        :param manager: CephManager instance used to drive the cluster
        :param config: task configuration dict (may be None)
        :param name: identifying name for this thrasher instance
        :param logger: logger used by the log() helper
        """
        super(MonitorThrasher, self).__init__()

        self.ctx = ctx
        self.manager = manager
        # Start from a healthy cluster before we begin breaking it.
        self.manager.wait_for_clean()

        self.stopping = False
        self.logger = logger
        self.config = config
        self.name = name

        if self.config is None:
            self.config = dict()

        # Test reproducibility: seeding the RNG with a previous run's seed
        # replays the same kill/freeze sequence.
        self.random_seed = self.config.get('seed', None)

        if self.random_seed is None:
            self.random_seed = int(time.time())

        self.rng = random.Random()
        self.rng.seed(int(self.random_seed))

        # Monitor thrashing knobs
        self.revive_delay = float(self.config.get('revive_delay', 10.0))
        self.thrash_delay = float(self.config.get('thrash_delay', 0.0))

        self.thrash_many = self.config.get('thrash_many', False)
        self.maintain_quorum = self.config.get('maintain_quorum', True)

        self.scrub = self.config.get('scrub', True)

        self.freeze_mon_probability = float(self.config.get('freeze_mon_probability', 10))
        self.freeze_mon_duration = float(self.config.get('freeze_mon_duration', 15.0))

        assert self.max_killable() > 0, \
            'Unable to kill at least one monitor with the current config.'

        # Store thrashing knobs
        self.store_thrash = self.config.get('store_thrash', False)
        self.store_thrash_probability = int(
            self.config.get('store_thrash_probability', 50))
        if self.store_thrash:
            assert self.store_thrash_probability > 0, \
                'store_thrash is set, probability must be > 0'
            assert self.maintain_quorum, \
                'store_thrash = true must imply maintain_quorum = true'

        # MDS failover detection
        self.mds_failover = self.config.get('check_mds_failover', False)

        if self.mds_failover:
            self.mds_cluster = MDSCluster(ctx)

        self.thread = gevent.spawn(self.do_thrash)

    def log(self, x):
        """
        locally log info messages
        """
        self.logger.info(x)

    def do_join(self):
        """
        Break out of this processes thrashing loop.
        """
        self.stopping = True
        # get() re-raises any exception the greenlet terminated with.
        self.thread.get()

    def should_thrash_store(self):
        """
        If allowed, indicate that we should thrash a certain percentage of
        the time as determined by the store_thrash_probability value.
        """
        if not self.store_thrash:
            return False
        return self.rng.randrange(0, 101) < self.store_thrash_probability

    def thrash_store(self, mon):
        """
        Thrash the monitor specified.
        :param mon: monitor to thrash
        """
        self.log('thrashing mon.{id} store'.format(id=mon))
        out = self.manager.raw_cluster_cmd(
            'tell', 'mon.%s' % mon, 'sync_force',
            '--yes-i-really-mean-it')
        j = json.loads(out)
        assert j['ret'] == 0, \
            'error forcing store sync on mon.{id}:\n{ret}'.format(
                id=mon,ret=out)

    def should_freeze_mon(self):
        """
        Indicate that we should freeze a certain percentage of the time
        as determined by the freeze_mon_probability value.
        """
        return self.rng.randrange(0, 101) < self.freeze_mon_probability

    def freeze_mon(self, mon):
        """
        Send STOP signal to freeze the monitor.
        """
        log.info('Sending STOP to mon %s', mon)
        self.manager.signal_mon(mon, 19)  # STOP

    def unfreeze_mon(self, mon):
        """
        Send CONT signal to unfreeze the monitor.
        """
        log.info('Sending CONT to mon %s', mon)
        self.manager.signal_mon(mon, 18)  # CONT

    def kill_mon(self, mon):
        """
        Kill the monitor specified
        """
        self.log('killing mon.{id}'.format(id=mon))
        self.manager.kill_mon(mon)

    def revive_mon(self, mon):
        """
        Revive the monitor specified
        """
        # NOTE: a stray duplicate 'killing mon.{id}' log line was removed
        # here; this method only revives.
        self.log('reviving mon.{id}'.format(id=mon))
        self.manager.revive_mon(mon)

    def max_killable(self):
        """
        Return the maximum number of monitors we can kill.
        """
        m = len(_get_mons(self.ctx))
        if self.maintain_quorum:
            # Keep a strict majority alive: with m mons we may take down at
            # most ceil(m/2)-1 of them.
            return max(math.ceil(m/2.0)-1, 0)
        else:
            return m

    def do_thrash(self):
        """
        _do_thrash() wrapper.
        """
        try:
            self._do_thrash()
        except Exception as e:
            # See _run exception comment for MDSThrasher
            self.set_thrasher_exception(e)
            self.logger.exception("exception:")
            # Allow successful completion so gevent doesn't see an exception.
            # The DaemonWatchdog will observe the error and tear down the test.

    def _do_thrash(self):
        """
        Continuously loop and thrash the monitors.
        """
        # status before mon thrashing
        if self.mds_failover:
            oldstatus = self.mds_cluster.status()

        self.log('start thrashing')
        self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\
                   'thrash many: {tm}, maintain quorum: {mq} '\
                   'store thrash: {st}, probability: {stp} '\
                   'freeze mon: prob {fp} duration {fd}'.format(
                s=self.random_seed,r=self.revive_delay,t=self.thrash_delay,
                tm=self.thrash_many, mq=self.maintain_quorum,
                st=self.store_thrash,stp=self.store_thrash_probability,
                fp=self.freeze_mon_probability,fd=self.freeze_mon_duration,
                ))

        while not self.stopping:
            mons = _get_mons(self.ctx)
            self.manager.wait_for_mon_quorum_size(len(mons))
            self.log('making sure all monitors are in the quorum')
            for m in mons:
                s = self.manager.get_mon_status(m)
                assert s['state'] == 'leader' or s['state'] == 'peon'
                assert len(s['quorum']) == len(mons)

            kill_up_to = self.rng.randrange(1, self.max_killable()+1)
            mons_to_kill = self.rng.sample(mons, kill_up_to)
            self.log('monitors to thrash: {m}'.format(m=mons_to_kill))

            # Surviving monitors may additionally be frozen (SIGSTOP'd).
            mons_to_freeze = []
            for mon in mons:
                if mon in mons_to_kill:
                    continue
                if self.should_freeze_mon():
                    mons_to_freeze.append(mon)
            self.log('monitors to freeze: {m}'.format(m=mons_to_freeze))

            for mon in mons_to_kill:
                self.log('thrashing mon.{m}'.format(m=mon))

                # we only thrash stores if we are maintaining quorum
                if self.should_thrash_store() and self.maintain_quorum:
                    self.thrash_store(mon)

                self.kill_mon(mon)

            if mons_to_freeze:
                for mon in mons_to_freeze:
                    self.freeze_mon(mon)
                self.log('waiting for {delay} secs to unfreeze mons'.format(
                    delay=self.freeze_mon_duration))
                time.sleep(self.freeze_mon_duration)
                for mon in mons_to_freeze:
                    self.unfreeze_mon(mon)

            if self.maintain_quorum:
                self.manager.wait_for_mon_quorum_size(len(mons)-len(mons_to_kill))
                for m in mons:
                    if m in mons_to_kill:
                        continue
                    s = self.manager.get_mon_status(m)
                    assert s['state'] == 'leader' or s['state'] == 'peon'
                    assert len(s['quorum']) == len(mons)-len(mons_to_kill)

            self.log('waiting for {delay} secs before reviving monitors'.format(
                delay=self.revive_delay))
            time.sleep(self.revive_delay)

            for mon in mons_to_kill:
                self.revive_mon(mon)
            # do more freezes
            if mons_to_freeze:
                for mon in mons_to_freeze:
                    self.freeze_mon(mon)
                self.log('waiting for {delay} secs to unfreeze mons'.format(
                    delay=self.freeze_mon_duration))
                time.sleep(self.freeze_mon_duration)
                for mon in mons_to_freeze:
                    self.unfreeze_mon(mon)

            self.manager.wait_for_mon_quorum_size(len(mons))
            for m in mons:
                s = self.manager.get_mon_status(m)
                assert s['state'] == 'leader' or s['state'] == 'peon'
                assert len(s['quorum']) == len(mons)

            if self.scrub:
                self.log('triggering scrub')
                try:
                    self.manager.raw_cluster_cmd('mon', 'scrub')
                except Exception as e:
                    # Best-effort: scrub failures must not abort the thrasher.
                    log.warning("Ignoring exception while triggering scrub: %s", e)

            if self.thrash_delay > 0.0:
                self.log('waiting for {delay} secs before continuing thrashing'.format(
                    delay=self.thrash_delay))
                time.sleep(self.thrash_delay)

        # status after thrashing
        if self.mds_failover:
            status = self.mds_cluster.status()
            assert not oldstatus.hadfailover(status), \
                'MDS Failover'
347 | ||
7c673cae FG |
@contextlib.contextmanager
def task(ctx, config):
    """
    Stress test the monitor by thrashing them while another task/workunit
    is running.

    Please refer to MonitorThrasher class for further information on the
    available options.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mon_thrash task only accepts a dict for configuration'
    assert len(_get_mons(ctx)) > 2, \
        'mon_thrash task requires at least 3 monitors'

    # Fall back to the default cluster name when none was given.
    config.setdefault('cluster', 'ceph')

    log.info('Beginning mon_thrash...')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys()
    manager = ceph_manager.CephManager(
        mon_remote,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )
    thrasher = MonitorThrasher(
        ctx, manager, config, "MonitorThrasher",
        logger=log.getChild('mon_thrasher'))
    # Register with the cluster so the watchdog can observe failures.
    ctx.ceph[config['cluster']].thrashers.append(thrasher)
    try:
        log.debug('Yielding')
        yield
    finally:
        # Stop thrashing and wait for full quorum before handing back.
        log.info('joining mon_thrasher')
        thrasher.do_join()
        manager.wait_for_mon_quorum_size(len(_get_mons(ctx)))