1"""
2Thrash mds by simulating failures
3"""
4import logging
5import contextlib
7c673cae
FG
6import itertools
7import random
7c673cae
FG
8import time
9
10from gevent import sleep
11from gevent.greenlet import Greenlet
12from gevent.event import Event
13from teuthology import misc as teuthology
14
e306af50 15from tasks import ceph_manager
f67539c2 16from tasks.cephfs.filesystem import MDSCluster, Filesystem, FSMissing
9f95a23c 17from tasks.thrasher import Thrasher
7c673cae
FG
18
19log = logging.getLogger(__name__)
20
class MDSThrasher(Thrasher, Greenlet):
22 """
23 MDSThrasher::
24
25 The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).
26
27 The config is optional. Many of the config parameters are a a maximum value
28 to use when selecting a random value from a range. To always use the maximum
29 value, set no_random to true. The config is a dict containing some or all of:
30
31 max_thrash: [default: 1] the maximum number of active MDSs per FS that will be thrashed at
32 any given time.
33
34 max_thrash_delay: [default: 30] maximum number of seconds to delay before
35 thrashing again.
36
37 max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
38 the replay state before thrashing.
39
40 max_revive_delay: [default: 10] maximum number of seconds to delay before
41 bringing back a thrashed MDS.
42
43 randomize: [default: true] enables randomization and use the max/min values
44
45 seed: [no default] seed the random number generator
46
47 thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
48 during replay. Value should be between 0.0 and 1.0.
49
c07f9fc5 50 thrash_max_mds: [default: 0.05] likelihood that the max_mds of the mds
7c673cae 51 cluster will be modified to a value [1, current) or (current, starting
11fdf7f2 52 max_mds]. Value should be between 0.0 and 1.0.
7c673cae
FG
53
54 thrash_while_stopping: [default: false] thrash an MDS while there
55 are MDS in up:stopping (because max_mds was changed and some
56 MDS were deactivated).
57
58 thrash_weights: allows specific MDSs to be thrashed more/less frequently.
59 This option overrides anything specified by max_thrash. This option is a
60 dict containing mds.x: weight pairs. For example, [mds.a: 0.7, mds.b:
61 0.3, mds.c: 0.0]. Each weight is a value from 0.0 to 1.0. Any MDSs not
62 specified will be automatically given a weight of 0.0 (not thrashed).
63 For a given MDS, by default the trasher delays for up to
64 max_thrash_delay, trashes, waits for the MDS to recover, and iterates.
65 If a non-zero weight is specified for an MDS, for each iteration the
66 thrasher chooses whether to thrash during that iteration based on a
67 random value [0-1] not exceeding the weight of that MDS.
68
69 Examples::
70
71
72 The following example sets the likelihood that mds.a will be thrashed
73 to 80%, mds.b to 20%, and other MDSs will not be thrashed. It also sets the
74 likelihood that an MDS will be thrashed in replay to 40%.
75 Thrash weights do not have to sum to 1.
76
77 tasks:
78 - ceph:
79 - mds_thrash:
80 thrash_weights:
81 - mds.a: 0.8
82 - mds.b: 0.2
83 thrash_in_replay: 0.4
84 - ceph-fuse:
85 - workunit:
86 clients:
87 all: [suites/fsx.sh]
88
89 The following example disables randomization, and uses the max delay values:
90
91 tasks:
92 - ceph:
93 - mds_thrash:
94 max_thrash_delay: 10
95 max_revive_delay: 1
96 max_replay_thrash_delay: 4
97
98 """

    def __init__(self, ctx, manager, config, fs, max_mds):
        super(MDSThrasher, self).__init__()

        self.config = config
        self.ctx = ctx
        self.logger = log.getChild('fs.[{f}]'.format(f = fs.name))
        self.fs = fs
        self.manager = manager
        self.max_mds = max_mds
        self.name = 'thrasher.fs.[{f}]'.format(f = fs.name)
        self.stopping = Event()

        self.randomize = bool(self.config.get('randomize', True))
        self.thrash_max_mds = float(self.config.get('thrash_max_mds', 0.05))
        self.max_thrash = int(self.config.get('max_thrash', 1))
        self.max_thrash_delay = float(self.config.get('max_thrash_delay', 120.0))
        self.thrash_in_replay = float(self.config.get('thrash_in_replay', 0.0))
        assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format(
            v=self.thrash_in_replay)
        self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0))
        self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))

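    # For reference, a plain-dict config equivalent to the second YAML example
    # in the class docstring (illustrative values, not defaults):
    #
    #   config = {
    #       'randomize': False,
    #       'max_thrash_delay': 10,
    #       'max_revive_delay': 1,
    #       'max_replay_thrash_delay': 4,
    #   }
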
    def _run(self):
        try:
            self.do_thrash()
        except FSMissing:
            pass
        except Exception as e:
            # Log exceptions here so we get the full backtrace (gevent loses them).
            # Also allow successful completion as gevent exception handling is a broken mess:
            #
            # 2017-02-03T14:34:01.259 CRITICAL:root:  File "gevent.libev.corecext.pyx", line 367, in gevent.libev.corecext.loop.handle_error (src/gevent/libev/gevent.corecext.c:5051)
            #   File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 558, in handle_error
            #     self.print_exception(context, type, value, tb)
            #   File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 605, in print_exception
            #     traceback.print_exception(type, value, tb, file=errstream)
            #   File "/usr/lib/python2.7/traceback.py", line 124, in print_exception
            #     _print(file, 'Traceback (most recent call last):')
            #   File "/usr/lib/python2.7/traceback.py", line 13, in _print
            #     file.write(str+terminator)
            # 2017-02-03T14:34:01.261 CRITICAL:root:IOError
            self.set_thrasher_exception(e)
            self.logger.exception("exception:")
            # allow successful completion so gevent doesn't see an exception...

    def log(self, x):
        """Write data to the logger assigned to MDSThrasher"""
        self.logger.info(x)

    def stop(self):
        self.stopping.set()

    def kill_mds(self, mds):
        if self.config.get('powercycle'):
            (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
                         remotes.keys())
            self.log('kill_mds on mds.{m} doing powercycle of {s}'.
                     format(m=mds, s=remote.name))
            self._assert_ipmi(remote)
            remote.console.power_off()
        else:
            self.ctx.daemons.get_daemon('mds', mds).stop()

    @staticmethod
    def _assert_ipmi(remote):
        assert remote.console.has_ipmi_credentials, (
            "powercycling requested but RemoteConsole is not "
            "initialized. Check ipmi config.")

    def revive_mds(self, mds):
        """
        Revive mds -- do an IPMI powercycle (if indicated by the config)
        and then restart.
        """
        if self.config.get('powercycle'):
            (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
                         remotes.keys())
            self.log('revive_mds on mds.{m} doing powercycle of {s}'.
                     format(m=mds, s=remote.name))
            self._assert_ipmi(remote)
            remote.console.power_on()
            self.manager.make_admin_daemon_dir(self.ctx, remote)
        args = []
        self.ctx.daemons.get_daemon('mds', mds).restart(*args)

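    # Note on wait_for_stable() below: when called as wait_for_stable(rank, gid)
    # it returns once a different gid holds that rank in up:active, or once no
    # standby could possibly take the rank over (actives >= max_mds); when
    # called with no arguments it returns once the number of non-laggy
    # up:active ranks equals max_mds. It raises RuntimeError if the cluster
    # has not stabilized after roughly five minutes.
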
    def wait_for_stable(self, rank = None, gid = None):
        self.log('waiting for mds cluster to stabilize...')
        for itercount in itertools.count():
            status = self.fs.status()
            max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds']
            ranks = list(status.get_ranks(self.fs.id))
            stopping = sum(1 for _ in ranks if "up:stopping" == _['state'])
            actives = sum(1 for _ in ranks
                          if "up:active" == _['state'] and "laggy_since" not in _)

            if not bool(self.config.get('thrash_while_stopping', False)) and stopping > 0:
                if itercount % 5 == 0:
                    self.log('cluster is considered unstable while MDS are in up:stopping (!thrash_while_stopping)')
            else:
                if rank is not None:
                    try:
                        info = status.get_rank(self.fs.id, rank)
                        if info['gid'] != gid and "up:active" == info['state']:
                            self.log('mds.{name} has gained rank={rank}, replacing gid={gid}'.format(name = info['name'], rank = rank, gid = gid))
                            return status
                    except:
                        pass # no rank present
                    if actives >= max_mds:
                        # no replacement can occur!
                        self.log("cluster has {actives} actives (max_mds is {max_mds}), no MDS can replace rank {rank}".format(
                            actives=actives, max_mds=max_mds, rank=rank))
                        return status
                else:
                    if actives == max_mds:
                        self.log('mds cluster has {count} alive and active, now stable!'.format(count = actives))
                        return status, None
            if itercount > 300/2: # 5 minutes
                raise RuntimeError('timeout waiting for cluster to stabilize')
            elif itercount % 5 == 0:
                self.log('mds map: {status}'.format(status=status))
            else:
                self.log('no change')
            sleep(2)

    def do_thrash(self):
        """
        Perform the random thrashing action
        """

        self.log('starting mds_do_thrash for fs {fs}'.format(fs = self.fs.name))
        stats = {
            "max_mds": 0,
            "deactivate": 0,
            "kill": 0,
        }

        while not self.stopping.is_set():
            delay = self.max_thrash_delay
            if self.randomize:
                delay = random.uniform(0.0, self.max_thrash_delay)

            if delay > 0.0:
                self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
                self.stopping.wait(delay)
                if self.stopping.is_set():
                    continue

            status = self.fs.status()

            if random.random() <= self.thrash_max_mds:
                max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds']
                options = [i for i in range(1, self.max_mds + 1) if i != max_mds]
                if len(options) > 0:
                    new_max_mds = random.choice(options)
                    self.log('thrashing max_mds: %d -> %d' % (max_mds, new_max_mds))
                    self.fs.set_max_mds(new_max_mds)
                    stats['max_mds'] += 1
                    self.wait_for_stable()

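            # For example, with self.max_mds == 3 and a current max_mds of 2,
            # options is [1, 3] and one of the two values is chosen at random
            # and applied via self.fs.set_max_mds().
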
            count = 0
            for info in status.get_ranks(self.fs.id):
                name = info['name']
                label = 'mds.' + name
                rank = info['rank']
                gid = info['gid']

                # if thrash_weights isn't specified and we've reached max_thrash,
                # we're done
                count = count + 1
                if 'thrash_weights' not in self.config and count > self.max_thrash:
                    break

                weight = 1.0
                if 'thrash_weights' in self.config:
                    weight = float(self.config['thrash_weights'].get(label, 0.0))
                skip = random.random()
                if weight <= skip:
                    self.log('skipping thrash iteration with skip ({skip}) >= weight ({weight})'.format(skip=skip, weight=weight))
                    continue

                self.log('kill {label} (rank={rank})'.format(label=label, rank=rank))
                self.kill_mds(name)
                stats['kill'] += 1

                # wait for mon to report killed mds as crashed
                last_laggy_since = None
                itercount = 0
                while True:
                    status = self.fs.status()
                    info = status.get_mds(name)
                    if not info:
                        break
                    if 'laggy_since' in info:
                        last_laggy_since = info['laggy_since']
                        break
                    if any([(f == name) for f in status.get_fsmap(self.fs.id)['mdsmap']['failed']]):
                        break
                    self.log(
                        'waiting till mds map indicates {label} is laggy/crashed, in failed state, or {label} is removed from mdsmap'.format(
                            label=label))
                    itercount = itercount + 1
                    if itercount > 10:
                        self.log('mds map: {status}'.format(status=status))
                    sleep(2)

                if last_laggy_since:
                    self.log(
                        '{label} reported laggy/crashed since: {since}'.format(label=label, since=last_laggy_since))
                else:
                    self.log('{label} down, removed from mdsmap'.format(label=label))

                # wait for a standby mds to takeover and become active
                status = self.wait_for_stable(rank, gid)

                # wait for a while before restarting old active to become new
                # standby
                delay = self.max_revive_delay
                if self.randomize:
                    delay = random.uniform(0.0, self.max_revive_delay)

                self.log('waiting for {delay} secs before reviving {label}'.format(
                    delay=delay, label=label))
                sleep(delay)

                self.log('reviving {label}'.format(label=label))
                self.revive_mds(name)

                for itercount in itertools.count():
                    if itercount > 300/2: # 5 minutes
                        raise RuntimeError('timeout waiting for MDS to revive')
                    status = self.fs.status()
                    info = status.get_mds(name)
                    if info and info['state'] in ('up:standby', 'up:standby-replay', 'up:active'):
                        self.log('{label} reported in {state} state'.format(label=label, state=info['state']))
                        break
                    self.log(
                        'waiting till mds map indicates {label} is in active, standby or standby-replay'.format(label=label))
                    sleep(2)

            for stat in stats:
                self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat]))

            # don't do replay thrashing right now
#            for info in status.get_replays(self.fs.id):
#                # this might race with replay -> active transition...
#                if status['state'] == 'up:replay' and random.randrange(0.0, 1.0) < self.thrash_in_replay:
#                    delay = self.max_replay_thrash_delay
#                    if self.randomize:
#                        delay = random.randrange(0.0, self.max_replay_thrash_delay)
#                    sleep(delay)
#                    self.log('kill replaying mds.{id}'.format(id=self.to_kill))
#                    self.kill_mds(self.to_kill)
#
#                    delay = self.max_revive_delay
#                    if self.randomize:
#                        delay = random.randrange(0.0, self.max_revive_delay)
#
#                    self.log('waiting for {delay} secs before reviving mds.{id}'.format(
#                        delay=delay, id=self.to_kill))
#                    sleep(delay)
#
#                    self.log('revive mds.{id}'.format(id=self.to_kill))
#                    self.revive_mds(self.to_kill)


@contextlib.contextmanager
def task(ctx, config):
    """
    Stress test the mds by thrashing while another task/workunit
    is running.

    Please refer to MDSThrasher class for further information on the
    available options.
    """

    mds_cluster = MDSCluster(ctx)

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mds_thrash task only accepts a dict for configuration'
    mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))
    assert len(mdslist) > 1, \
        'mds_thrash task requires at least 2 metadata servers'

    # choose random seed
    if 'seed' in config:
        seed = int(config['seed'])
    else:
        seed = int(time.time())
    log.info('mds thrasher using random seed: {seed}'.format(seed=seed))
    random.seed(seed)

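    # The chosen seed is logged above so a failing run can be reproduced by
    # feeding the same value back in through the task's 'seed' option (for
    # example, "seed: 31337" in the mds_thrash config).
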
    (first,) = ctx.cluster.only('mds.{_id}'.format(_id=mdslist[0])).remotes.keys()
    manager = ceph_manager.CephManager(
        first, ctx=ctx, logger=log.getChild('ceph_manager'),
    )

    # make sure everyone is in active, standby, or standby-replay
    log.info('Wait for all MDSs to reach steady state...')
    status = mds_cluster.status()
    while True:
        steady = True
        for info in status.get_all():
            state = info['state']
            if state not in ('up:active', 'up:standby', 'up:standby-replay'):
                steady = False
                break
        if steady:
            break
        sleep(2)
        status = mds_cluster.status()
    log.info('Ready to start thrashing')

    manager.wait_for_clean()
    assert manager.is_clean()

    if 'cluster' not in config:
        config['cluster'] = 'ceph'

    thrashers = []
    for fs in status.get_filesystems():
        thrasher = MDSThrasher(ctx, manager, config, Filesystem(ctx, fscid=fs['id']), fs['mdsmap']['max_mds'])
        thrasher.start()
        ctx.ceph[config['cluster']].thrashers.append(thrasher)
        thrashers.append(thrasher)

    try:
        log.debug('Yielding')
        yield
    finally:
        log.info('joining mds_thrasher')
        for thrasher in thrashers:
            thrasher.stop()
        for thrasher in thrashers:
            if thrasher.exception is not None:
                raise RuntimeError('error during thrashing')
            thrasher.join()
        log.info('done joining')