]>
git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/rbd_mirror_thrash.py
2 Task for thrashing rbd-mirror daemons
12 from gevent
import sleep
13 from gevent
.greenlet
import Greenlet
14 from gevent
.event
import Event
16 from teuthology
.exceptions
import CommandFailedError
17 from teuthology
.orchestra
import run
18 from tasks
.thrasher
import Thrasher
20 log
= logging
.getLogger(__name__
)
23 class RBDMirrorThrasher(Thrasher
, Greenlet
):
27 The RBDMirrorThrasher thrashes rbd-mirror daemons during execution of other
28 tasks (workunits, etc).
30 The config is optional. Many of the config parameters are a maximum value
31 to use when selecting a random value from a range. The config is a dict
32 containing some or all of:
34 cluster: [default: ceph] cluster to thrash
36 max_thrash: [default: 1] the maximum number of active rbd-mirror daemons per
37 cluster will be thrashed at any given time.
39 min_thrash_delay: [default: 60] minimum number of seconds to delay before
42 max_thrash_delay: [default: 120] maximum number of seconds to delay before
45 max_revive_delay: [default: 10] maximum number of seconds to delay before
46 bringing back a thrashed rbd-mirror daemon.
48 randomize: [default: true] enables randomization and use the max/min values
50 seed: [no default] seed the random number generator
54 The following example disables randomization, and uses the max delay
64 def __init__(self
, ctx
, config
, cluster
, daemons
):
65 super(RBDMirrorThrasher
, self
).__init
__()
69 self
.cluster
= cluster
70 self
.daemons
= daemons
73 self
.name
= 'thrasher.rbd_mirror.[{cluster}]'.format(cluster
= cluster
)
74 self
.stopping
= Event()
76 self
.randomize
= bool(self
.config
.get('randomize', True))
77 self
.max_thrash
= int(self
.config
.get('max_thrash', 1))
78 self
.min_thrash_delay
= float(self
.config
.get('min_thrash_delay', 60.0))
79 self
.max_thrash_delay
= float(self
.config
.get('max_thrash_delay', 120.0))
80 self
.max_revive_delay
= float(self
.config
.get('max_revive_delay', 10.0))
85 except Exception as e
:
86 # See _run exception comment for MDSThrasher
87 self
.set_thrasher_exception(e
)
88 self
.logger
.exception("exception:")
89 # Allow successful completion so gevent doesn't see an exception.
90 # The DaemonWatchdog will observe the error and tear down the test.
93 """Write data to logger assigned to this RBDMirrorThrasher"""
101 Perform the random thrashing action
104 self
.log('starting thrash for cluster {cluster}'.format(cluster
=self
.cluster
))
109 while not self
.stopping
.is_set():
110 delay
= self
.max_thrash_delay
112 delay
= random
.randrange(self
.min_thrash_delay
, self
.max_thrash_delay
)
115 self
.log('waiting for {delay} secs before thrashing'.format(delay
=delay
))
116 self
.stopping
.wait(delay
)
117 if self
.stopping
.is_set():
122 weight
= 1.0 / len(self
.daemons
)
124 for daemon
in self
.daemons
:
125 skip
= random
.uniform(0.0, 1.0)
127 self
.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format(
128 label
=daemon
.id_
, skip
=skip
, weight
=weight
))
131 self
.log('kill {label}'.format(label
=daemon
.id_
))
133 daemon
.signal(signal
.SIGTERM
)
136 killed_daemons
.append(daemon
)
139 # if we've reached max_thrash, we're done
141 if count
>= self
.max_thrash
:
145 # wait for a while before restarting
146 delay
= self
.max_revive_delay
148 delay
= random
.randrange(0.0, self
.max_revive_delay
)
150 self
.log('waiting for {delay} secs before reviving daemons'.format(delay
=delay
))
153 for daemon
in killed_daemons
:
154 self
.log('waiting for {label}'.format(label
=daemon
.id_
))
156 run
.wait([daemon
.proc
], timeout
=600)
157 except CommandFailedError
:
160 self
.log('Failed to stop {label}'.format(label
=daemon
.id_
))
163 # try to capture a core dump
164 daemon
.signal(signal
.SIGABRT
)
171 for daemon
in killed_daemons
:
172 self
.log('reviving {label}'.format(label
=daemon
.id_
))
176 self
.log("stat['{key}'] = {value}".format(key
= stat
, value
= stats
[stat
]))
178 @contextlib.contextmanager
179 def task(ctx
, config
):
181 Stress test the rbd-mirror by thrashing while another task/workunit
184 Please refer to RBDMirrorThrasher class for further information on the
189 assert isinstance(config
, dict), \
190 'rbd_mirror_thrash task only accepts a dict for configuration'
192 cluster
= config
.get('cluster', 'ceph')
193 daemons
= list(ctx
.daemons
.iter_daemons_of_role('rbd-mirror', cluster
))
194 assert len(daemons
) > 0, \
195 'rbd_mirror_thrash task requires at least 1 rbd-mirror daemon'
199 seed
= int(config
['seed'])
201 seed
= int(time
.time())
202 log
.info('rbd_mirror_thrash using random seed: {seed}'.format(seed
=seed
))
205 thrasher
= RBDMirrorThrasher(ctx
, config
, cluster
, daemons
)
207 ctx
.ceph
[cluster
].thrashers
.append(thrasher
)
210 log
.debug('Yielding')
213 log
.info('joining rbd_mirror_thrash')
215 if thrasher
.exception
is not None:
216 raise RuntimeError('error during thrashing')
218 log
.info('done joining')