]>
git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/mds_thrash.py
2 Thrash mds by simulating failures
10 from gevent
import sleep
11 from gevent
. greenlet
import Greenlet
12 from gevent
. event
import Event
13 from teuthology
import misc
as teuthology
15 from tasks
import ceph_manager
16 from tasks
. cephfs
. filesystem
import MDSCluster
, Filesystem
17 from tasks
. thrasher
import Thrasher
19 log
= logging
. getLogger ( __name__
)
21 class MDSThrasher ( Thrasher
, Greenlet
):
25 The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).
27 The config is optional. Many of the config parameters are a a maximum value
28 to use when selecting a random value from a range. To always use the maximum
29 value, set no_random to true. The config is a dict containing some or all of:
31 max_thrash: [default: 1] the maximum number of active MDSs per FS that will be thrashed at
34 max_thrash_delay: [default: 30] maximum number of seconds to delay before
37 max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
38 the replay state before thrashing.
40 max_revive_delay: [default: 10] maximum number of seconds to delay before
41 bringing back a thrashed MDS.
43 randomize: [default: true] enables randomization and use the max/min values
45 seed: [no default] seed the random number generator
47 thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
48 during replay. Value should be between 0.0 and 1.0.
50 thrash_max_mds: [default: 0.05] likelihood that the max_mds of the mds
51 cluster will be modified to a value [1, current) or (current, starting
52 max_mds]. Value should be between 0.0 and 1.0.
54 thrash_while_stopping: [default: false] thrash an MDS while there
55 are MDS in up:stopping (because max_mds was changed and some
56 MDS were deactivated).
58 thrash_weights: allows specific MDSs to be thrashed more/less frequently.
59 This option overrides anything specified by max_thrash. This option is a
60 dict containing mds.x: weight pairs. For example, [mds.a: 0.7, mds.b:
61 0.3, mds.c: 0.0]. Each weight is a value from 0.0 to 1.0. Any MDSs not
62 specified will be automatically given a weight of 0.0 (not thrashed).
63 For a given MDS, by default the trasher delays for up to
64 max_thrash_delay, trashes, waits for the MDS to recover, and iterates.
65 If a non-zero weight is specified for an MDS, for each iteration the
66 thrasher chooses whether to thrash during that iteration based on a
67 random value [0-1] not exceeding the weight of that MDS.
72 The following example sets the likelihood that mds.a will be thrashed
73 to 80%, mds.b to 20%, and other MDSs will not be thrashed. It also sets the
74 likelihood that an MDS will be thrashed in replay to 40%.
75 Thrash weights do not have to sum to 1.
89 The following example disables randomization, and uses the max delay values:
96 max_replay_thrash_delay: 4
100 def __init__ ( self
, ctx
, manager
, config
, fs
, max_mds
):
101 super ( MDSThrasher
, self
) .__ init
__ ()
105 self
. logger
= log
. getChild ( 'fs.[ {f} ]' . format ( f
= fs
. name
))
107 self
. manager
= manager
108 self
. max_mds
= max_mds
109 self
. name
= 'thrasher.fs.[ {f} ]' . format ( f
= fs
. name
)
110 self
. stopping
= Event ()
112 self
. randomize
= bool ( self
. config
. get ( 'randomize' , True ))
113 self
. thrash_max_mds
= float ( self
. config
. get ( 'thrash_max_mds' , 0.05 ))
114 self
. max_thrash
= int ( self
. config
. get ( 'max_thrash' , 1 ))
115 self
. max_thrash_delay
= float ( self
. config
. get ( 'thrash_delay' , 120.0 ))
116 self
. thrash_in_replay
= float ( self
. config
. get ( 'thrash_in_replay' , False ))
117 assert self
. thrash_in_replay
>= 0.0 and self
. thrash_in_replay
<= 1.0 , 'thrash_in_replay ( {v} ) must be between [0.0, 1.0]' . format (
118 v
= self
. thrash_in_replay
)
119 self
. max_replay_thrash_delay
= float ( self
. config
. get ( 'max_replay_thrash_delay' , 4.0 ))
120 self
. max_revive_delay
= float ( self
. config
. get ( 'max_revive_delay' , 10.0 ))
125 except Exception as e
:
126 # Log exceptions here so we get the full backtrace (gevent loses them).
127 # Also allow successful completion as gevent exception handling is a broken mess:
129 # 2017-02-03T14:34:01.259 CRITICAL:root: File "gevent.libev.corecext.pyx", line 367, in gevent.libev.corecext.loop.handle_error (src/gevent/libev/gevent.corecext.c:5051)
130 # File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 558, in handle_error
131 # self.print_exception(context, type, value, tb)
132 # File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 605, in print_exception
133 # traceback.print_exception(type, value, tb, file=errstream)
134 # File "/usr/lib/python2.7/traceback.py", line 124, in print_exception
135 # _print(file, 'Traceback (most recent call last):')
136 # File "/usr/lib/python2.7/traceback.py", line 13, in _print
137 # file.write(str+terminator)
138 # 2017-02-03T14:34:01.261 CRITICAL:root:IOError
139 self
. set_thrasher_exception ( e
)
140 self
. logger
. exception ( "exception:" )
141 # allow successful completion so gevent doesn't see an exception...
144 """Write data to the logger assigned to MDSThrasher"""
150 def kill_mds ( self
, mds
):
151 if self
. config
. get ( 'powercycle' ):
152 ( remote
,) = ( self
. ctx
. cluster
. only ( 'mds. {m} ' . format ( m
= mds
)).
154 self
. log ( 'kill_mds on mds. {m} doing powercycle of {s} ' .
155 format ( m
= mds
, s
= remote
. name
))
156 self
._ assert
_ ipmi
( remote
)
157 remote
. console
. power_off ()
159 self
. ctx
. daemons
. get_daemon ( 'mds' , mds
). stop ()
162 def _assert_ipmi ( remote
):
163 assert remote
. console
. has_ipmi_credentials
, (
164 "powercycling requested but RemoteConsole is not "
165 "initialized. Check ipmi config." )
167 def revive_mds ( self
, mds
):
169 Revive mds -- do an ipmpi powercycle (if indicated by the config)
172 if self
. config
. get ( 'powercycle' ):
173 ( remote
,) = ( self
. ctx
. cluster
. only ( 'mds. {m} ' . format ( m
= mds
)).
175 self
. log ( 'revive_mds on mds. {m} doing powercycle of {s} ' .
176 format ( m
= mds
, s
= remote
. name
))
177 self
._ assert
_ ipmi
( remote
)
178 remote
. console
. power_on ()
179 self
. manager
. make_admin_daemon_dir ( self
. ctx
, remote
)
181 self
. ctx
. daemons
. get_daemon ( 'mds' , mds
). restart (* args
)
183 def wait_for_stable ( self
, rank
= None , gid
= None ):
184 self
. log ( 'waiting for mds cluster to stabilize...' )
185 for itercount
in itertools
. count ():
186 status
= self
. fs
. status ()
187 max_mds
= status
. get_fsmap ( self
. fs
. id )[ 'mdsmap' ][ 'max_mds' ]
188 ranks
= list ( status
. get_ranks ( self
. fs
. id ))
189 stopping
= sum ( 1 for _
in ranks
if "up:stopping" == _
[ 'state' ])
190 actives
= sum ( 1 for _
in ranks
191 if "up:active" == _
[ 'state' ] and "laggy_since" not in _
)
193 if not bool ( self
. config
. get ( 'thrash_while_stopping' , False )) and stopping
> 0 :
194 if itercount
% 5 == 0 :
195 self
. log ( 'cluster is considered unstable while MDS are in up:stopping (!thrash_while_stopping)' )
199 info
= status
. get_rank ( self
. fs
. id , rank
)
200 if info
[ 'gid' ] != gid
and "up:active" == info
[ 'state' ]:
201 self
. log ( 'mds. {name} has gained rank= {rank} , replacing gid= {gid} ' . format ( name
= info
[ 'name' ], rank
= rank
, gid
= gid
))
204 pass # no rank present
205 if actives
>= max_mds
:
206 # no replacement can occur!
207 self
. log ( "cluster has {actives} actives (max_mds is {max_mds} ), no MDS can replace rank {rank} " . format (
208 actives
= actives
, max_mds
= max_mds
, rank
= rank
))
211 if actives
== max_mds
:
212 self
. log ( 'mds cluster has {count} alive and active, now stable!' . format ( count
= actives
))
214 if itercount
> 300 / 2 : # 5 minutes
215 raise RuntimeError ( 'timeout waiting for cluster to stabilize' )
216 elif itercount
% 5 == 0 :
217 self
. log ( 'mds map: {status} ' . format ( status
= status
))
219 self
. log ( 'no change' )
224 Perform the random thrashing action
227 self
. log ( 'starting mds_do_thrash for fs {fs} ' . format ( fs
= self
. fs
. name
))
234 while not self
. stopping
. is_set ():
235 delay
= self
. max_thrash_delay
237 delay
= random
. randrange ( 0.0 , self
. max_thrash_delay
)
240 self
. log ( 'waiting for {delay} secs before thrashing' . format ( delay
= delay
))
241 self
. stopping
. wait ( delay
)
242 if self
. stopping
. is_set ():
245 status
= self
. fs
. status ()
247 if random
. random () <= self
. thrash_max_mds
:
248 max_mds
= status
. get_fsmap ( self
. fs
. id )[ 'mdsmap' ][ 'max_mds' ]
249 options
= list ( range ( 1 , max_mds
))+ list ( range ( max_mds
+ 1 , self
. max_mds
+ 1 ))
251 sample
= random
. sample ( options
, 1 )
252 new_max_mds
= sample
[ 0 ]
253 self
. log ( 'thrashing max_mds: %d -> %d ' % ( max_mds
, new_max_mds
))
254 self
. fs
. set_max_mds ( new_max_mds
)
255 stats
[ 'max_mds' ] += 1
256 self
. wait_for_stable ()
259 for info
in status
. get_ranks ( self
. fs
. id ):
261 label
= 'mds.' + name
265 # if thrash_weights isn't specified and we've reached max_thrash,
268 if 'thrash_weights' not in self
. config
and count
> self
. max_thrash
:
272 if 'thrash_weights' in self
. config
:
273 weight
= self
. config
[ 'thrash_weights' ]. get ( label
, '0.0' )
274 skip
= random
. randrange ( 0.0 , 1.0 )
276 self
. log ( 'skipping thrash iteration with skip ( {skip} ) > weight ( {weight} )' . format ( skip
= skip
, weight
= weight
))
279 self
. log ( 'kill {label} (rank= {rank} )' . format ( label
= label
, rank
= rank
))
283 # wait for mon to report killed mds as crashed
284 last_laggy_since
= None
287 status
= self
. fs
. status ()
288 info
= status
. get_mds ( name
)
291 if 'laggy_since' in info
:
292 last_laggy_since
= info
[ 'laggy_since' ]
294 if any ([( f
== name
) for f
in status
. get_fsmap ( self
. fs
. id )[ 'mdsmap' ][ 'failed' ]]):
297 'waiting till mds map indicates {label} is laggy/crashed, in failed state, or {label} is removed from mdsmap' . format (
299 itercount
= itercount
+ 1
301 self
. log ( 'mds map: {status} ' . format ( status
= status
))
306 ' {label} reported laggy/crashed since: {since} ' . format ( label
= label
, since
= last_laggy_since
))
308 self
. log ( ' {label} down, removed from mdsmap' . format ( label
= label
))
310 # wait for a standby mds to takeover and become active
311 status
= self
. wait_for_stable ( rank
, gid
)
313 # wait for a while before restarting old active to become new
315 delay
= self
. max_revive_delay
317 delay
= random
. randrange ( 0.0 , self
. max_revive_delay
)
319 self
. log ( 'waiting for {delay} secs before reviving {label} ' . format (
320 delay
= delay
, label
= label
))
323 self
. log ( 'reviving {label} ' . format ( label
= label
))
324 self
. revive_mds ( name
)
326 for itercount
in itertools
. count ():
327 if itercount
> 300 / 2 : # 5 minutes
328 raise RuntimeError ( 'timeout waiting for MDS to revive' )
329 status
= self
. fs
. status ()
330 info
= status
. get_mds ( name
)
331 if info
and info
[ 'state' ] in ( 'up:standby' , 'up:standby-replay' , 'up:active' ):
332 self
. log ( ' {label} reported in {state} state' . format ( label
= label
, state
= info
[ 'state' ]))
335 'waiting till mds map indicates {label} is in active, standby or standby-replay' . format ( label
= label
))
339 self
. log ( "stat[' {key} '] = {value} " . format ( key
= stat
, value
= stats
[ stat
]))
341 # don't do replay thrashing right now
342 # for info in status.get_replays(self.fs.id):
343 # # this might race with replay -> active transition...
344 # if status['state'] == 'up:replay' and random.randrange(0.0, 1.0) < self.thrash_in_replay:
345 # delay = self.max_replay_thrash_delay
347 # delay = random.randrange(0.0, self.max_replay_thrash_delay)
349 # self.log('kill replaying mds.{id}'.format(id=self.to_kill))
350 # self.kill_mds(self.to_kill)
352 # delay = self.max_revive_delay
354 # delay = random.randrange(0.0, self.max_revive_delay)
356 # self.log('waiting for {delay} secs before reviving mds.{id}'.format(
357 # delay=delay, id=self.to_kill))
360 # self.log('revive mds.{id}'.format(id=self.to_kill))
361 # self.revive_mds(self.to_kill)
364 @contextlib . contextmanager
365 def task ( ctx
, config
):
367 Stress test the mds by thrashing while another task/workunit
370 Please refer to MDSThrasher class for further information on the
374 mds_cluster
= MDSCluster ( ctx
)
378 assert isinstance ( config
, dict ), \
379 'mds_thrash task only accepts a dict for configuration'
380 mdslist
= list ( teuthology
. all_roles_of_type ( ctx
. cluster
, 'mds' ))
381 assert len ( mdslist
) > 1 , \
382 'mds_thrash task requires at least 2 metadata servers'
386 seed
= int ( config
[ 'seed' ])
388 seed
= int ( time
. time ())
389 log
. info ( 'mds thrasher using random seed: {seed} ' . format ( seed
= seed
))
392 ( first
,) = ctx
. cluster
. only ( 'mds. {_id} ' . format ( _id
= mdslist
[ 0 ])). remotes
. keys ()
393 manager
= ceph_manager
. CephManager (
394 first
, ctx
= ctx
, logger
= log
. getChild ( 'ceph_manager' ),
397 # make sure everyone is in active, standby, or standby-replay
398 log
. info ( 'Wait for all MDSs to reach steady state...' )
399 status
= mds_cluster
. status ()
402 for info
in status
. get_all ():
403 state
= info
[ 'state' ]
404 if state
not in ( 'up:active' , 'up:standby' , 'up:standby-replay' ):
410 status
= mds_cluster
. status ()
411 log
. info ( 'Ready to start thrashing' )
413 manager
. wait_for_clean ()
414 assert manager
. is_clean ()
416 if 'cluster' not in config
:
417 config
[ 'cluster' ] = 'ceph'
419 for fs
in status
. get_filesystems ():
420 thrasher
= MDSThrasher ( ctx
, manager
, config
, Filesystem ( ctx
, fs
[ 'id' ]), fs
[ 'mdsmap' ][ 'max_mds' ])
422 ctx
. ceph
[ config
[ 'cluster' ]]. thrashers
. append ( thrasher
)
425 log
. debug ( 'Yielding' )
428 log
. info ( 'joining mds_thrasher' )
430 if thrasher
. exception
is not None :
431 raise RuntimeError ( 'error during thrashing' )
433 log
. info ( 'done joining' )