# ceph/qa/tasks/mds_thrash.py
"""
Thrash mds by simulating failures
"""
import logging
import contextlib
import itertools
import random
import time

from gevent import sleep
from gevent.greenlet import Greenlet
from gevent.event import Event

from teuthology import misc as teuthology

from tasks import ceph_manager
from tasks.cephfs.filesystem import MDSCluster, Filesystem, FSMissing
from tasks.thrasher import Thrasher

log = logging.getLogger(__name__)


class MDSThrasher(Thrasher, Greenlet):
    """
    MDSThrasher::

    The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).

    The config is optional. Many of the config parameters are a maximum value
    to use when selecting a random value from a range. To always use the maximum
    value, set randomize to false. The config is a dict containing some or all of:

    max_thrash: [default: 1] the maximum number of active MDSs per FS that will
      be thrashed at any given time.

    max_thrash_delay: [default: 120] maximum number of seconds to delay before
      thrashing again.

    max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
      the replay state before thrashing.

    max_revive_delay: [default: 10] maximum number of seconds to delay before
      bringing back a thrashed MDS.

    randomize: [default: true] enables randomization; when false, the max
      values are always used.

    seed: [no default] seed the random number generator

    thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
      during replay. Value should be between 0.0 and 1.0.

    thrash_max_mds: [default: 0.05] likelihood that the max_mds of the mds
      cluster will be modified to a value [1, current) or (current, starting
      max_mds]. Value should be between 0.0 and 1.0.

    thrash_while_stopping: [default: false] thrash an MDS while there
      are MDS in up:stopping (because max_mds was changed and some
      MDS were deactivated).

    thrash_weights: allows specific MDSs to be thrashed more/less frequently.
      This option overrides anything specified by max_thrash. This option is a
      dict containing mds.x: weight pairs. For example, [mds.a: 0.7, mds.b:
      0.3, mds.c: 0.0]. Each weight is a value from 0.0 to 1.0. Any MDSs not
      specified will be automatically given a weight of 0.0 (not thrashed).
      For a given MDS, by default the thrasher delays for up to
      max_thrash_delay, thrashes, waits for the MDS to recover, and iterates.
      If a non-zero weight is specified for an MDS, for each iteration the
      thrasher chooses whether to thrash during that iteration based on a
      random value [0-1] not exceeding the weight of that MDS.

    Examples::

      The following example sets the likelihood that mds.a will be thrashed
      to 80%, mds.b to 20%, and other MDSs will not be thrashed. It also sets the
      likelihood that an MDS will be thrashed in replay to 40%.
      Thrash weights do not have to sum to 1.

      tasks:
      - ceph:
      - mds_thrash:
          thrash_weights:
            - mds.a: 0.8
            - mds.b: 0.2
          thrash_in_replay: 0.4
      - ceph-fuse:
      - workunit:
          clients:
            all: [suites/fsx.sh]

      The following example disables randomization, and uses the max delay values:

      tasks:
      - ceph:
      - mds_thrash:
          max_thrash_delay: 10
          max_revive_delay: 1
          max_replay_thrash_delay: 4
      - ceph-fuse:
      - workunit:
          clients:
            all: [suites/fsx.sh]
    """

    def __init__(self, ctx, manager, config, fs, max_mds):
        super(MDSThrasher, self).__init__()

        self.config = config
        self.ctx = ctx
        self.logger = log.getChild('fs.[{f}]'.format(f=fs.name))
        self.fs = fs
        self.manager = manager
        self.max_mds = max_mds
        self.name = 'thrasher.fs.[{f}]'.format(f=fs.name)
        self.stopping = Event()

        self.randomize = bool(self.config.get('randomize', True))
        self.thrash_max_mds = float(self.config.get('thrash_max_mds', 0.05))
        self.max_thrash = int(self.config.get('max_thrash', 1))
        self.max_thrash_delay = float(self.config.get('max_thrash_delay', 120.0))
        self.thrash_in_replay = float(self.config.get('thrash_in_replay', 0.0))
        assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, \
            'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format(
                v=self.thrash_in_replay)
        self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0))
        self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))
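
    # The delay parameters above share one pattern: each "max_*" value is an
    # upper bound, and when randomize is true the actual delay is drawn from
    # [0, max) on each iteration (see do_thrash below). Illustrative sketch:
    #
    #     delay = self.max_thrash_delay
    #     if self.randomize:
    #         delay = random.uniform(0.0, self.max_thrash_delay)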

    def _run(self):
        try:
            self.do_thrash()
        except FSMissing:
            pass
        except Exception as e:
            # Log exceptions here so we get the full backtrace (gevent loses them).
            # Also allow successful completion as gevent exception handling is a broken mess:
            #
            # 2017-02-03T14:34:01.259 CRITICAL:root:  File "gevent.libev.corecext.pyx", line 367, in gevent.libev.corecext.loop.handle_error (src/gevent/libev/gevent.corecext.c:5051)
            #   File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 558, in handle_error
            #     self.print_exception(context, type, value, tb)
            #   File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 605, in print_exception
            #     traceback.print_exception(type, value, tb, file=errstream)
            #   File "/usr/lib/python2.7/traceback.py", line 124, in print_exception
            #     _print(file, 'Traceback (most recent call last):')
            #   File "/usr/lib/python2.7/traceback.py", line 13, in _print
            #     file.write(str+terminator)
            # 2017-02-03T14:34:01.261 CRITICAL:root:IOError
            self.set_thrasher_exception(e)
            self.logger.exception("exception:")
            # allow successful completion so gevent doesn't see an exception...
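
    # Greenlet subclasses execute _run() when start() is called. The broad
    # handler above stashes any failure via set_thrasher_exception() so the
    # task() teardown below can surface it; letting it propagate would only
    # get mangled by gevent's error handling (see the log excerpt above).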
146 """Write data to the logger assigned to MDSThrasher"""

    def kill_mds(self, mds):
        if self.config.get('powercycle'):
            (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
                         remotes.keys())
            self.log('kill_mds on mds.{m} doing powercycle of {s}'.
                     format(m=mds, s=remote.name))
            self._assert_ipmi(remote)
            remote.console.power_off()
        else:
            self.ctx.daemons.get_daemon('mds', mds).stop()

    @staticmethod
    def _assert_ipmi(remote):
        assert remote.console.has_ipmi_credentials, (
            "powercycling requested but RemoteConsole is not "
            "initialized. Check ipmi config.")

    def revive_mds(self, mds):
        """
        Revive mds -- do an IPMI powercycle (if indicated by the config)
        and then restart.
        """
        if self.config.get('powercycle'):
            (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
                         remotes.keys())
            self.log('revive_mds on mds.{m} doing powercycle of {s}'.
                     format(m=mds, s=remote.name))
            self._assert_ipmi(remote)
            remote.console.power_on()
            self.manager.make_admin_daemon_dir(self.ctx, remote)
        args = []
        self.ctx.daemons.get_daemon('mds', mds).restart(*args)
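
    # kill_mds() and revive_mds() are symmetric: with the 'powercycle' option
    # set, both act through IPMI power control on the MDS's host; otherwise
    # they simply stop and restart the teuthology-managed mds daemon.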

    def wait_for_stable(self, rank=None, gid=None):
        self.log('waiting for mds cluster to stabilize...')
        for itercount in itertools.count():
            status = self.fs.status()
            max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds']
            ranks = list(status.get_ranks(self.fs.id))
            stopping = sum(1 for _ in ranks if "up:stopping" == _['state'])
            actives = sum(1 for _ in ranks
                          if "up:active" == _['state'] and "laggy_since" not in _)

            if not bool(self.config.get('thrash_while_stopping', False)) and stopping > 0:
                if itercount % 5 == 0:
                    self.log('cluster is considered unstable while MDS are in up:stopping (!thrash_while_stopping)')
            else:
                if rank is not None:
                    try:
                        info = status.get_rank(self.fs.id, rank)
                        if info['gid'] != gid and "up:active" == info['state']:
                            self.log('mds.{name} has gained rank={rank}, replacing gid={gid}'.format(
                                name=info['name'], rank=rank, gid=gid))
                            return status
                    except:
                        pass  # no rank present
                    if actives >= max_mds:
                        # no replacement can occur!
                        self.log("cluster has {actives} actives (max_mds is {max_mds}), no MDS can replace rank {rank}".format(
                            actives=actives, max_mds=max_mds, rank=rank))
                        return status
                else:
                    if actives == max_mds:
                        self.log('mds cluster has {count} alive and active, now stable!'.format(count=actives))
                        return status
            if itercount > 300 / 2:  # 150 iterations of the 2s sleep below: 5 minutes
                raise RuntimeError('timeout waiting for cluster to stabilize')
            elif itercount % 5 == 0:
                self.log('mds map: {status}'.format(status=status))
            else:
                self.log('no change')
            sleep(2)
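
    # wait_for_stable() is called in two modes by do_thrash() below
    # (illustrative):
    #
    #     self.wait_for_stable()           # after changing max_mds: wait until
    #                                      # the active count matches max_mds
    #     self.wait_for_stable(rank, gid)  # after a kill: wait until some MDS
    #                                      # takes over the victim's rank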

    def do_thrash(self):
        """
        Perform the random thrashing action
        """

        self.log('starting mds_do_thrash for fs {fs}'.format(fs=self.fs.name))
        stats = {
            "max_mds": 0,
            "deactivate": 0,
            "kill": 0,
        }

        while not self.stopping.is_set():
            delay = self.max_thrash_delay
            if self.randomize:
                delay = random.uniform(0.0, self.max_thrash_delay)

            if delay > 0.0:
                self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
                self.stopping.wait(delay)
                if self.stopping.is_set():
                    continue

            status = self.fs.status()

            if random.random() <= self.thrash_max_mds:
                max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds']
                options = [i for i in range(1, self.max_mds + 1) if i != max_mds]
                if len(options) > 0:
                    new_max_mds = random.choice(options)
                    self.log('thrashing max_mds: %d -> %d' % (max_mds, new_max_mds))
                    self.fs.set_max_mds(new_max_mds)
                    stats['max_mds'] += 1
                    self.wait_for_stable()

            count = 0
            for info in status.get_ranks(self.fs.id):
                name = info['name']
                label = 'mds.' + name
                rank = info['rank']
                gid = info['gid']

                # if thrash_weights isn't specified and we've reached max_thrash,
                # we're done with this iteration
                count = count + 1
                if 'thrash_weights' not in self.config and count > self.max_thrash:
                    break

                weight = 1.0
                if 'thrash_weights' in self.config:
                    weight = float(self.config['thrash_weights'].get(label, 0.0))
                skip = random.random()
                if weight <= skip:
                    self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(
                        skip=skip, weight=weight))
                    continue

                self.log('kill {label} (rank={rank})'.format(label=label, rank=rank))
                self.kill_mds(name)
                stats['kill'] += 1
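
                # (With thrash_weights, the victim above was chosen
                # probabilistically: skip is uniform on [0, 1), so an MDS with
                # weight w is killed on roughly a fraction w of iterations.)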

                # wait for mon to report killed mds as crashed
                last_laggy_since = None
                itercount = 0
                while True:
                    status = self.fs.status()
                    info = status.get_mds(name)
                    if not info:
                        break
                    if 'laggy_since' in info:
                        last_laggy_since = info['laggy_since']
                        break
                    if any([(f == name) for f in status.get_fsmap(self.fs.id)['mdsmap']['failed']]):
                        break
                    self.log(
                        'waiting till mds map indicates {label} is laggy/crashed, in failed state, or {label} is removed from mdsmap'.format(
                            label=label))
                    itercount = itercount + 1
                    if itercount > 10:
                        self.log('mds map: {status}'.format(status=status))
                    sleep(2)

                if last_laggy_since:
                    self.log(
                        '{label} reported laggy/crashed since: {since}'.format(label=label, since=last_laggy_since))
                else:
                    self.log('{label} down, removed from mdsmap'.format(label=label))

                # wait for a standby mds to takeover and become active
                status = self.wait_for_stable(rank, gid)

                # wait for a while before restarting the old active to become a new
                # standby
                delay = self.max_revive_delay
                if self.randomize:
                    delay = random.uniform(0.0, self.max_revive_delay)

                self.log('waiting for {delay} secs before reviving {label}'.format(
                    delay=delay, label=label))
                sleep(delay)

                self.log('reviving {label}'.format(label=label))
                self.revive_mds(name)

                for itercount in itertools.count():
                    if itercount > 300 / 2:  # 150 iterations of the 2s sleep below: 5 minutes
                        raise RuntimeError('timeout waiting for MDS to revive')
                    status = self.fs.status()
                    info = status.get_mds(name)
                    if info and info['state'] in ('up:standby', 'up:standby-replay', 'up:active'):
                        self.log('{label} reported in {state} state'.format(
                            label=label, state=info['state']))
                        break
                    self.log(
                        'waiting till mds map indicates {label} is in active, standby or standby-replay'.format(
                            label=label))
                    sleep(2)

        for stat in stats:
            self.log("stat['{key}'] = {value}".format(key=stat, value=stats[stat]))

        # don't do replay thrashing right now
        # for info in status.get_replays(self.fs.id):
        #     # this might race with replay -> active transition...
        #     if status['state'] == 'up:replay' and random.randrange(0.0, 1.0) < self.thrash_in_replay:
        #         delay = self.max_replay_thrash_delay
        #         if self.randomize:
        #             delay = random.randrange(0.0, self.max_replay_thrash_delay)
        #         sleep(delay)
        #         self.log('kill replaying mds.{id}'.format(id=self.to_kill))
        #         self.kill_mds(self.to_kill)
        #
        #         delay = self.max_revive_delay
        #         if self.randomize:
        #             delay = random.randrange(0.0, self.max_revive_delay)
        #
        #         self.log('waiting for {delay} secs before reviving mds.{id}'.format(
        #             delay=delay, id=self.to_kill))
        #         sleep(delay)
        #
        #         self.log('revive mds.{id}'.format(id=self.to_kill))
        #         self.revive_mds(self.to_kill)


@contextlib.contextmanager
def task(ctx, config):
    """
    Stress test the mds by thrashing while another task/workunit
    is running.

    Please refer to the MDSThrasher class for further information on the
    available options.
    """

    mds_cluster = MDSCluster(ctx)

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mds_thrash task only accepts a dict for configuration'
    mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))
    assert len(mdslist) > 1, \
        'mds_thrash task requires at least 2 metadata servers'

    # choose random seed
    if 'seed' in config:
        seed = int(config['seed'])
    else:
        seed = int(time.time())
    log.info('mds thrasher using random seed: {seed}'.format(seed=seed))
    random.seed(seed)

    (first,) = ctx.cluster.only('mds.{_id}'.format(_id=mdslist[0])).remotes.keys()
    manager = ceph_manager.CephManager(
        first, ctx=ctx, logger=log.getChild('ceph_manager'),
    )

    # make sure everyone is in active, standby, or standby-replay
    log.info('Wait for all MDSs to reach steady state...')
    status = mds_cluster.status()
    while True:
        steady = True
        for info in status.get_all():
            state = info['state']
            if state not in ('up:active', 'up:standby', 'up:standby-replay'):
                steady = False
                break
        if steady:
            break
        sleep(2)
        status = mds_cluster.status()
    log.info('Ready to start thrashing')

    manager.wait_for_clean()
    assert manager.is_clean()

    if 'cluster' not in config:
        config['cluster'] = 'ceph'

    thrashers = []
    for fs in status.get_filesystems():
        thrasher = MDSThrasher(ctx, manager, config, Filesystem(ctx, fscid=fs['id']), fs['mdsmap']['max_mds'])
        thrasher.start()
        ctx.ceph[config['cluster']].thrashers.append(thrasher)
        thrashers.append(thrasher)

    try:
        log.debug('Yielding')
        yield
    finally:
        log.info('joining mds_thrasher')
        for thrasher in thrashers:
            thrasher.stop()
            thrasher.join()
            if thrasher.exception is not None:
                raise RuntimeError('error during thrashing')
        log.info('done joining')
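
# A minimal invocation from a teuthology suite might look like this
# (illustrative only; see the MDSThrasher docstring for the full option list):
#
#     tasks:
#     - ceph:
#     - mds_thrash:
#         max_thrash_delay: 10
#     - ceph-fuse:
#     - workunit:
#         clients:
#           all: [suites/fsx.sh]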