# Extracted from: git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/mds_creation_failure.py
import contextlib
import logging
import time

# NOTE(review): `ceph_manager` is referenced by task() below, but its import
# line is not present in this extract -- restore it to match the project
# layout (e.g. `import ceph_manager` or `from tasks import ceph_manager`).
from teuthology import misc
from teuthology.orchestra.run import CommandFailedError, Raw
# Module-level logger; task() also derives a child logger from it for the
# CephManager it creates.
log = logging.getLogger(__name__)
@contextlib.contextmanager
def task(ctx, config):
    """
    Go through filesystem creation with a synthetic failure in an MDS
    in its 'up:creating' state, to exercise the retry behaviour.

    :param ctx: teuthology context; this task reads ``ctx.cluster`` and
                ``ctx.daemons``.
    :param config: task configuration; not read by this task.
    :raises RuntimeError: if the cluster does not have exactly one MDS, or
                          if creation does not complete within the timeout.
    :raises AssertionError: if the MDS is not in 'up:creating' after the
                            synthetic failure, or reaches an unexpected
                            state while waiting for 'up:active'.

    NOTE(review): this block was rebuilt from a damaged extract in which
    several statements were missing.  Every statement tagged
    ``NOTE(review)`` below is inferred from the surrounding comments and
    names rather than visible in the source; confirm each one against the
    upstream file before relying on it.
    """
    # Grab handles to the teuthology objects of interest
    mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
    # NOTE(review): condition rebuilt from the comment and message below.
    if len(mdslist) != 1:
        # Require exactly one MDS, the code path for creation failure when
        # a standby is available is different
        raise RuntimeError("This task requires exactly one MDS")

    # NOTE(review): assignment rebuilt; mds_id is used below and there is
    # exactly one entry in mdslist at this point.
    mds_id = mdslist[0]
    # .keys() instead of .iterkeys(): the latter does not exist on Python 3,
    # and unpacking the keys works the same way on Python 2.
    (mds_remote,) = ctx.cluster.only(
        'mds.{_id}'.format(_id=mds_id)).remotes.keys()
    manager = ceph_manager.CephManager(
        mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'),
    )

    # Take the MDS out of service
    manager.raw_cluster_cmd('mds', 'set', "max_mds", "0")
    mds = ctx.daemons.get_daemon('mds', mds_id)
    # NOTE(review): one statement is missing here in the extract; stopping
    # the daemon before marking it failed is presumed.
    mds.stop()
    manager.raw_cluster_cmd('mds', 'fail', mds_id)

    # Reset the filesystem so that next start will go into CREATING
    manager.raw_cluster_cmd('fs', 'rm', "default", "--yes-i-really-mean-it")
    manager.raw_cluster_cmd('fs', 'new', "default", "metadata", "data")

    # Start the MDS with mds_kill_create_at set, it will crash during creation
    mds.restart_with_args(["--mds_kill_create_at=1"])
    try:
        # NOTE(review): the try body is missing in the extract; waiting for
        # the daemon process to exit is presumed.
        mds.wait_for_exit()
    except CommandFailedError as e:
        # NOTE(review): the expected exit status (1) and the re-raise in the
        # unexpected case are inferred from the two log messages.
        if e.exitstatus == 1:
            log.info("MDS creation killed as expected")
        else:
            log.error("Unexpected status code %s", e.exitstatus)
            raise

    # Since I have intentionally caused a crash, I will clean up the resulting core
    # file to avoid task.internal.coredump seeing it as a failure.
    log.info("Removing core file from synthetic MDS failure")
    core_glob = "{archive}/coredump/*.core".format(
        archive=misc.get_archive_dir(ctx))
    mds_remote.run(args=['rm', '-f', Raw(core_glob)])

    # It should have left the MDS map state still in CREATING
    status = manager.get_mds_status(mds_id)
    assert status['state'] == 'up:creating'

    # Start the MDS again without the kill flag set, it should proceed with creation successfully
    # NOTE(review): the restart call itself is missing in the extract.
    mds.restart()

    # Wait for state ACTIVE
    # NOTE(review): the counter, the timeout value, the loop header, the
    # break and the one-second sleep are all rebuilt; only the branch bodies
    # are visible in the extract.  Confirm the timeout value in particular.
    t = 0
    create_timeout = 120
    while True:
        status = manager.get_mds_status(mds_id)
        state = status['state']
        if state == 'up:active':
            log.info("MDS creation completed successfully")
            break
        elif state == 'up:creating':
            log.info("MDS still in creating state")
            if t > create_timeout:
                timeout_msg = (
                    "Creating did not complete within %ss" % create_timeout)
                log.error(timeout_msg)
                raise RuntimeError(timeout_msg)
            t += 1
            time.sleep(1)
        else:
            log.error("Unexpected MDS state: %s", state)
            # Raised explicitly instead of via `assert`, so the loop cannot
            # spin forever when assertions are stripped with -O.
            raise AssertionError("Unexpected MDS state: %s" % state)

    # The system should be back up in a happy healthy state, go ahead and run any further tasks
    # inside this context.
    yield