]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | # FIXME: this file has many undefined vars which are accessed! |
2 | # flake8: noqa | |
7c673cae FG |
3 | import logging |
4 | import contextlib | |
5 | import time | |
e306af50 | 6 | from tasks import ceph_manager |
7c673cae FG |
7 | from teuthology import misc |
8 | from teuthology.orchestra.run import CommandFailedError, Raw | |
9 | ||
10 | log = logging.getLogger(__name__) | |
11 | ||
12 | ||
@contextlib.contextmanager
def task(ctx, config):
    """
    Go through filesystem creation with a synthetic failure in an MDS
    in its 'up:creating' state, to exercise the retry behaviour.

    The sequence is: stop the only MDS, remove and recreate the filesystem,
    start the MDS with ``--mds_kill_create_at=1`` so that it dies during
    creation, check that the MDS map still reports ``up:creating``, restart
    the MDS normally and wait for ``up:active`` before yielding to nested
    tasks.

    :param ctx: teuthology run context; ``ctx.cluster`` and ``ctx.daemons``
                are read here.
    :param config: task configuration (not used by this task).
    :raises RuntimeError: if the cluster does not have exactly one MDS role.
    :raises CommandFailedError: re-raised when the MDS exits with a status
                                other than the expected 1.
    """
    # Imported locally so that the handle previously reached through the
    # undefined ``self.fs`` is actually bound inside this function.
    # NOTE(review): confirm the Filesystem constructor signature against the
    # qa tree this file is shipped with.
    from tasks.cephfs.filesystem import Filesystem

    # Name used by the 'fs rm' / 'fs new' commands below.
    fs_name = "default"

    # Grab handles to the teuthology objects of interest
    mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
    if len(mdslist) != 1:
        # Require exactly one MDS, the code path for creation failure when
        # a standby is available is different
        raise RuntimeError("This task requires exactly one MDS")

    mds_id = mdslist[0]
    (mds_remote,) = ctx.cluster.only('mds.{_id}'.format(_id=mds_id)).remotes.keys()
    manager = ceph_manager.CephManager(
        mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'),
    )

    # Handles that the original body referenced without ever defining them
    # ('self.fs' and 'mds'), which made the task fail with NameError.
    fs = Filesystem(ctx, name=fs_name)
    mds = ctx.daemons.get_daemon('mds', mds_id)

    # Stop MDS
    fs.set_max_mds(0)
    fs.mds_stop(mds_id)
    fs.mds_fail(mds_id)

    # Reset the filesystem so that next start will go into CREATING
    manager.raw_cluster_cmd('fs', 'rm', fs_name, "--yes-i-really-mean-it")
    manager.raw_cluster_cmd('fs', 'new', fs_name, "metadata", "data")

    # Start the MDS with mds_kill_create_at set, it will crash during creation
    mds.restart_with_args(["--mds_kill_create_at=1"])
    try:
        mds.wait_for_exit()
    except CommandFailedError as e:
        if e.exitstatus == 1:
            log.info("MDS creation killed as expected")
        else:
            log.error("Unexpected status code %s", e.exitstatus)
            raise

    # Since I have intentionally caused a crash, I will clean up the resulting core
    # file to avoid task.internal.coredump seeing it as a failure.
    log.info("Removing core file from synthetic MDS failure")
    core_glob = "{archive}/coredump/*.core".format(archive=misc.get_archive_dir(ctx))
    # Raw() keeps the glob unquoted so the remote shell expands it.
    mds_remote.run(args=['rm', '-f', Raw(core_glob)])

    # It should have left the MDS map state still in CREATING
    status = fs.status().get_mds(mds_id)
    assert status['state'] == 'up:creating'

    # Start the MDS again without the kill flag set, it should proceed with creation successfully
    mds.restart()

    # Wait for state ACTIVE
    fs.wait_for_state("up:active", timeout=120, mds_id=mds_id)

    # The system should be back up in a happy healthy state, go ahead and run any further tasks
    # inside this context.
    yield