# ceph/qa/tasks/cephfs/test_scrub.py (ceph.git, pacific 16.2.5)
1"""
2Test CephFS scrub (distinct from OSD scrub) functionality
3"""
f67539c2
TL
4
5from io import BytesIO
94b18763 6import logging
94b18763
FG
7from collections import namedtuple
8
9f95a23c 9from tasks.cephfs.cephfs_test_case import CephFSTestCase
94b18763
FG
10
11log = logging.getLogger(__name__)
12
13ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
14
15


class Workload(CephFSTestCase):
    def __init__(self, test, filesystem, mount):
        super().__init__()
        self._test = test
        self._mount = mount
        self._filesystem = filesystem
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them. Backtraces
        # are rather verbose, but we only see them when something breaks, and they
        # let us see which check failed without having to decorate each check with
        # a string.
        self._errors = []

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from. By
        default just wipe everything in the metadata pool.
        """
        # Delete every object in the metadata pool
        pool = self._filesystem.get_metadata_pool_name()
        self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it'])

    def flush(self):
        """
        Called after the client unmounts, once write() has completed: flush whatever you want
        """
        self._filesystem.mds_asok(["flush", "journal"])


class BacktraceWorkload(Workload):
    """
    Single file, single directory, wipe the backtrace and check it.
    """
    def write(self):
        self._mount.run_shell(["mkdir", "subdir"])
        self._mount.write_n_mb("subdir/sixmegs", 6)

    def validate(self):
        st = self._mount.stat("subdir/sixmegs")
        self._filesystem.mds_asok(["flush", "journal"])
        bt = self._filesystem.read_backtrace(st['st_ino'])
        parent = bt['ancestors'][0]['dname']
        self.assertEqual(parent, 'sixmegs')
        return self._errors

    def damage(self):
        st = self._mount.stat("subdir/sixmegs")
        self._filesystem.mds_asok(["flush", "journal"])
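        # The backtrace (the file's ancestor path) lives in the "parent" xattr
        # of the file's first object in the data pool; blanking it below
        # simulates a lost backtrace for scrub to detect and repair.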
        self._filesystem._write_data_xattr(st['st_ino'], "parent", "")

    def create_files(self, nfiles=1000):
        self._mount.create_n_files("scrub-new-files/file", nfiles)


class DupInodeWorkload(Workload):
    """
    Duplicate an inode and try scrubbing it twice.
    """

    def write(self):
        self._mount.run_shell(["mkdir", "parent"])
        self._mount.run_shell(["mkdir", "parent/child"])
        self._mount.write_n_mb("parent/parentfile", 6)
        self._mount.write_n_mb("parent/child/childfile", 6)

    def damage(self):
        self._mount.umount_wait()
        self._filesystem.mds_asok(["flush", "journal"])
        self._filesystem.fail()
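        # "10000000000.00000000" is the first dirfrag object of inode
        # 0x10000000000, the first client-created inode (here, the "parent"
        # directory from write()). Dentries are stored in a dirfrag's omap as
        # "<name>_head" keys, so copying the value of "parentfile_head" into a
        # new "shadow_head" key forges a second dentry linking the same inode.
        # The hack config option below lets the MDS load this deliberately
        # invalid metadata instead of refusing it.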
        d = self._filesystem.radosmo(["getomapval", "10000000000.00000000", "parentfile_head", "-"])
        self._filesystem.radosm(["setomapval", "10000000000.00000000", "shadow_head"], stdin=BytesIO(d))
        self._test.config_set('mds', 'mds_hack_allow_loading_invalid_metadata', True)
        self._filesystem.set_joinable()
        self._filesystem.wait_for_daemons()

    def validate(self):
        out_json = self._filesystem.run_scrub(["start", "/", "recursive,repair"])
        self.assertNotEqual(out_json, None)
        self.assertEqual(out_json["return_code"], 0)
        self.assertEqual(self._filesystem.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
        self.assertTrue(self._filesystem.are_daemons_healthy())
        return self._errors


class TestScrub(CephFSTestCase):
    MDSS_REQUIRED = 1

    def setUp(self):
        super().setUp()

    def _scrub(self, workload, workers=1):
        """
        That after a workload has damaged the filesystem, a recursive repair
        scrub restores it such that the workload's files validate correctly.
        """

        # First, inject some files

        workload.write()

        # We need the MDS to not be strict about stats (in production these
        # options are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()
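
        # run_scrub() issues the MDS "scrub start" tell command; the returned
        # JSON includes a scrub_tag identifying this particular scrub, so we
        # can wait for exactly this scrub to finish rather than any earlier one.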
        out_json = self.fs.run_scrub(["start", "/", "recursive,repair"])
        self.assertNotEqual(out_json, None)
        self.assertEqual(out_json["return_code"], 0)
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def _get_damage_count(self, damage_type='backtrace'):
        out_json = self.fs.rank_tell(["damage", "ls"])
        self.assertNotEqual(out_json, None)
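
        # "damage ls" returns one JSON entry per damage-table record; each
        # entry carries a damage_type field (e.g. "backtrace", "dentry",
        # "dir_frag").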
        damage_count = 0
        for it in out_json:
            if it['damage_type'] == damage_type:
                damage_count += 1
        return damage_count

    def _scrub_new_files(self, workload):
        """
        That scrubbing new files does not lead to errors
        """
        workload.create_files(1000)
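        # With no tag argument, wait_until_scrub_complete() waits for any
        # in-progress scrub activity to go idle; afterwards the damage table
        # should contain no entries for the freshly created files.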
        self.fs.wait_until_scrub_complete()
        self.assertEqual(self._get_damage_count(), 0)

    def test_scrub_backtrace_for_new_files(self):
        self._scrub_new_files(BacktraceWorkload(self, self.fs, self.mount_a))

    def test_scrub_backtrace(self):
        self._scrub(BacktraceWorkload(self, self.fs, self.mount_a))

    def test_scrub_dup_inode(self):
        self._scrub(DupInodeWorkload(self, self.fs, self.mount_a))