ceph/qa/tasks/cephfs/test_scrub.py
1 """
2 Test CephFS scrub (distinct from OSD scrub) functionality
3 """
4 import logging
5 from collections import namedtuple
6
7 from tasks.cephfs.cephfs_test_case import CephFSTestCase
8
9 log = logging.getLogger(__name__)
10
11 ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
12
13
14 class Workload(CephFSTestCase):
15 def __init__(self, filesystem, mount):
16 super().__init__()
17 self._mount = mount
18 self._filesystem = filesystem
19 self._initial_state = None
20
21 # Accumulate backtraces for every failed validation, and return them. Backtraces
22 # are rather verbose, but we only see them when something breaks, and they
23 # let us see which check failed without having to decorate each check with
24 # a string
25 self._errors = []
26
27 def write(self):
28 """
29 Write the workload files to the mount
30 """
31 raise NotImplementedError()
32
33 def validate(self):
34 """
35 Read from the mount and validate that the workload files are present (i.e. have
36 survived or been reconstructed from the test scenario)
37 """
38 raise NotImplementedError()
39
40 def damage(self):
41 """
42 Damage the filesystem pools in ways that will be interesting to recover from. By
43 default just wipe everything in the metadata pool
44 """
45 # Delete every object in the metadata pool
46 objects = self._filesystem.rados(["ls"]).split("\n")
47 for o in objects:
48 self._filesystem.rados(["rm", o])
49
50 def flush(self):
51 """
52 Called after client unmount, after write: flush whatever you want
53 """
        self._filesystem.mds_asok(["flush", "journal"])


class BacktraceWorkload(Workload):
    """
    Single file, single directory, wipe the backtrace and check it.
    """
    def write(self):
        self._mount.run_shell(["mkdir", "subdir"])
        self._mount.write_n_mb("subdir/sixmegs", 6)

    def validate(self):
        st = self._mount.stat("subdir/sixmegs")
        self._filesystem.mds_asok(["flush", "journal"])
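        # The backtrace lives in the "parent" xattr of the file's first data
        # object; ancestors[0] is the dentry of the inode itself, so its dname
        # should be the file's own name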
        bt = self._filesystem.read_backtrace(st['st_ino'])
        parent = bt['ancestors'][0]['dname']
        self.assertEqual(parent, 'sixmegs')
        return self._errors

    def damage(self):
        st = self._mount.stat("subdir/sixmegs")
        self._filesystem.mds_asok(["flush", "journal"])
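        # Blank out the "parent" xattr (the backtrace) on the file's first
        # data object so that scrub has something to detect and repair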
        self._filesystem._write_data_xattr(st['st_ino'], "parent", "")

    def create_files(self, nfiles=1000):
        self._mount.create_n_files("scrub-new-files/file", nfiles)


class DupInodeWorkload(Workload):
    """
    Duplicate an inode and try scrubbing it twice.
85 """
86
87 def write(self):
88 self._mount.run_shell(["mkdir", "parent"])
89 self._mount.run_shell(["mkdir", "parent/child"])
90 self._mount.write_n_mb("parent/parentfile", 6)
91 self._mount.write_n_mb("parent/child/childfile", 6)
92
93 def damage(self):
94 temp_bin_path = "/tmp/10000000000.00000000_omap.bin"
95 self._mount.umount_wait()
96 self._filesystem.mds_asok(["flush", "journal"])
97 self._filesystem.mds_stop()
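        # Duplicate a dentry: read the omap value for "parentfile" out of the
        # "parent" directory's dirfrag object (inode 0x10000000000) and write
        # it back under a second dentry name, so two dentries now reference
        # the same inode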
        self._filesystem.rados(["getomapval", "10000000000.00000000",
                                "parentfile_head", temp_bin_path])
        self._filesystem.rados(["setomapval", "10000000000.00000000",
                                "shadow_head"], stdin_file=temp_bin_path)
        self._filesystem.set_ceph_conf('mds', 'mds hack allow loading invalid metadata', True)
        self._filesystem.mds_restart()
        self._filesystem.wait_for_daemons()

    def validate(self):
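        # A recursive repair scrub across the duplicated dentry should complete
        # without crashing the MDS or leaving the cluster unhealthy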
        out_json = self._filesystem.rank_tell(["scrub", "start", "/", "recursive", "repair"])
        self.assertNotEqual(out_json, None)
        self.assertTrue(self._filesystem.are_daemons_healthy())
        return self._errors


class TestScrub(CephFSTestCase):
    MDSS_REQUIRED = 1

    def setUp(self):
        super().setUp()

    def _scrub(self, workload, workers=1):
120 """
121 That when all objects in metadata pool are removed, we can rebuild a metadata pool
122 based on the contents of a data pool, and a client can see and read our files.
123 """

        # First, inject some files

        workload.write()

        # The MDS should not be strict about scatter/rstat accounting while we
        # repair (in production these checks are off by default, but in QA we
        # need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

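        # Kick off a recursive scrub with repair from the root of the filesystem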
        out_json = self.fs.rank_tell(["scrub", "start", "/", "recursive", "repair"])
        self.assertNotEqual(out_json, None)

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def _get_damage_count(self, damage_type='backtrace'):
        out_json = self.fs.rank_tell(["damage", "ls"])
        self.assertNotEqual(out_json, None)

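        # "damage ls" returns a list of damage entries; count those whose
        # damage_type matches the one requested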
        damage_count = 0
        for it in out_json:
            if it['damage_type'] == damage_type:
                damage_count += 1
        return damage_count

    def _scrub_new_files(self, workload):
        """
        That scrubbing new files does not lead to errors
        """
        workload.create_files(1000)
        self._wait_until_scrub_complete()
        self.assertEqual(self._get_damage_count(), 0)

    def test_scrub_backtrace_for_new_files(self):
        self._scrub_new_files(BacktraceWorkload(self.fs, self.mount_a))

    def test_scrub_backtrace(self):
        self._scrub(BacktraceWorkload(self.fs, self.mount_a))

    def test_scrub_dup_inode(self):
        self._scrub(DupInodeWorkload(self.fs, self.mount_a))