"""
Test CephFS scrub (distinct from OSD scrub) functionality
"""
import logging
from io import BytesIO
from collections import namedtuple

from tasks.cephfs.cephfs_test_case import CephFSTestCase

log = logging.getLogger(__name__)

ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
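# As used in TestScrub._scrub() below: Workload.validate() returns a list of
# ValidationError tuples, one per failed check, so the caller can log every
# exception and backtrace before asserting.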


class Workload(CephFSTestCase):
    def __init__(self, test, filesystem, mount):
        super().__init__()
        self._test = test
        self._mount = mount
        self._filesystem = filesystem
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them. Backtraces
        # are rather verbose, but we only see them when something breaks, and they
        # let us see which check failed without having to decorate each check with
        # a string
        self._errors = []

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from. By
        default just wipe everything in the metadata pool
        """
        # Delete every object in the metadata pool
        pool = self._filesystem.get_metadata_pool_name()
        self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it'])
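        # (Roughly the CLI "rados purge <pool> --yes-i-really-really-mean-it",
        # i.e. simulate catastrophic loss of every metadata object.)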

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._filesystem.mds_asok(["flush", "journal"])
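

# Background note (not from this file): a backtrace is an inode's ancestry,
# stored as a "parent" xattr on the file's first object in the data pool; the
# MDS persists backtraces when its journal is flushed, which is why these
# workloads flush the journal before reading or damaging them.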
class BacktraceWorkload(Workload):
    """
    Single file, single directory, wipe the backtrace and check it.
    """
    def write(self):
        self._mount.run_shell(["mkdir", "subdir"])
        self._mount.write_n_mb("subdir/sixmegs", 6)

    def validate(self):
        st = self._mount.stat("subdir/sixmegs")
        self._filesystem.mds_asok(["flush", "journal"])
        bt = self._filesystem.read_backtrace(st['st_ino'])
        parent = bt['ancestors'][0]['dname']
        self.assertEqual(parent, 'sixmegs')
        return self._errors
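
    # To damage the backtrace, empty out the "parent" xattr via the filesystem
    # helper; the recursive,repair scrub run by TestScrub._scrub() is then
    # expected to rewrite it.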
    def damage(self):
        st = self._mount.stat("subdir/sixmegs")
        self._filesystem.mds_asok(["flush", "journal"])
        self._filesystem._write_data_xattr(st['st_ino'], "parent", "")

    def create_files(self, nfiles=1000):
        self._mount.create_n_files("scrub-new-files/file", nfiles)


class DupInodeWorkload(Workload):
    """
    Duplicate an inode and try scrubbing it twice.
    """
    def write(self):
        self._mount.run_shell(["mkdir", "parent"])
        self._mount.run_shell(["mkdir", "parent/child"])
        self._mount.write_n_mb("parent/parentfile", 6)
        self._mount.write_n_mb("parent/child/childfile", 6)
    def damage(self):
        # Unmount the client, flush the journal, and stop the MDS so that the
        # metadata pool can be edited directly
        self._mount.umount_wait()
        self._filesystem.mds_asok(["flush", "journal"])
        self._filesystem.fail()
        d = self._filesystem.radosmo(["getomapval", "10000000000.00000000", "parentfile_head", "-"])
        self._filesystem.radosm(["setomapval", "10000000000.00000000", "shadow_head"], stdin=BytesIO(d))
        # Let the MDS come back up despite the duplicate inode we just created
        self._test.config_set('mds', 'mds_hack_allow_loading_invalid_metadata', True)
        self._filesystem.set_joinable()
        self._filesystem.wait_for_daemons()

    def validate(self):
        out_json = self._filesystem.run_scrub(["start", "/", "recursive,repair"])
        self.assertNotEqual(out_json, None)
        self.assertEqual(out_json["return_code"], 0)
        self.assertEqual(self._filesystem.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
        self.assertTrue(self._filesystem.are_daemons_healthy())
        return self._errors
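

# Note: run_scrub() drives the MDS scrub admin interface; the manual equivalent
# (assuming rank 0 of a filesystem named "cephfs") would be something like
#   ceph tell mds.cephfs:0 scrub start / recursive,repair
# with "ceph tell mds.cephfs:0 scrub status" to poll for completion of the
# returned scrub_tag.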


class TestScrub(CephFSTestCase):
    def _scrub(self, workload, workers=1):
        """
        That when all objects in metadata pool are removed, we can rebuild a metadata pool
        based on the contents of a data pool, and a client can see and read our files.
        """

        # First, inject some files
        workload.write()

        # Disable the verify-scatter/scatterstat sanity checks (these checks
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Start a recursive repair scrub of the whole tree and wait for the
        # tagged scrub to complete
        out_json = self.fs.run_scrub(["start", "/", "recursive,repair"])
        self.assertNotEqual(out_json, None)
        self.assertEqual(out_json["return_code"], 0)
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def _get_damage_count(self, damage_type='backtrace'):
        out_json = self.fs.rank_tell(["damage", "ls"])
        self.assertNotEqual(out_json, None)

        # "damage ls" returns a list of damage-table entries; count the ones
        # matching the requested damage_type
        damage_count = 0
        for it in out_json:
            if it['damage_type'] == damage_type:
                damage_count += 1

        return damage_count

    def _scrub_new_files(self, workload):
        """
        That scrubbing new files does not lead to errors
        """
        workload.create_files(1000)
        self.fs.wait_until_scrub_complete()
        self.assertEqual(self._get_damage_count(), 0)

    def test_scrub_backtrace_for_new_files(self):
        self._scrub_new_files(BacktraceWorkload(self, self.fs, self.mount_a))

    def test_scrub_backtrace(self):
        self._scrub(BacktraceWorkload(self, self.fs, self.mount_a))

    def test_scrub_dup_inode(self):
        self._scrub(DupInodeWorkload(self, self.fs, self.mount_a))