# ceph/qa/tasks/cephfs/test_scrub.py (recovered from a git.proxmox.com web view)
"""
Test CephFS scrub (distinct from OSD scrub) functionality
"""
import logging

from collections import namedtuple

from tasks.cephfs.cephfs_test_case import CephFSTestCase

log = logging.getLogger(__name__)

# One failed validation check: the exception that was raised plus the
# formatted backtrace captured at the point of failure.
ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
class Workload(CephFSTestCase):
    """
    Base class for a scrub workload: write some files, damage the pools,
    then validate what scrub recovered.  Subclasses override write(),
    validate() and (optionally) damage()/flush().
    """
    def __init__(self, filesystem, mount):
        self._mount = mount
        self._filesystem = filesystem
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them. Backtraces
        # are rather verbose, but we only see them when something breaks, and they
        # let us see which check failed without having to decorate each check with
        # a string.
        self._errors = []

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from. By
        default just wipe everything in the metadata pool
        """
        # Delete every object in the metadata pool
        objects = self._filesystem.rados(["ls"]).split("\n")
        for o in objects:
            self._filesystem.rados(["rm", o])

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._filesystem.mds_asok(["flush", "journal"])
class BacktraceWorkload(Workload):
    """
    Single file, single directory, wipe the backtrace and check it.
    """
    def write(self):
        self._mount.run_shell(["mkdir", "subdir"])
        self._mount.write_n_mb("subdir/sixmegs", 6)

    def validate(self):
        st = self._mount.stat("subdir/sixmegs")
        # Flush the journal so the backtrace is persisted before we read it back
        self._filesystem.mds_asok(["flush", "journal"])
        bt = self._filesystem.read_backtrace(st['st_ino'])
        parent = bt['ancestors'][0]['dname']
        self.assertEqual(parent, 'sixmegs')

    def damage(self):
        # Overwrite the backtrace ("parent") xattr on the file's data object
        # with an empty value, so scrub has something to repair.
        st = self._mount.stat("subdir/sixmegs")
        self._filesystem.mds_asok(["flush", "journal"])
        self._filesystem._write_data_xattr(st['st_ino'], "parent", "")

    def create_files(self, nfiles=1000):
        self._mount.create_n_files("scrub-new-files/file", nfiles)
class DupInodeWorkload(Workload):
    """
    Duplicate an inode and try scrubbing it twice.
    """

    def write(self):
        self._mount.run_shell(["mkdir", "parent"])
        self._mount.run_shell(["mkdir", "parent/child"])
        self._mount.write_n_mb("parent/parentfile", 6)
        self._mount.write_n_mb("parent/child/childfile", 6)

    def damage(self):
        # Copy the omap value for the "parentfile" dentry to a new dentry
        # name ("shadow") in the same dirfrag object, producing two dentries
        # that refer to the same inode.
        temp_bin_path = "/tmp/10000000000.00000000_omap.bin"
        self._mount.umount_wait()
        self._filesystem.mds_asok(["flush", "journal"])
        self._filesystem.mds_stop()
        self._filesystem.rados(["getomapval", "10000000000.00000000",
                                "parentfile_head", temp_bin_path])
        self._filesystem.rados(["setomapval", "10000000000.00000000",
                                "shadow_head"], stdin_file=temp_bin_path)
        # The MDS would normally refuse to load such metadata; allow it so
        # the scrub code gets a chance to see (and repair) the duplicate.
        self._filesystem.set_ceph_conf('mds', 'mds hack allow loading invalid metadata', True)
        self._filesystem.mds_restart()
        self._filesystem.wait_for_daemons()

    def validate(self):
        out_json = self._filesystem.rank_tell(["scrub", "start", "/", "recursive", "repair"])
        self.assertNotEqual(out_json, None)
        self.assertTrue(self._filesystem.are_daemons_healthy())
class TestScrub(CephFSTestCase):
    """
    Exercise CephFS forward scrub (start/repair) against the workloads above.
    """

    def _scrub(self, workload, workers=1):
        """
        That when all objects in metadata pool are removed, we can rebuild a metadata pool
        based on the contents of a data pool, and a client can see and read our files.
        """
        # First, inject some files
        workload.write()

        # Disable the scatter/scatterstat verification options (presumably they
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        out_json = self.fs.rank_tell(["scrub", "start", "/", "recursive", "repair"])
        self.assertNotEqual(out_json, None)

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def _get_damage_count(self, damage_type='backtrace'):
        """
        Return how many entries of the given damage_type the MDS damage
        table currently holds.
        """
        out_json = self.fs.rank_tell(["damage", "ls"])
        self.assertNotEqual(out_json, None)

        damage_count = 0
        for it in out_json:
            if it['damage_type'] == damage_type:
                damage_count += 1
        return damage_count

    def _scrub_new_files(self, workload):
        """
        That scrubbing new files does not lead to errors
        """
        workload.create_files(1000)
        self._wait_until_scrub_complete()
        self.assertEqual(self._get_damage_count(), 0)

    def test_scrub_backtrace_for_new_files(self):
        self._scrub_new_files(BacktraceWorkload(self.fs, self.mount_a))

    def test_scrub_backtrace(self):
        self._scrub(BacktraceWorkload(self.fs, self.mount_a))

    def test_scrub_dup_inode(self):
        self._scrub(DupInodeWorkload(self.fs, self.mount_a))