import logging

from tasks.cephfs.cephfs_test_case import CephFSTestCase
from teuthology.orchestra import run
# Module-level logger, named after this module per the standard convention.
log = logging.getLogger(__name__)
class TestFragmentation(CephFSTestCase):
    """
    Tests for MDS directory fragmentation: that large directories are
    split into multiple dirfrags, and merged back when they shrink.
    """

    def get_splits(self):
        # Cumulative count of directory splits performed by the MDS,
        # read from the MDS perf counter dump.
        return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_split']
18 return self
.fs
.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_merge']
20 def get_dir_ino(self
, path
):
21 dir_cache
= self
.fs
.read_cache(path
, 0)
23 dir_inono
= self
.mount_a
.path_to_ino(path
.strip("/"))
25 if ino
['ino'] == dir_inono
:
28 self
.assertIsNotNone(dir_ino
)
31 def _configure(self
, **kwargs
):
33 Apply kwargs as MDS configuration settings, enable dirfrags
36 kwargs
['mds_bal_frag'] = "true"
38 for k
, v
in kwargs
.items():
39 self
.ceph_cluster
.set_ceph_conf("mds", k
, v
.__str
__())
41 self
.fs
.set_allow_dirfrags(True)
43 self
.mds_cluster
.mds_fail_restart()
44 self
.fs
.wait_for_daemons()
46 def test_oversize(self
):
48 That a directory is split when it becomes too large.
55 mds_bal_split_size
=split_size
,
56 mds_bal_merge_size
=merge_size
,
60 self
.assertEqual(self
.get_splits(), 0)
62 self
.mount_a
.create_n_files("splitdir/file", split_size
+ 1)
65 lambda: self
.get_splits() == 1,
69 frags
= self
.get_dir_ino("/splitdir")['dirfrags']
70 self
.assertEqual(len(frags
), 2)
71 self
.assertEqual(frags
[0]['dirfrag'], "10000000000.0*")
72 self
.assertEqual(frags
[1]['dirfrag'], "10000000000.1*")
74 sum([len(f
['dentries']) for f
in frags
]),
78 self
.assertEqual(self
.get_merges(), 0)
80 self
.mount_a
.run_shell(["rm", "-f", run
.Raw("splitdir/file*")])
83 lambda: self
.get_merges() == 1,
87 self
.assertEqual(len(self
.get_dir_ino("/splitdir")["dirfrags"]), 1)
89 def test_rapid_creation(self
):
91 That the fast-splitting limit of 1.5x normal limit is
92 applied when creating dentries quickly.
99 mds_bal_split_size
=split_size
,
100 mds_bal_merge_size
=merge_size
,
101 mds_bal_split_bits
=3,
102 mds_bal_fragment_size_max
=int(split_size
* 1.5 + 2)
105 # We test this only at a single split level. If a client was sending
106 # IO so fast that it hit a second split before the first split
107 # was complete, it could violate mds_bal_fragment_size_max -- there
108 # is a window where the child dirfrags of a split are unfrozen
109 # (so they can grow), but still have STATE_FRAGMENTING (so they
112 # By writing 4x the split size when the split bits are set
113 # to 3 (i.e. 4-ways), I am reasonably sure to see precisely
114 # one split. The test is to check whether that split
115 # happens soon enough that the client doesn't exceed
116 # 2x the split_size (the "immediate" split mode should
117 # kick in at 1.5x the split size).
119 self
.assertEqual(self
.get_splits(), 0)
120 self
.mount_a
.create_n_files("splitdir/file", split_size
* 4)
121 self
.wait_until_equal(
124 reject_fn
=lambda s
: s
> 1,
128 def test_deep_split(self
):
130 That when the directory grows many times larger than split size,
131 the fragments get split again.
135 merge_size
= 1 # i.e. don't merge frag unless its empty
138 branch_factor
= 2**split_bits
140 # Arbitrary: how many levels shall we try fragmenting before
145 mds_bal_split_size
=split_size
,
146 mds_bal_merge_size
=merge_size
,
147 mds_bal_split_bits
=split_bits
150 # Each iteration we will create another level of fragments. The
151 # placement of dentries into fragments is by hashes (i.e. pseudo
152 # random), so we rely on statistics to get the behaviour that
153 # by writing about 1.5x as many dentries as the split_size times
154 # the number of frags, we will get them all to exceed their
155 # split size and trigger a split.
159 while depth
< max_depth
:
160 log
.info("Writing files for depth {0}".format(depth
))
161 target_files
= branch_factor
**depth
* int(split_size
* 1.5)
162 create_files
= target_files
- files_written
164 self
.ceph_cluster
.mon_manager
.raw_cluster_cmd("log",
165 "{0} Writing {1} files (depth={2})".format(
166 self
.__class
__.__name
__, create_files
, depth
168 self
.mount_a
.create_n_files("splitdir/file_{0}".format(depth
),
170 self
.ceph_cluster
.mon_manager
.raw_cluster_cmd("log",
171 "{0} Done".format(self
.__class
__.__name
__))
173 files_written
+= create_files
174 log
.info("Now have {0} files".format(files_written
))
176 splits_expected
+= branch_factor
**depth
177 log
.info("Waiting to see {0} splits".format(splits_expected
))
179 self
.wait_until_equal(
183 reject_fn
=lambda x
: x
> splits_expected
186 frags
= self
.get_dir_ino("/splitdir")['dirfrags']
187 self
.assertEqual(len(frags
), branch_factor
**(depth
+1))
189 sum([len(f
['dentries']) for f
in frags
]),
193 # On failures, log what fragmentation we actually ended
194 # up with. This block is just for logging, at the end
195 # we raise the exception again.
196 frags
= self
.get_dir_ino("/splitdir")['dirfrags']
197 log
.info("depth={0} splits_expected={1} files_written={2}".format(
198 depth
, splits_expected
, files_written
200 log
.info("Dirfrags:")
202 log
.info("{0}: {1}".format(
203 f
['dirfrag'], len(f
['dentries'])
209 # Remember the inode number because we will be checking for
211 dir_inode_no
= self
.mount_a
.path_to_ino("splitdir")
213 self
.mount_a
.run_shell(["rm", "-rf", "splitdir/"])
214 self
.mount_a
.umount_wait()
216 self
.fs
.mds_asok(['flush', 'journal'])
218 # Wait for all strays to purge
219 self
.wait_until_equal(
220 lambda: self
.fs
.mds_asok(['perf', 'dump', 'mds_cache']
221 )['mds_cache']['num_strays'],
225 # Check that the metadata pool objects for all the myriad
226 # child fragments are gone
227 metadata_objs
= self
.fs
.rados(["ls"])
229 for o
in metadata_objs
:
230 if o
.startswith("{0:x}.".format(dir_inode_no
)):
232 self
.assertListEqual(frag_objs
, [])