]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/cephfs/test_misc.py
import 15.2.4
[ceph.git] / ceph / qa / tasks / cephfs / test_misc.py
CommitLineData
7c673cae 1
7c673cae
FG
2from tasks.cephfs.fuse_mount import FuseMount
3from tasks.cephfs.cephfs_test_case import CephFSTestCase
91327a77 4from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
7c673cae
FG
5import errno
6import time
d2e6a577 7import json
91327a77 8import logging
7c673cae 9
91327a77 10log = logging.getLogger(__name__)
31f18b77 11
7c673cae
FG
class TestMisc(CephFSTestCase):
    """Miscellaneous CephFS behaviour tests requiring two clients."""

    CLIENTS_REQUIRED = 2

    def test_getattr_caps(self):
        """
        Check if MDS recognizes the 'mask' parameter of open request.
        The parameter allows client to request caps when opening file
        """
        if not isinstance(self.mount_a, FuseMount):
            self.skipTest("Require FUSE client")

        # Enable debug. Client will requests CEPH_CAP_XATTR_SHARED
        # on lookup/open
        self.mount_b.umount_wait()
        self.set_conf('client', 'client debug getattr caps', 'true')
        self.mount_b.mount_wait()

        # create a file and hold it open. MDS will issue CEPH_CAP_EXCL_*
        # to mount_a
        p = self.mount_a.open_background("testfile")
        self.mount_b.wait_for_visible("testfile")

        # this triggers a lookup request and an open request. The debug
        # code will check if lookup/open reply contains xattrs
        self.mount_b.run_shell(["cat", "testfile"])

        self.mount_a.kill_background(p)

    def test_root_rctime(self):
        """
        Check that the root inode has a non-default rctime on startup.
        """
        t = time.time()
        rctime = self.mount_a.getfattr(".", "ceph.dir.rctime")
        log.info("rctime = {}".format(rctime))
        # Allow 10s of slack for clock skew between test node and MDS.
        self.assertGreaterEqual(float(rctime), t - 10)

    def test_fs_new(self):
        """
        Check that 'fs new' refuses to reuse a metadata pool that already
        contains objects unless --force is supplied, and that a freshly
        created pool is accepted without --force.
        """
        self.mount_a.umount_wait()
        self.mount_b.umount_wait()

        data_pool_name = self.fs.get_data_pool_name()

        self.fs.mds_stop()
        self.fs.mds_fail()

        self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name,
                                            '--yes-i-really-mean-it')

        self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
                                            self.fs.metadata_pool_name,
                                            self.fs.metadata_pool_name,
                                            '--yes-i-really-really-mean-it')
        self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
                                            self.fs.metadata_pool_name,
                                            str(self.fs.pgs_per_fs_pool))

        dummyfile = '/etc/fstab'

        # Make the metadata pool non-empty so 'fs new' should reject it
        self.fs.put_metadata_object_raw("key", dummyfile)

        def get_pool_df(fs, name):
            # Pool stats may lag behind the write; report False until the
            # object count becomes visible.
            try:
                return fs.get_pool_df(name)['objects'] > 0
            except RuntimeError:
                return False

        self.wait_until_true(lambda: get_pool_df(self.fs, self.fs.metadata_pool_name), timeout=30)

        # Reusing a non-empty metadata pool without --force must fail EINVAL
        try:
            self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
                                                self.fs.metadata_pool_name,
                                                data_pool_name)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.EINVAL)
        else:
            raise AssertionError("Expected EINVAL")

        # ...but with --force it must succeed
        self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
                                            self.fs.metadata_pool_name,
                                            data_pool_name, "--force")

        self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name,
                                            '--yes-i-really-mean-it')

        # A brand-new (empty) metadata pool needs no --force
        self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
                                            self.fs.metadata_pool_name,
                                            self.fs.metadata_pool_name,
                                            '--yes-i-really-really-mean-it')
        self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
                                            self.fs.metadata_pool_name,
                                            str(self.fs.pgs_per_fs_pool))
        self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
                                            self.fs.metadata_pool_name,
                                            data_pool_name)

    def test_cap_revoke_nonresponder(self):
        """
        Check that a client is evicted if it has not responded to cap revoke
        request for configured number of seconds.
        """
        session_timeout = self.fs.get_var("session_timeout")
        eviction_timeout = session_timeout / 2.0

        self.fs.mds_asok(['config', 'set', 'mds_cap_revoke_eviction_timeout',
                          str(eviction_timeout)])

        cap_holder = self.mount_a.open_background()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.kill()

        try:
            # The waiter should get stuck waiting for the capability
            # held on the MDS by the now-dead client A
            cap_waiter = self.mount_b.write_background()

            a = time.time()
            time.sleep(eviction_timeout)
            cap_waiter.wait()
            b = time.time()
            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))

            # check if the cap is transferred before session timeout kicked in.
            # this is a good enough check to ensure that the client got evicted
            # by the cap auto evicter rather than transitioning to stale state
            # and then getting evicted.
            self.assertLess(cap_waited, session_timeout,
                            "Capability handover took {0}, expected less than {1}".format(
                                cap_waited, session_timeout
                            ))

            self.assertTrue(self.mount_a.is_blacklisted())
            cap_holder.stdin.close()
            try:
                cap_holder.wait()
            except (CommandFailedError, ConnectionLostError):
                # We killed it (and possibly its node), so it raises an error
                pass
        finally:
            self.mount_a.kill_cleanup()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_filtered_df(self):
        """
        Check that 'df' on a mount reports available space consistent with
        the data pool's replication-adjusted max_avail (within 10%).
        """
        pool_name = self.fs.get_data_pool_name()
        raw_df = self.fs.get_pool_df(pool_name)
        raw_avail = float(raw_df["max_avail"])
        out = self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'get',
                                                  pool_name, 'size',
                                                  '-f', 'json-pretty')
        # Validate that the mon returned well-formed JSON; value unused.
        _ = json.loads(out)

        proc = self.mount_a.run_shell(['df', '.'])
        output = proc.stdout.getvalue()
        # df output: header line, then "<fs> <blocks> <used> <avail> ..."
        fs_avail = output.split('\n')[1].split()[3]
        fs_avail = float(fs_avail) * 1024

        ratio = raw_avail / fs_avail
        self.assertTrue(0.9 < ratio < 1.1,
                        "df avail ratio {0} out of tolerance".format(ratio))

    def test_dump_inode(self):
        """Dump inode by decimal number; inode 1 is the root directory."""
        info = self.fs.mds_asok(['dump', 'inode', '1'])
        self.assertEqual(info['path'], "/")

    def test_dump_inode_hexademical(self):
        """Check that 'dump inode' also accepts hexadecimal inode numbers."""
        # NOTE: "hexademical" is a typo for "hexadecimal" in the method
        # name, kept to preserve the externally visible test name.
        self.mount_a.run_shell(["mkdir", "-p", "foo"])
        ino = self.mount_a.path_to_ino("foo")
        self.assertIsInstance(ino, int)
        info = self.fs.mds_asok(['dump', 'inode', hex(ino)])
        self.assertEqual(info['path'], "/foo")
f64942e4 192
f64942e4 193
11fdf7f2
TL
class TestCacheDrop(CephFSTestCase):
    """Tests for the MDS 'cache drop' tell command."""

    CLIENTS_REQUIRED = 1

    def _run_drop_cache_cmd(self, timeout=None):
        """
        Run 'ceph tell mds.<id> cache drop [<timeout>]' against the lone
        MDS and return the parsed JSON result.

        :param timeout: optional recall timeout in seconds; omitted from
                        the command when None.
        """
        mds_id = self.fs.get_lone_mds_id()
        # Build the command once; the timeout is the only variable part.
        cmd = ["tell", "mds.{0}".format(mds_id), "cache", "drop"]
        if timeout is not None:
            cmd.append(str(timeout))
        result = self.fs.mon_manager.raw_cluster_cmd(*cmd)
        return json.loads(result)

    def _setup(self, max_caps=20, threshold=400):
        """Create files for the client to hold caps on and lower the MDS
        recall limits so small tests exercise the recall path."""
        # create some files
        self.mount_a.create_n_files("dc-dir/dc-file", 1000, sync=True)

        # Reduce this so the MDS doesn't recall the maximum for simple tests
        self.fs.rank_asok(['config', 'set', 'mds_recall_max_caps', str(max_caps)])
        self.fs.rank_asok(['config', 'set', 'mds_recall_max_decay_threshold', str(threshold)])

    def test_drop_cache_command(self):
        """
        Basic test for checking drop cache command.
        Confirm it halts without a timeout.
        Note that the cache size post trimming is not checked here.
        """
        mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client"))
        self._setup()
        result = self._run_drop_cache_cmd()
        self.assertEqual(result['client_recall']['return_code'], 0)
        self.assertEqual(result['flush_journal']['return_code'], 0)
        # It should take at least 1 second
        self.assertGreater(result['duration'], 1)
        self.assertGreaterEqual(result['trim_cache']['trimmed'], 1000-2*mds_min_caps_per_client)

    def test_drop_cache_command_timeout(self):
        """
        Basic test for checking drop cache command.
        Confirm recall halts early via a timeout.
        Note that the cache size post trimming is not checked here.
        """
        self._setup()
        result = self._run_drop_cache_cmd(timeout=10)
        # Recall is expected to hit the timeout rather than complete
        self.assertEqual(result['client_recall']['return_code'], -errno.ETIMEDOUT)
        self.assertEqual(result['flush_journal']['return_code'], 0)
        self.assertGreater(result['duration'], 10)
        self.assertGreaterEqual(result['trim_cache']['trimmed'], 100) # we did something, right?

    def test_drop_cache_command_dead_timeout(self):
        """
        Check drop cache command with non-responding client using tell
        interface. Note that the cache size post trimming is not checked
        here.
        """
        self._setup()
        self.mount_a.kill()
        # Note: recall is subject to the timeout. The journal flush will
        # be delayed due to the client being dead.
        result = self._run_drop_cache_cmd(timeout=5)
        self.assertEqual(result['client_recall']['return_code'], -errno.ETIMEDOUT)
        self.assertEqual(result['flush_journal']['return_code'], 0)
        self.assertGreater(result['duration'], 5)
        self.assertLess(result['duration'], 120)
        # Note: result['trim_cache']['trimmed'] may be >0 because dropping the
        # cache now causes the Locker to drive eviction of stale clients (a
        # stale session will be autoclosed at mdsmap['session_timeout']). The
        # particular operation causing this is journal flush which causes the
        # MDS to wait for cap revoke.
        #self.assertEqual(0, result['trim_cache']['trimmed'])
        self.mount_a.kill_cleanup()
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_drop_cache_command_dead(self):
        """
        Check drop cache command with non-responding client using tell
        interface. Note that the cache size post trimming is not checked
        here.
        """
        self._setup()
        self.mount_a.kill()
        result = self._run_drop_cache_cmd()
        self.assertEqual(result['client_recall']['return_code'], 0)
        self.assertEqual(result['flush_journal']['return_code'], 0)
        self.assertGreater(result['duration'], 5)
        self.assertLess(result['duration'], 120)
        # Note: result['trim_cache']['trimmed'] may be >0 because dropping the
        # cache now causes the Locker to drive eviction of stale clients (a
        # stale session will be autoclosed at mdsmap['session_timeout']). The
        # particular operation causing this is journal flush which causes the
        # MDS to wait for cap revoke.
        self.mount_a.kill_cleanup()
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()