from unittest import SkipTest
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase
from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
import errno
import time
import json
import logging

log = logging.getLogger(__name__)

class TestMisc(CephFSTestCase):
    CLIENTS_REQUIRED = 2

    def test_getattr_caps(self):
        """
        Check if the MDS recognizes the 'mask' parameter of an open request.
        The parameter allows the client to request caps when opening a file.
        """

        if not isinstance(self.mount_a, FuseMount):
            raise SkipTest("Require FUSE client")

        # Enable debug. The client will request CEPH_CAP_XATTR_SHARED
        # on lookup/open.
        self.mount_b.umount_wait()
        self.set_conf('client', 'client debug getattr caps', 'true')
        self.mount_b.mount()
        self.mount_b.wait_until_mounted()

        # Create a file and hold it open. The MDS will issue CEPH_CAP_EXCL_*
        # to mount_a.
        p = self.mount_a.open_background("testfile")
        self.mount_b.wait_for_visible("testfile")

        # This triggers a lookup request and an open request. The debug
        # code will check whether the lookup/open reply contains xattrs.
        self.mount_b.run_shell(["cat", "testfile"])

        self.mount_a.kill_background(p)

    def test_root_rctime(self):
        """
        Check that the root inode has a non-default rctime on startup.
        """

        t = time.time()
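        # ceph.dir.rctime is a CephFS virtual xattr reporting the most recent
        # ctime seen anywhere within the directory's subtree (recursive ctime).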
        rctime = self.mount_a.getfattr(".", "ceph.dir.rctime")
        log.info("rctime = {}".format(rctime))
        self.assertGreaterEqual(rctime, t - 10)

    def test_fs_new(self):
        self.mount_a.umount_wait()
        self.mount_b.umount_wait()

        data_pool_name = self.fs.get_data_pool_name()

        self.fs.mds_stop()
        self.fs.mds_fail()

        self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name,
                                            '--yes-i-really-mean-it')

        self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
                                            self.fs.metadata_pool_name,
                                            self.fs.metadata_pool_name,
                                            '--yes-i-really-really-mean-it')
        self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
                                            self.fs.metadata_pool_name,
                                            str(self.fs.get_pgs_per_fs_pool()))

        dummyfile = '/etc/fstab'

        self.fs.put_metadata_object_raw("key", dummyfile)

        def get_pool_df(fs, name):
            try:
                return fs.get_pool_df(name)['objects'] > 0
            except RuntimeError:
                return False

        self.wait_until_true(lambda: get_pool_df(self.fs, self.fs.metadata_pool_name), timeout=30)

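        # The metadata pool now contains an object, so a plain 'fs new' on it
        # should be rejected with EINVAL; recreating a filesystem on a
        # non-empty metadata pool requires --force (exercised below).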
        try:
            self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
                                                self.fs.metadata_pool_name,
                                                data_pool_name)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.EINVAL)
        else:
            raise AssertionError("Expected EINVAL")

        self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
                                            self.fs.metadata_pool_name,
                                            data_pool_name, "--force")

        self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name,
                                            '--yes-i-really-mean-it')

        self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
                                            self.fs.metadata_pool_name,
                                            self.fs.metadata_pool_name,
                                            '--yes-i-really-really-mean-it')
        self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
                                            self.fs.metadata_pool_name,
                                            str(self.fs.get_pgs_per_fs_pool()))
        self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
                                            self.fs.metadata_pool_name,
                                            data_pool_name)

    def test_evict_client(self):
        """
        Check that a slow client session won't get evicted if it's the
        only session.
        """

        session_autoclose = self.fs.get_var("session_autoclose")

        self.mount_b.umount_wait()
        ls_data = self.fs.mds_asok(['session', 'ls'])
        self.assert_session_count(1, ls_data)

        mount_a_client_id = self.mount_a.get_global_id()
        self.mount_a.kill()
        self.mount_a.kill_cleanup()

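        # Sleep well past session_autoclose: the lone remaining session must
        # not be auto-evicted even though its client has stopped responding.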
        time.sleep(session_autoclose * 1.5)
        ls_data = self.fs.mds_asok(['session', 'ls'])
        self.assert_session_count(1, ls_data)

        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        self.mount_b.mount()
        self.mount_b.wait_until_mounted()

        ls_data = self._session_list()
        self.assert_session_count(2, ls_data)

        self.mount_a.kill()
        self.mount_a.kill_cleanup()

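        # With a second session present, the dead client should now be
        # auto-evicted once session_autoclose has elapsed, leaving only
        # mount_b's session behind.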
        time.sleep(session_autoclose * 1.5)
        ls_data = self.fs.mds_asok(['session', 'ls'])
        self.assert_session_count(1, ls_data)

    def test_cap_revoke_nonresponder(self):
        """
        Check that a client is evicted if it has not responded to a cap
        revoke request for the configured number of seconds.
        """
        session_timeout = self.fs.get_var("session_timeout")
        eviction_timeout = session_timeout / 2.0

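        # mds_cap_revoke_eviction_timeout is set to half the session timeout
        # so that cap-revoke eviction (the mechanism under test) fires before
        # ordinary stale-session handling would have a chance to evict the
        # client.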
        self.fs.mds_asok(['config', 'set', 'mds_cap_revoke_eviction_timeout',
                          str(eviction_timeout)])

        cap_holder = self.mount_a.open_background()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.kill()

        try:
            # The waiter should get stuck waiting for the capability
            # held on the MDS by the now-dead client A
            cap_waiter = self.mount_b.write_background()

            a = time.time()
            time.sleep(eviction_timeout)
            cap_waiter.wait()
            b = time.time()
            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))

            # Check that the cap was handed over before the session timeout
            # kicked in. This is a good enough check to ensure that the client
            # was evicted by the cap auto-evicter rather than transitioning to
            # the stale state and then getting evicted.
            self.assertLess(cap_waited, session_timeout,
                            "Capability handover took {0}, expected less than {1}".format(
                                cap_waited, session_timeout
                            ))

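            # The non-responding client should also have been blacklisted in
            # the OSD map as part of the eviction.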
            self.assertTrue(self.mount_a.is_blacklisted())
            cap_holder.stdin.close()
            try:
                cap_holder.wait()
            except (CommandFailedError, ConnectionLostError):
                # We killed it (and possibly its node), so it raises an error
                pass
        finally:
            self.mount_a.kill_cleanup()

            self.mount_a.mount()
            self.mount_a.wait_until_mounted()

    def test_filtered_df(self):
        pool_name = self.fs.get_data_pool_name()
        raw_df = self.fs.get_pool_df(pool_name)
        raw_avail = float(raw_df["max_avail"])
        out = self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'get',
                                                  pool_name, 'size',
                                                  '-f', 'json-pretty')
        j = json.loads(out)
        pool_size = int(j['size'])

        proc = self.mount_a.run_shell(['df', '.'])
        output = proc.stdout.getvalue()
        fs_avail = output.split('\n')[1].split()[3]
        fs_avail = float(fs_avail) * 1024

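        # The "filtered" df seen through the client is derived from the data
        # pool's max_avail, so the two figures should agree to within roughly
        # 10%, as asserted below.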
        ratio = raw_avail / fs_avail
        assert 0.9 < ratio < 1.1

    def test_dump_inode(self):
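        # Inode number 1 is the CephFS root inode, so dumping it should
        # resolve to the path "/".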
        info = self.fs.mds_asok(['dump', 'inode', '1'])
        assert info['path'] == "/"

    def test_dump_inode_hexademical(self):
        self.mount_a.run_shell(["mkdir", "-p", "foo"])
        ino = self.mount_a.path_to_ino("foo")
        assert type(ino) is int
        info = self.fs.mds_asok(['dump', 'inode', hex(ino)])
        assert info['path'] == "/foo"


class TestCacheDrop(CephFSTestCase):
    CLIENTS_REQUIRED = 1

    def _run_drop_cache_cmd(self, timeout=None):
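        # Issue the MDS "cache drop" admin command via the tell interface.
        # It recalls client caps, flushes the journal and trims the MDS cache,
        # returning a JSON summary of each phase; the optional timeout bounds
        # how long cap recall may take.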
        result = None
        mds_id = self.fs.get_lone_mds_id()
        if timeout is not None:
            result = self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id),
                                                         "cache", "drop", str(timeout))
        else:
            result = self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id),
                                                         "cache", "drop")
        return json.loads(result)

    def _setup(self, max_caps=20, threshold=400):
        # create some files
        self.mount_a.create_n_files("dc-dir/dc-file", 1000, sync=True)

        # Reduce these so the MDS doesn't recall the maximum for simple tests
        self.fs.rank_asok(['config', 'set', 'mds_recall_max_caps', str(max_caps)])
        self.fs.rank_asok(['config', 'set', 'mds_recall_max_decay_threshold', str(threshold)])

    def test_drop_cache_command(self):
        """
        Basic test for the drop cache command.
        Confirm it completes when no timeout is given.
        Note that the cache size post trimming is not checked here.
        """
        mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client"))
        self._setup()
        result = self._run_drop_cache_cmd()
        self.assertTrue(result['client_recall']['return_code'] == 0)
        self.assertTrue(result['flush_journal']['return_code'] == 0)
        # It should take at least 1 second
        self.assertTrue(result['duration'] > 1)
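        # _setup() created 1000 files; the MDS will not recall below
        # mds_min_caps_per_client caps for a client, so allow a margin of
        # twice that in the expected trim count.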
        self.assertGreaterEqual(result['trim_cache']['trimmed'], 1000 - 2 * mds_min_caps_per_client)

    def test_drop_cache_command_timeout(self):
        """
        Basic test for the drop cache command.
        Confirm recall halts early via a timeout.
        Note that the cache size post trimming is not checked here.
        """
        self._setup()
        result = self._run_drop_cache_cmd(timeout=10)
        self.assertTrue(result['client_recall']['return_code'] == -errno.ETIMEDOUT)
        self.assertTrue(result['flush_journal']['return_code'] == 0)
        self.assertTrue(result['duration'] > 10)
        self.assertGreaterEqual(result['trim_cache']['trimmed'], 100)  # we did something, right?

    def test_drop_cache_command_dead_timeout(self):
        """
        Check the drop cache command with a non-responding client, using the
        tell interface. Note that the cache size post trimming is not checked
        here.
        """
        self._setup()
        self.mount_a.kill()
        # Note: recall is subject to the timeout. The journal flush will
        # be delayed due to the client being dead.
        result = self._run_drop_cache_cmd(timeout=5)
        self.assertTrue(result['client_recall']['return_code'] == -errno.ETIMEDOUT)
        self.assertTrue(result['flush_journal']['return_code'] == 0)
        self.assertTrue(result['duration'] > 5)
        self.assertTrue(result['duration'] < 120)
        self.assertEqual(0, result['trim_cache']['trimmed'])
        self.mount_a.kill_cleanup()
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_drop_cache_command_dead(self):
        """
        Check the drop cache command with a non-responding client, using the
        tell interface. Note that the cache size post trimming is not checked
        here.
        """
        self._setup()
        self.mount_a.kill()
        result = self._run_drop_cache_cmd()
        self.assertTrue(result['client_recall']['return_code'] == 0)
        self.assertTrue(result['flush_journal']['return_code'] == 0)
        self.assertTrue(result['duration'] > 5)
        self.assertTrue(result['duration'] < 120)
        self.assertEqual(0, result['trim_cache']['trimmed'])
        self.mount_a.kill_cleanup()
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()