"""
Exercise the MDS's behaviour when clients and the MDCache reach or
exceed the limits of how many caps/inodes they should hold.
"""

import logging
from textwrap import dedent
from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming
from tasks.cephfs.fuse_mount import FuseMount
import os


log = logging.getLogger(__name__)


# Arbitrary timeouts for operations involving restarting
# an MDS or waiting for it to come up
MDS_RESTART_GRACE = 60

# Hardcoded values from Server::recall_client_state
CAP_RECALL_RATIO = 0.8
CAP_RECALL_MIN = 100


class TestClientLimits(CephFSTestCase):
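    # Two clients are needed so that one can hold caps/state that affects the other;
    # REQUIRE_KCLIENT_REMOTE presumably keeps a kernel client off the MDS host, where
    # recall under cache pressure could otherwise deadlock.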
    REQUIRE_KCLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

    def _test_client_pin(self, use_subdir, open_files):
        """
        When a client pins an inode in its cache, for example because the file is held open,
        it should reject requests from the MDS to trim these caps. The MDS should complain
        to the user that it is unable to enforce its cache size limits because of this
        objectionable client.

        :param use_subdir: whether to put test files in a subdir or use root
        """

        # Set the MDS cache memory limit to a low value that will make the MDS
        # ask the client to trim its caps.
        cache_memory_limit = "1K"

        self.set_conf('mds', 'mds_cache_memory_limit', cache_memory_limit)
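        # mds_recall_max_caps bounds how many caps the MDS asks a client to drop in a
        # single recall event, and mds_recall_warning_threshold is (roughly) the number
        # of recalled-but-unreleased caps that trips the MDS_CLIENT_RECALL warning.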
        self.set_conf('mds', 'mds_recall_max_caps', open_files // 2)
        self.set_conf('mds', 'mds_recall_warning_threshold', open_files)
        self.fs.mds_fail_restart()
        self.fs.wait_for_daemons()

        mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client"))
        mds_max_caps_per_client = int(self.fs.get_config("mds_max_caps_per_client"))
        mds_recall_warning_decay_rate = float(self.fs.get_config("mds_recall_warning_decay_rate"))
        self.assertTrue(open_files >= mds_min_caps_per_client)

        mount_a_client_id = self.mount_a.get_global_id()
        path = "subdir/mount_a" if use_subdir else "mount_a"
        open_proc = self.mount_a.open_n_background(path, open_files)

        # Client should now hold:
        # `open_files` caps for the open files
        # 1 cap for root
        # 1 cap for subdir (when use_subdir is set)
        self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'],
                              open_files + (2 if use_subdir else 1),
                              timeout=600,
                              reject_fn=lambda x: x > open_files + 2)

        # MDS should not be happy about that, as the client is failing to comply
        # with the SESSION_RECALL messages it is being sent
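        # (the recall warning counter decays at mds_recall_warning_decay_rate, so allow
        # roughly two decay periods for the warning to trip)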
        self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate * 2)

        # We can also test that the MDS health warning for oversized
        # cache is functioning as intended.
        self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate * 2)

        # When the client closes the files, it should retain only as many caps as allowed
        # under the SESSION_RECALL policy
        log.info("Terminating process holding files open")
        open_proc.stdin.close()
        try:
            open_proc.wait()
        except CommandFailedError:
            # We killed it, so it raises an error
            pass

        # The remaining caps should comply with the numbers sent from the MDS in
        # SESSION_RECALL messages, which depend on the caps outstanding, the cache
        # size and the overall ratio
        def expected_caps():
            num_caps = self.get_session(mount_a_client_id)['num_caps']
            if num_caps <= mds_min_caps_per_client:
                return True
            elif num_caps <= mds_max_caps_per_client:
                return True
            else:
                return False

        self.wait_until_true(expected_caps, timeout=60)

    @needs_trimming
    def test_client_pin_root(self):
        self._test_client_pin(False, 400)

    @needs_trimming
    def test_client_pin(self):
        self._test_client_pin(True, 800)

    @needs_trimming
    def test_client_pin_mincaps(self):
        self._test_client_pin(True, 200)

    def test_client_release_bug(self):
        """
        When a client has a bug (which we will simulate) preventing it from releasing caps,
        the MDS should notice that releases are not being sent promptly, and generate a health
        metric to that effect.
        """

        # The debug hook to inject the failure only exists in the fuse client
        if not isinstance(self.mount_a, FuseMount):
            self.skipTest("Require FUSE client to inject client release failure")

        self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true')
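        # Remount so that the injected config option takes effect for this client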
        self.mount_a.teardown()
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        mount_a_client_id = self.mount_a.get_global_id()

        # Client A creates a file. It will hold the write caps on the file, and later
        # (simulated bug) fail to comply with the MDS's request to release that cap
        self.mount_a.run_shell(["touch", "file1"])

        # Client B tries to write to the file that client A created
        rproc = self.mount_b.write_background("file1")

        # After session_timeout, we should see a health warning (extra lag from
        # MDS beacon period)
        session_timeout = self.fs.get_var("session_timeout")
        self.wait_for_health("MDS_CLIENT_LATE_RELEASE", session_timeout + 10)

        # Client B should still be stuck
        self.assertFalse(rproc.finished)

        # Kill client A
        self.mount_a.kill()
        self.mount_a.kill_cleanup()

        # Evicting client A's session should let client B complete
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
        rproc.wait()

    def test_client_oldest_tid(self):
        """
        When a client does not advance its oldest tid, the MDS should notice that
        and generate health warnings.
        """

        # Number of requests the client issues
        max_requests = 1000

        # The debug hook to inject the failure only exists in the fuse client
        if not isinstance(self.mount_a, FuseMount):
            self.skipTest("Require FUSE client to inject a fixed oldest tid")

        self.set_conf('client', 'client inject fixed oldest tid', 'true')
        self.mount_a.teardown()
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
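
        # The MDS keeps completed requests around until the client advances its oldest
        # tid past them; keeping mds_max_completed_requests small means only a modest
        # backlog is needed before the health warning should appear.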
        self.fs.mds_asok(['config', 'set', 'mds_max_completed_requests', '{0}'.format(max_requests)])

        # Create lots of files
        self.mount_a.create_n_files("testdir/file1", max_requests + 100)

        # Create a few files synchronously. This makes sure previous requests are completed
        self.mount_a.create_n_files("testdir/file2", 5, True)

        # Wait for the health warnings. Assume the MDS can handle at least 10 requests per second
        self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests / 10)

    def _test_client_cache_size(self, mount_subdir):
        """
        Check that the client invalidates kernel dcache entries according to its
        cache size config.
        """

        # Only the FUSE client exposes the cache size knob and dentry counters used below
        if not isinstance(self.mount_a, FuseMount):
            self.skipTest("Require FUSE client to check client cache size")

        if mount_subdir:
            # FUSE assigns a fixed inode number (1) to the root inode. But when mounting
            # a subdir, the actual inode number of the root is not 1. This mismatch
            # confuses fuse_lowlevel_notify_inval_entry() when invalidating dentries
            # in the root directory.
            self.mount_a.run_shell(["mkdir", "subdir"])
            self.mount_a.umount_wait()
            self.set_conf('client', 'client mountpoint', '/subdir')
            self.mount_a.mount()
            self.mount_a.wait_until_mounted()
            root_ino = self.mount_a.path_to_ino(".")
            self.assertEqual(root_ino, 1)

        dir_path = os.path.join(self.mount_a.mountpoint, "testdir")
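
        # {path} and {num_dirs} are filled in by the .format() call below; the doubled
        # braces in dir{{0}} survive it and feed the per-iteration format inside the script.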
        mkdir_script = dedent("""
            import os
            os.mkdir("{path}")
            for n in range(0, {num_dirs}):
                os.mkdir("{path}/dir{{0}}".format(n))
            """)

        num_dirs = 1000
        self.mount_a.run_python(mkdir_script.format(path=dir_path, num_dirs=num_dirs))
        self.mount_a.run_shell(["sync"])

        dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
        self.assertGreaterEqual(dentry_count, num_dirs)
        self.assertGreaterEqual(dentry_pinned_count, num_dirs)
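
        # Shrinking the client cache size should make the FUSE client ask the kernel to
        # drop dentries until it is back under the much smaller limit.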
        cache_size = num_dirs // 10
        self.mount_a.set_cache_size(cache_size)

        def trimmed():
            dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
            log.info("waiting, dentry_count, dentry_pinned_count: {0}, {1}".format(
                dentry_count, dentry_pinned_count
            ))
            if dentry_count > cache_size or dentry_pinned_count > cache_size:
                return False

            return True

        self.wait_until_true(trimmed, 30)

    @needs_trimming
    def test_client_cache_size(self):
        self._test_client_cache_size(False)
        self._test_client_cache_size(True)

    def test_client_max_caps(self):
        """
        That the MDS will not let a client sit above mds_max_caps_per_client caps.
        """

        mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client"))
        mds_max_caps_per_client = 2 * mds_min_caps_per_client
        self.set_conf('mds', 'mds_max_caps_per_client', mds_max_caps_per_client)
        self.fs.mds_fail_restart()
        self.fs.wait_for_daemons()
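
        # Creating the files gives the client one cap per file, pushing it well past the
        # new limit; the MDS should then recall caps back down below it.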
        self.mount_a.create_n_files("foo/", 3 * mds_max_caps_per_client, sync=True)

        mount_a_client_id = self.mount_a.get_global_id()
        def expected_caps():
            num_caps = self.get_session(mount_a_client_id)['num_caps']
            if num_caps <= mds_max_caps_per_client:
                return True
            else:
                return False

        self.wait_until_true(expected_caps, timeout=60)