]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/cephfs/test_client_limits.py
bump version to 18.2.2-pve1
[ceph.git] / ceph / qa / tasks / cephfs / test_client_limits.py
CommitLineData
7c673cae
FG
1
2"""
3Exercise the MDS's behaviour when clients and the MDCache reach or
4exceed the limits of how many caps/inodes they should hold.
5"""
6
7import logging
8from textwrap import dedent
f91f0fd5 9from tasks.ceph_test_case import TestTimeoutError
7c673cae
FG
10from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming
11from tasks.cephfs.fuse_mount import FuseMount
aee94f69 12from teuthology.exceptions import CommandFailedError
7c673cae 13import os
aee94f69 14from io import StringIO
7c673cae
FG
15
16
17log = logging.getLogger(__name__)
18
19
20# Arbitrary timeouts for operations involving restarting
21# an MDS or waiting for it to come up
22MDS_RESTART_GRACE = 60
23
24# Hardcoded values from Server::recall_client_state
25CAP_RECALL_RATIO = 0.8
26CAP_RECALL_MIN = 100
27
28
29class TestClientLimits(CephFSTestCase):
7c673cae
FG
30 CLIENTS_REQUIRED = 2
31
3efd9988 32 def _test_client_pin(self, use_subdir, open_files):
7c673cae
FG
33 """
34 When a client pins an inode in its cache, for example because the file is held open,
35 it should reject requests from the MDS to trim these caps. The MDS should complain
36 to the user that it is unable to enforce its cache size limits because of this
37 objectionable client.
38
39 :param use_subdir: whether to put test files in a subdir or use root
40 """
41
9f95a23c
TL
42 # Set MDS cache memory limit to a low value that will make the MDS to
43 # ask the client to trim the caps.
44 cache_memory_limit = "1K"
7c673cae 45
f91f0fd5
TL
46 self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit)
47 self.config_set('mds', 'mds_recall_max_caps', int(open_files/2))
48 self.config_set('mds', 'mds_recall_warning_threshold', open_files)
7c673cae 49
f91f0fd5
TL
50 mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client"))
51 self.config_set('mds', 'mds_min_caps_working_set', mds_min_caps_per_client)
52 mds_max_caps_per_client = int(self.config_get('mds', "mds_max_caps_per_client"))
53 mds_recall_warning_decay_rate = float(self.config_get('mds', "mds_recall_warning_decay_rate"))
54 self.assertGreaterEqual(open_files, mds_min_caps_per_client)
3efd9988 55
7c673cae 56 mount_a_client_id = self.mount_a.get_global_id()
f91f0fd5 57 path = "subdir" if use_subdir else "."
7c673cae
FG
58 open_proc = self.mount_a.open_n_background(path, open_files)
59
60 # Client should now hold:
61 # `open_files` caps for the open files
62 # 1 cap for root
63 # 1 cap for subdir
64 self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'],
65 open_files + (2 if use_subdir else 1),
66 timeout=600,
67 reject_fn=lambda x: x > open_files + 2)
68
69 # MDS should not be happy about that, as the client is failing to comply
70 # with the SESSION_RECALL messages it is being sent
a8e16298 71 self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2)
7c673cae
FG
72
73 # We can also test that the MDS health warning for oversized
74 # cache is functioning as intended.
a8e16298 75 self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2)
7c673cae
FG
76
77 # When the client closes the files, it should retain only as many caps as allowed
78 # under the SESSION_RECALL policy
79 log.info("Terminating process holding files open")
f67539c2 80 self.mount_a._kill_background(open_proc)
7c673cae
FG
81
82 # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message,
181888fb 83 # which depend on the caps outstanding, cache size and overall ratio
3efd9988
FG
84 def expected_caps():
85 num_caps = self.get_session(mount_a_client_id)['num_caps']
11fdf7f2 86 if num_caps <= mds_min_caps_per_client:
3efd9988 87 return True
9f95a23c 88 elif num_caps <= mds_max_caps_per_client:
3efd9988
FG
89 return True
90 else:
91 return False
92
93 self.wait_until_true(expected_caps, timeout=60)
7c673cae
FG
94
95 @needs_trimming
96 def test_client_pin_root(self):
3efd9988 97 self._test_client_pin(False, 400)
7c673cae
FG
98
99 @needs_trimming
100 def test_client_pin(self):
3efd9988
FG
101 self._test_client_pin(True, 800)
102
103 @needs_trimming
104 def test_client_pin_mincaps(self):
105 self._test_client_pin(True, 200)
7c673cae 106
f91f0fd5
TL
107 def test_client_min_caps_working_set(self):
108 """
109 When a client has inodes pinned in its cache (open files), that the MDS
110 will not warn about the client not responding to cache pressure when
111 the number of caps is below mds_min_caps_working_set.
112 """
113
114 # Set MDS cache memory limit to a low value that will make the MDS to
115 # ask the client to trim the caps.
116 cache_memory_limit = "1K"
117 open_files = 400
118
119 self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit)
120 self.config_set('mds', 'mds_recall_max_caps', int(open_files/2))
121 self.config_set('mds', 'mds_recall_warning_threshold', open_files)
122 self.config_set('mds', 'mds_min_caps_working_set', open_files*2)
123
124 mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client"))
125 mds_recall_warning_decay_rate = float(self.config_get('mds', "mds_recall_warning_decay_rate"))
126 self.assertGreaterEqual(open_files, mds_min_caps_per_client)
127
128 mount_a_client_id = self.mount_a.get_global_id()
129 self.mount_a.open_n_background("subdir", open_files)
130
131 # Client should now hold:
132 # `open_files` caps for the open files
133 # 1 cap for root
134 # 1 cap for subdir
135 self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'],
136 open_files + 2,
137 timeout=600,
138 reject_fn=lambda x: x > open_files + 2)
139
140 # We can also test that the MDS health warning for oversized
141 # cache is functioning as intended.
142 self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2)
143
144 try:
145 # MDS should not be happy about that but it's not sending
146 # MDS_CLIENT_RECALL warnings because the client's caps are below
147 # mds_min_caps_working_set.
148 self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2)
149 except TestTimeoutError:
150 pass
151 else:
152 raise RuntimeError("expected no client recall warning")
153
adb31ebb
TL
154 def test_cap_acquisition_throttle_readdir(self):
155 """
156 Mostly readdir acquires caps faster than the mds recalls, so the cap
157 acquisition via readdir is throttled by retrying the readdir after
158 a fraction of second (0.5) by default when throttling condition is met.
159 """
160
aee94f69
TL
161 subdir_count = 4
162 files_per_dir = 25
adb31ebb 163
aee94f69
TL
164 # throttle in a way so that two dir reads are already hitting it.
165 throttle_value = (files_per_dir * 3) // 2
adb31ebb 166
aee94f69
TL
167 # activate throttling logic by setting max per client to a low value
168 self.config_set('mds', 'mds_max_caps_per_client', 1)
169 self.config_set('mds', 'mds_session_cap_acquisition_throttle', throttle_value)
adb31ebb 170
aee94f69
TL
171 # Create files split across {subdir_count} directories, {per_dir_count} in each dir
172 for i in range(1, subdir_count+1):
173 self.mount_a.create_n_files("dir{0}/file".format(i), files_per_dir, sync=True)
adb31ebb 174
aee94f69 175 mount_a_client_id = self.mount_a.get_global_id()
adb31ebb 176
aee94f69
TL
177 # recursive readdir. macOs wants an explicit directory for `find`.
178 proc = self.mount_a.run_shell_payload("find . | wc", stderr=StringIO())
179 # return code may be None if the command got interrupted
180 self.assertTrue(proc.returncode is None or proc.returncode == 0, proc.stderr.getvalue())
adb31ebb
TL
181
182 # validate the throttle condition to be hit atleast once
183 cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle']
184 self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1)
185
aee94f69
TL
186 # validate cap_acquisition decay counter after readdir to NOT exceed the throttle value
187 # plus one batch that could have been taken immediately before querying
188 # assuming the batch is equal to the per dir file count.
189 cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value']
190 self.assertLessEqual(cap_acquisition_value, files_per_dir + throttle_value)
191
192 # make sure that the throttle was reported in the events
193 def historic_ops_have_event(expected_event):
194 ops_dump = self.fs.rank_tell(['dump_historic_ops'])
195 # reverse the events and the ops assuming that later ops would be throttled
196 for op in reversed(ops_dump['ops']):
197 for ev in reversed(op.get('type_data', {}).get('events', [])):
198 if ev['event'] == expected_event:
199 return True
200 return False
201
202 self.assertTrue(historic_ops_have_event('cap_acquisition_throttle'))
203
7c673cae
FG
204 def test_client_release_bug(self):
205 """
206 When a client has a bug (which we will simulate) preventing it from releasing caps,
207 the MDS should notice that releases are not being sent promptly, and generate a health
208 metric to that effect.
209 """
210
211 # The debug hook to inject the failure only exists in the fuse client
212 if not isinstance(self.mount_a, FuseMount):
9f95a23c 213 self.skipTest("Require FUSE client to inject client release failure")
7c673cae
FG
214
215 self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true')
216 self.mount_a.teardown()
e306af50 217 self.mount_a.mount_wait()
7c673cae
FG
218 mount_a_client_id = self.mount_a.get_global_id()
219
220 # Client A creates a file. He will hold the write caps on the file, and later (simulated bug) fail
221 # to comply with the MDSs request to release that cap
222 self.mount_a.run_shell(["touch", "file1"])
223
224 # Client B tries to stat the file that client A created
225 rproc = self.mount_b.write_background("file1")
226
f64942e4 227 # After session_timeout, we should see a health warning (extra lag from
7c673cae 228 # MDS beacon period)
f64942e4
AA
229 session_timeout = self.fs.get_var("session_timeout")
230 self.wait_for_health("MDS_CLIENT_LATE_RELEASE", session_timeout + 10)
7c673cae
FG
231
232 # Client B should still be stuck
233 self.assertFalse(rproc.finished)
234
235 # Kill client A
236 self.mount_a.kill()
237 self.mount_a.kill_cleanup()
238
239 # Client B should complete
240 self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
241 rproc.wait()
242
aee94f69
TL
243 def test_client_blocklisted_oldest_tid(self):
244 """
245 that a client is blocklisted when its encoded session metadata exceeds the
246 configured threshold (due to ever growing `completed_requests` caused due
247 to an unidentified bug (in the client or the MDS)).
248 """
249
250 # num of requests client issues
251 max_requests = 10000
252
253 # The debug hook to inject the failure only exists in the fuse client
254 if not isinstance(self.mount_a, FuseMount):
255 self.skipTest("Require FUSE client to inject client release failure")
256
257 self.config_set('client', 'client inject fixed oldest tid', 'true')
258 self.mount_a.teardown()
259 self.mount_a.mount_wait()
260
261 self.config_set('mds', 'mds_max_completed_requests', max_requests);
262
263 # Create lots of files
264 self.mount_a.create_n_files("testdir/file1", max_requests + 100)
265
266 # Create a few files synchronously. This makes sure previous requests are completed
267 self.mount_a.create_n_files("testdir/file2", 5, True)
268
269 # Wait for the health warnings. Assume mds can handle 10 request per second at least
270 self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10, check_in_detail=str(self.mount_a.client_id))
271
272 # set the threshold low so that it has a high probability of
273 # hitting.
274 self.config_set('mds', 'mds_session_metadata_threshold', 5000);
275
276 # Create lot many files synchronously. This would hit the session metadata threshold
277 # causing the client to get blocklisted.
278 with self.assertRaises(CommandFailedError):
279 self.mount_a.create_n_files("testdir/file2", 100000, True)
280
281 self.mds_cluster.is_addr_blocklisted(self.mount_a.get_global_addr())
282 # the mds should bump up the relevant perf counter
283 pd = self.perf_dump()
284 self.assertGreater(pd['mds_sessions']['mdthresh_evicted'], 0)
285
286 # reset the config
287 self.config_set('client', 'client inject fixed oldest tid', 'false')
288
289 self.mount_a.kill_cleanup()
290 self.mount_a.mount_wait()
291
7c673cae
FG
292 def test_client_oldest_tid(self):
293 """
294 When a client does not advance its oldest tid, the MDS should notice that
295 and generate health warnings.
296 """
297
298 # num of requests client issues
299 max_requests = 1000
300
301 # The debug hook to inject the failure only exists in the fuse client
302 if not isinstance(self.mount_a, FuseMount):
9f95a23c 303 self.skipTest("Require FUSE client to inject client release failure")
7c673cae
FG
304
305 self.set_conf('client', 'client inject fixed oldest tid', 'true')
306 self.mount_a.teardown()
e306af50 307 self.mount_a.mount_wait()
7c673cae
FG
308
309 self.fs.mds_asok(['config', 'set', 'mds_max_completed_requests', '{0}'.format(max_requests)])
310
311 # Create lots of files
312 self.mount_a.create_n_files("testdir/file1", max_requests + 100)
313
314 # Create a few files synchronously. This makes sure previous requests are completed
315 self.mount_a.create_n_files("testdir/file2", 5, True)
316
317 # Wait for the health warnings. Assume mds can handle 10 request per second at least
e306af50 318 self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10)
7c673cae
FG
319
320 def _test_client_cache_size(self, mount_subdir):
321 """
322 check if client invalidate kernel dcache according to its cache size config
323 """
324
325 # The debug hook to inject the failure only exists in the fuse client
326 if not isinstance(self.mount_a, FuseMount):
9f95a23c 327 self.skipTest("Require FUSE client to inject client release failure")
7c673cae
FG
328
329 if mount_subdir:
330 # fuse assigns a fix inode number (1) to root inode. But in mounting into
331 # subdir case, the actual inode number of root is not 1. This mismatch
332 # confuses fuse_lowlevel_notify_inval_entry() when invalidating dentries
333 # in root directory.
334 self.mount_a.run_shell(["mkdir", "subdir"])
335 self.mount_a.umount_wait()
336 self.set_conf('client', 'client mountpoint', '/subdir')
e306af50 337 self.mount_a.mount_wait()
7c673cae
FG
338 root_ino = self.mount_a.path_to_ino(".")
339 self.assertEqual(root_ino, 1);
340
341 dir_path = os.path.join(self.mount_a.mountpoint, "testdir")
342
343 mkdir_script = dedent("""
344 import os
345 os.mkdir("{path}")
346 for n in range(0, {num_dirs}):
347 os.mkdir("{path}/dir{{0}}".format(n))
348 """)
349
350 num_dirs = 1000
351 self.mount_a.run_python(mkdir_script.format(path=dir_path, num_dirs=num_dirs))
352 self.mount_a.run_shell(["sync"])
353
354 dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
355 self.assertGreaterEqual(dentry_count, num_dirs)
356 self.assertGreaterEqual(dentry_pinned_count, num_dirs)
357
e306af50 358 cache_size = num_dirs // 10
7c673cae
FG
359 self.mount_a.set_cache_size(cache_size)
360
361 def trimmed():
362 dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
363 log.info("waiting, dentry_count, dentry_pinned_count: {0}, {1}".format(
364 dentry_count, dentry_pinned_count
365 ))
366 if dentry_count > cache_size or dentry_pinned_count > cache_size:
367 return False
368
369 return True
370
371 self.wait_until_true(trimmed, 30)
372
373 @needs_trimming
374 def test_client_cache_size(self):
375 self._test_client_cache_size(False)
376 self._test_client_cache_size(True)
a8e16298
TL
377
378 def test_client_max_caps(self):
379 """
380 That the MDS will not let a client sit above mds_max_caps_per_client caps.
381 """
382
f91f0fd5 383 mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client"))
a8e16298 384 mds_max_caps_per_client = 2*mds_min_caps_per_client
f91f0fd5 385 self.config_set('mds', 'mds_max_caps_per_client', mds_max_caps_per_client)
a8e16298
TL
386
387 self.mount_a.create_n_files("foo/", 3*mds_max_caps_per_client, sync=True)
388
389 mount_a_client_id = self.mount_a.get_global_id()
390 def expected_caps():
391 num_caps = self.get_session(mount_a_client_id)['num_caps']
11fdf7f2 392 if num_caps <= mds_max_caps_per_client:
a8e16298
TL
393 return True
394 else:
395 return False
396
397 self.wait_until_true(expected_caps, timeout=60)