"""
Exercise the MDS's behaviour when clients and the MDCache reach or
exceed the limits of how many caps/inodes they should hold.
"""

import logging
import os
from io import StringIO
from textwrap import dedent

from tasks.ceph_test_case import TestTimeoutError
from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming
from tasks.cephfs.fuse_mount import FuseMount
from teuthology.exceptions import CommandFailedError


log = logging.getLogger(__name__)


# Arbitrary timeouts for operations involving restarting
# an MDS or waiting for it to come up
MDS_RESTART_GRACE = 60

# Hardcoded values from Server::recall_client_state
CAP_RECALL_RATIO = 0.8
CAP_RECALL_MIN = 100


class TestClientLimits(CephFSTestCase):
    CLIENTS_REQUIRED = 2

    def _test_client_pin(self, use_subdir, open_files):
        """
        When a client pins an inode in its cache, for example because the file is held open,
        it should reject requests from the MDS to trim these caps. The MDS should complain
        to the user that it is unable to enforce its cache size limits because of this
        objectionable client.

        :param use_subdir: whether to put test files in a subdir or use root
        """

        # Set the MDS cache memory limit to a low value that will make the
        # MDS ask the client to trim the caps.
        cache_memory_limit = "1K"

        self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit)
        self.config_set('mds', 'mds_recall_max_caps', int(open_files/2))
        self.config_set('mds', 'mds_recall_warning_threshold', open_files)
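        # (Assumption) with the recall batch capped at half of open_files and the warning
        # threshold at open_files, the client is expected to trip the MDS_CLIENT_RECALL
        # warning checked further below while it still holds the files open.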

        mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client"))
        self.config_set('mds', 'mds_min_caps_working_set', mds_min_caps_per_client)
        mds_max_caps_per_client = int(self.config_get('mds', "mds_max_caps_per_client"))
        mds_recall_warning_decay_rate = float(self.config_get('mds', "mds_recall_warning_decay_rate"))
        self.assertGreaterEqual(open_files, mds_min_caps_per_client)

        mount_a_client_id = self.mount_a.get_global_id()
        path = "subdir" if use_subdir else "."
        open_proc = self.mount_a.open_n_background(path, open_files)

        # Client should now hold:
        # `open_files` caps for the open files
        # 1 cap for root
        # 1 cap for subdir
        self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'],
                              open_files + (2 if use_subdir else 1),
                              timeout=600,
                              reject_fn=lambda x: x > open_files + 2)

        # The MDS should not be happy about that, as the client is failing to comply
        # with the SESSION_RECALL messages it is being sent
        self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2)

        # We can also test that the MDS health warning for oversized
        # cache is functioning as intended.
        self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2)

        # When the client closes the files, it should retain only as many caps as allowed
        # under the SESSION_RECALL policy
        log.info("Terminating process holding files open")
        self.mount_a._kill_background(open_proc)

        # The remaining caps should comply with the numbers sent from the MDS in the
        # SESSION_RECALL message, which depend on the caps outstanding, cache size and overall ratio
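        # i.e. any count at or below mds_max_caps_per_client is acceptable (a count at or
        # below mds_min_caps_per_client trivially is).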
        def expected_caps():
            num_caps = self.get_session(mount_a_client_id)['num_caps']
            if num_caps <= mds_min_caps_per_client:
                return True
            elif num_caps <= mds_max_caps_per_client:
                return True
            else:
                return False

        self.wait_until_true(expected_caps, timeout=60)

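    # The variants below exercise cap trimming against the root directory vs. a
    # subdirectory, holding different numbers of files open.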
    @needs_trimming
    def test_client_pin_root(self):
        self._test_client_pin(False, 400)

    @needs_trimming
    def test_client_pin(self):
        self._test_client_pin(True, 800)

    @needs_trimming
    def test_client_pin_mincaps(self):
        self._test_client_pin(True, 200)

    def test_client_min_caps_working_set(self):
        """
        When a client has inodes pinned in its cache (open files), the MDS
        should not warn about the client failing to respond to cache pressure
        while the client's number of caps is below mds_min_caps_working_set.
        """

        # Set the MDS cache memory limit to a low value that will make the
        # MDS ask the client to trim the caps.
        cache_memory_limit = "1K"
        open_files = 400

        self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit)
        self.config_set('mds', 'mds_recall_max_caps', int(open_files/2))
        self.config_set('mds', 'mds_recall_warning_threshold', open_files)
        self.config_set('mds', 'mds_min_caps_working_set', open_files*2)
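        # mds_min_caps_working_set is set above the number of caps the client will hold,
        # so the MDS should not raise MDS_CLIENT_RECALL for this client (verified below).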

        mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client"))
        mds_recall_warning_decay_rate = float(self.config_get('mds', "mds_recall_warning_decay_rate"))
        self.assertGreaterEqual(open_files, mds_min_caps_per_client)

        mount_a_client_id = self.mount_a.get_global_id()
        self.mount_a.open_n_background("subdir", open_files)

        # Client should now hold:
        # `open_files` caps for the open files
        # 1 cap for root
        # 1 cap for subdir
        self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'],
                              open_files + 2,
                              timeout=600,
                              reject_fn=lambda x: x > open_files + 2)

        # We can also test that the MDS health warning for oversized
        # cache is functioning as intended.
        self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2)

        try:
            # The MDS should not be happy about that, but it does not send
            # MDS_CLIENT_RECALL warnings because the client's caps are below
            # mds_min_caps_working_set.
            self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2)
        except TestTimeoutError:
            pass
        else:
            raise RuntimeError("expected no client recall warning")

    def test_cap_acquisition_throttle_readdir(self):
        """
        Readdir typically acquires caps faster than the MDS recalls them, so cap
        acquisition via readdir is throttled by retrying the readdir after a
        fraction of a second (0.5 by default) when the throttling condition is met.
        """

        subdir_count = 4
        files_per_dir = 25

        # Throttle in a way so that two directory reads already hit it.
        throttle_value = (files_per_dir * 3) // 2
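        # With 25 files per dir this is 37: more than one directory's worth of caps,
        # but fewer than two.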

        # Activate the throttling logic by setting the max caps per client to a low value
        self.config_set('mds', 'mds_max_caps_per_client', 1)
        self.config_set('mds', 'mds_session_cap_acquisition_throttle', throttle_value)

        # Create files split across {subdir_count} directories, {files_per_dir} in each dir
        for i in range(1, subdir_count+1):
            self.mount_a.create_n_files("dir{0}/file".format(i), files_per_dir, sync=True)

        mount_a_client_id = self.mount_a.get_global_id()

        # Recursive readdir. macOS wants an explicit directory for `find`.
        proc = self.mount_a.run_shell_payload("find . | wc", stderr=StringIO())
        # The return code may be None if the command got interrupted
        self.assertTrue(proc.returncode is None or proc.returncode == 0, proc.stderr.getvalue())

        # Validate that the throttle condition was hit at least once
        cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle']
        self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1)

        # Validate that the cap_acquisition decay counter after readdir does NOT exceed the
        # throttle value plus one batch that could have been taken immediately before querying,
        # assuming the batch is equal to the per-dir file count.
        cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value']
        self.assertLessEqual(cap_acquisition_value, files_per_dir + throttle_value)

        # Make sure that the throttle was reported in the events
        def historic_ops_have_event(expected_event):
            ops_dump = self.fs.rank_tell(['dump_historic_ops'])
            # Reverse the events and the ops, assuming that later ops would be throttled
            for op in reversed(ops_dump['ops']):
                for ev in reversed(op.get('type_data', {}).get('events', [])):
                    if ev['event'] == expected_event:
                        return True
            return False

        self.assertTrue(historic_ops_have_event('cap_acquisition_throttle'))

    def test_client_release_bug(self):
        """
        When a client has a bug (which we will simulate) preventing it from releasing caps,
        the MDS should notice that releases are not being sent promptly, and generate a health
        metric to that effect.
        """

        # The debug hook to inject the failure only exists in the fuse client
        if not isinstance(self.mount_a, FuseMount):
            self.skipTest("Require FUSE client to inject client release failure")

        self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true')
        self.mount_a.teardown()
        self.mount_a.mount_wait()
        mount_a_client_id = self.mount_a.get_global_id()

        # Client A creates a file. It will hold the write caps on the file, and later
        # (simulated bug) fail to comply with the MDS's request to release that cap
        self.mount_a.run_shell(["touch", "file1"])

        # Client B tries to write to the file that client A created
        rproc = self.mount_b.write_background("file1")

        # After session_timeout, we should see a health warning (extra lag from
        # MDS beacon period)
        session_timeout = self.fs.get_var("session_timeout")
        self.wait_for_health("MDS_CLIENT_LATE_RELEASE", session_timeout + 10)

        # Client B should still be stuck
        self.assertFalse(rproc.finished)

        # Kill client A
        self.mount_a.kill()
        self.mount_a.kill_cleanup()

        # Client B should complete
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
        rproc.wait()

    def test_client_blocklisted_oldest_tid(self):
        """
        That a client is blocklisted when its encoded session metadata exceeds the
        configured threshold (due to an ever-growing `completed_requests` list caused
        by an unidentified bug in the client or the MDS).
        """

        # Number of requests the client issues
        max_requests = 10000

        # The debug hook to inject the failure only exists in the fuse client
        if not isinstance(self.mount_a, FuseMount):
            self.skipTest("Require FUSE client to inject client release failure")

        self.config_set('client', 'client inject fixed oldest tid', 'true')
        self.mount_a.teardown()
        self.mount_a.mount_wait()

        self.config_set('mds', 'mds_max_completed_requests', max_requests)

        # Create lots of files
        self.mount_a.create_n_files("testdir/file1", max_requests + 100)

        # Create a few files synchronously. This makes sure previous requests are completed
        self.mount_a.create_n_files("testdir/file2", 5, True)

        # Wait for the health warnings. Assume the MDS can handle at least 10 requests per second
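        # check_in_detail additionally matches this client's id against the warning's
        # detail string, so the warning is attributed to mount_a specifically.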
        self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10, check_in_detail=str(self.mount_a.client_id))

        # Set the threshold low so that it has a high probability of
        # being hit.
        self.config_set('mds', 'mds_session_metadata_threshold', 5000)

        # Create many more files synchronously. This should hit the session metadata
        # threshold, causing the client to get blocklisted.
        with self.assertRaises(CommandFailedError):
            self.mount_a.create_n_files("testdir/file2", 100000, True)

        self.assertTrue(self.mds_cluster.is_addr_blocklisted(self.mount_a.get_global_addr()))
        # The MDS should bump up the relevant perf counter
        pd = self.perf_dump()
        self.assertGreater(pd['mds_sessions']['mdthresh_evicted'], 0)

        # Reset the config
        self.config_set('client', 'client inject fixed oldest tid', 'false')

        self.mount_a.kill_cleanup()
        self.mount_a.mount_wait()

    def test_client_oldest_tid(self):
        """
        When a client does not advance its oldest tid, the MDS should notice that
        and generate health warnings.
        """

        # Number of requests the client issues
        max_requests = 1000

        # The debug hook to inject the failure only exists in the fuse client
        if not isinstance(self.mount_a, FuseMount):
            self.skipTest("Require FUSE client to inject client release failure")

        self.set_conf('client', 'client inject fixed oldest tid', 'true')
        self.mount_a.teardown()
        self.mount_a.mount_wait()

        self.fs.mds_asok(['config', 'set', 'mds_max_completed_requests', '{0}'.format(max_requests)])

        # Create lots of files
        self.mount_a.create_n_files("testdir/file1", max_requests + 100)

        # Create a few files synchronously. This makes sure previous requests are completed
        self.mount_a.create_n_files("testdir/file2", 5, True)

        # Wait for the health warnings. Assume the MDS can handle at least 10 requests per second
        self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10)

    def _test_client_cache_size(self, mount_subdir):
        """
        Check that the client invalidates the kernel dcache according to its cache size config
        """

        # The debug hook to inject the failure only exists in the fuse client
        if not isinstance(self.mount_a, FuseMount):
            self.skipTest("Require FUSE client to inject client release failure")

        if mount_subdir:
            # fuse assigns a fixed inode number (1) to the root inode. But when mounting
            # a subdir, the actual inode number of the root is not 1. This mismatch
            # confuses fuse_lowlevel_notify_inval_entry() when invalidating dentries
            # in the root directory.
            self.mount_a.run_shell(["mkdir", "subdir"])
            self.mount_a.umount_wait()
            self.set_conf('client', 'client mountpoint', '/subdir')
            self.mount_a.mount_wait()
            root_ino = self.mount_a.path_to_ino(".")
            self.assertEqual(root_ino, 1)

        dir_path = os.path.join(self.mount_a.mountpoint, "testdir")

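        # Note: the doubled braces ({{0}}) below survive the outer .format() call and are
        # filled in per directory when the script runs on the client.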
        mkdir_script = dedent("""
            import os
            os.mkdir("{path}")
            for n in range(0, {num_dirs}):
                os.mkdir("{path}/dir{{0}}".format(n))
            """)

        num_dirs = 1000
        self.mount_a.run_python(mkdir_script.format(path=dir_path, num_dirs=num_dirs))
        self.mount_a.run_shell(["sync"])

        dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
        self.assertGreaterEqual(dentry_count, num_dirs)
        self.assertGreaterEqual(dentry_pinned_count, num_dirs)

        cache_size = num_dirs // 10
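        # Shrink the client's cache to a tenth of the directories created, then wait for
        # the dentry counts to drop below that limit.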
        self.mount_a.set_cache_size(cache_size)

        def trimmed():
            dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
            log.info("waiting, dentry_count, dentry_pinned_count: {0}, {1}".format(
                dentry_count, dentry_pinned_count
            ))
            if dentry_count > cache_size or dentry_pinned_count > cache_size:
                return False

            return True

        self.wait_until_true(trimmed, 30)

    @needs_trimming
    def test_client_cache_size(self):
        self._test_client_cache_size(False)
        self._test_client_cache_size(True)

    def test_client_max_caps(self):
        """
        That the MDS will not let a client sit above mds_max_caps_per_client caps.
        """

        mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client"))
        mds_max_caps_per_client = 2*mds_min_caps_per_client
        self.config_set('mds', 'mds_max_caps_per_client', mds_max_caps_per_client)

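        # Create three times as many files as the configured cap limit; the MDS should
        # recall caps so the client ends up at or below mds_max_caps_per_client.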
        self.mount_a.create_n_files("foo/", 3*mds_max_caps_per_client, sync=True)

        mount_a_client_id = self.mount_a.get_global_id()
        def expected_caps():
            num_caps = self.get_session(mount_a_client_id)['num_caps']
            if num_caps <= mds_max_caps_per_client:
                return True
            else:
                return False

        self.wait_until_true(expected_caps, timeout=60)