"""
Exercise the MDS's behaviour when clients and the MDCache reach or
exceed the limits of how many caps/inodes they should hold.
"""

import logging
import os
from textwrap import dedent
from unittest import SkipTest

from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming
from tasks.cephfs.fuse_mount import FuseMount


log = logging.getLogger(__name__)


# Arbitrary timeouts for operations involving restarting
# an MDS or waiting for it to come up
MDS_RESTART_GRACE = 60

# Hardcoded values from Server::recall_client_state
CAP_RECALL_RATIO = 0.8
CAP_RECALL_MIN = 100
class TestClientLimits(CephFSTestCase):
    REQUIRE_KCLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

    def _test_client_pin(self, use_subdir, open_files):
        """
        When a client pins an inode in its cache, for example because the file is held open,
        it should reject requests from the MDS to trim these caps. The MDS should complain
        to the user that it is unable to enforce its cache size limits because of this
        objectionable client.

        :param use_subdir: whether to put test files in a subdir or use root
        :param open_files: how many files the client should hold open
        """

        # Use floor division: 'mds cache size' must be an integral config
        # value (true division would yield a float under Python 3).
        cache_size = open_files // 2

        self.set_conf('mds', 'mds cache size', cache_size)
        self.fs.mds_fail_restart()
        self.fs.wait_for_daemons()

        mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client"))
        # The test is only meaningful if we open at least as many files as
        # the MDS will ever let a client keep.
        self.assertTrue(open_files >= mds_min_caps_per_client)
        mds_max_ratio_caps_per_client = float(self.fs.get_config("mds_max_ratio_caps_per_client"))

        mount_a_client_id = self.mount_a.get_global_id()
        path = "subdir/mount_a" if use_subdir else "mount_a"
        open_proc = self.mount_a.open_n_background(path, open_files)

        # Client should now hold:
        # `open_files` caps for the open files
        # 1 cap for root
        # 1 cap for subdir
        self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'],
                              open_files + (2 if use_subdir else 1),
                              timeout=600,
                              reject_fn=lambda x: x > open_files + 2)

        # MDS should not be happy about that, as the client is failing to comply
        # with the SESSION_RECALL messages it is being sent
        mds_recall_state_timeout = float(self.fs.get_config("mds_recall_state_timeout"))
        self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_state_timeout + 10)

        # We can also test that the MDS health warning for oversized
        # cache is functioning as intended.
        self.wait_for_health("MDS_CACHE_OVERSIZED",
                             mds_recall_state_timeout + 10)

        # When the client closes the files, it should retain only as many caps as allowed
        # under the SESSION_RECALL policy
        log.info("Terminating process holding files open")
        open_proc.stdin.close()
        try:
            open_proc.wait()
        except CommandFailedError:
            # We killed it, so it raises an error
            pass

        # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message,
        # which depend on the caps outstanding, cache size and overall ratio
        recall_expected_value = int((1.0 - mds_max_ratio_caps_per_client) * (open_files + 2))

        def expected_caps():
            # True once the client has trimmed down to either the configured
            # minimum or within 5% of the recall target; raises if it ever
            # drops below the minimum (that would be an MDS bug).
            num_caps = self.get_session(mount_a_client_id)['num_caps']
            if num_caps < mds_min_caps_per_client:
                raise RuntimeError("client caps fell below min!")
            elif num_caps == mds_min_caps_per_client:
                return True
            elif recall_expected_value * .95 <= num_caps <= recall_expected_value * 1.05:
                return True
            else:
                return False

        self.wait_until_true(expected_caps, timeout=60)

    @needs_trimming
    def test_client_pin_root(self):
        self._test_client_pin(False, 400)

    @needs_trimming
    def test_client_pin(self):
        self._test_client_pin(True, 800)

    @needs_trimming
    def test_client_pin_mincaps(self):
        self._test_client_pin(True, 200)

    def test_client_release_bug(self):
        """
        When a client has a bug (which we will simulate) preventing it from releasing caps,
        the MDS should notice that releases are not being sent promptly, and generate a health
        metric to that effect.
        """

        # The debug hook to inject the failure only exists in the fuse client
        if not isinstance(self.mount_a, FuseMount):
            raise SkipTest("Require FUSE client to inject client release failure")

        self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true')
        self.mount_a.teardown()
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        mount_a_client_id = self.mount_a.get_global_id()

        # Client A creates a file. He will hold the write caps on the file, and later (simulated bug) fail
        # to comply with the MDSs request to release that cap
        self.mount_a.run_shell(["touch", "file1"])

        # Client B opens the file client A created for writing, in the
        # background; it will block until client A's caps are released
        rproc = self.mount_b.write_background("file1")

        # After session_timeout, we should see a health warning (extra lag from
        # MDS beacon period)
        session_timeout = self.fs.get_var("session_timeout")
        self.wait_for_health("MDS_CLIENT_LATE_RELEASE", session_timeout + 10)

        # Client B should still be stuck
        self.assertFalse(rproc.finished)

        # Kill client A
        self.mount_a.kill()
        self.mount_a.kill_cleanup()

        # Client B should complete
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
        rproc.wait()

    def test_client_oldest_tid(self):
        """
        When a client does not advance its oldest tid, the MDS should notice that
        and generate health warnings.
        """

        # num of requests client issues
        max_requests = 1000

        # The debug hook to inject the failure only exists in the fuse client
        if not isinstance(self.mount_a, FuseMount):
            raise SkipTest("Require FUSE client to inject client release failure")

        self.set_conf('client', 'client inject fixed oldest tid', 'true')
        self.mount_a.teardown()
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

        self.fs.mds_asok(['config', 'set', 'mds_max_completed_requests', '{0}'.format(max_requests)])

        # Create lots of files
        self.mount_a.create_n_files("testdir/file1", max_requests + 100)

        # Create a few files synchronously. This makes sure previous requests are completed
        self.mount_a.create_n_files("testdir/file2", 5, True)

        # Wait for the health warnings. Assume mds can handle 10 request per second at least
        # (floor division keeps the timeout an int under Python 3)
        self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10)

    def _test_client_cache_size(self, mount_subdir):
        """
        check if client invalidate kernel dcache according to its cache size config

        :param mount_subdir: whether to remount the client onto a subdir of
                             the filesystem root before testing
        """

        # The debug hook to inject the failure only exists in the fuse client
        if not isinstance(self.mount_a, FuseMount):
            raise SkipTest("Require FUSE client to inject client release failure")

        if mount_subdir:
            # fuse assigns a fix inode number (1) to root inode. But in mounting into
            # subdir case, the actual inode number of root is not 1. This mismatch
            # confuses fuse_lowlevel_notify_inval_entry() when invalidating dentries
            # in root directory.
            self.mount_a.run_shell(["mkdir", "subdir"])
            self.mount_a.umount_wait()
            self.set_conf('client', 'client mountpoint', '/subdir')
            self.mount_a.mount()
            self.mount_a.wait_until_mounted()
            root_ino = self.mount_a.path_to_ino(".")
            self.assertEqual(root_ino, 1)

        dir_path = os.path.join(self.mount_a.mountpoint, "testdir")

        mkdir_script = dedent("""
            import os
            os.mkdir("{path}")
            for n in range(0, {num_dirs}):
                os.mkdir("{path}/dir{{0}}".format(n))
            """)

        num_dirs = 1000
        self.mount_a.run_python(mkdir_script.format(path=dir_path, num_dirs=num_dirs))
        self.mount_a.run_shell(["sync"])

        dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
        self.assertGreaterEqual(dentry_count, num_dirs)
        self.assertGreaterEqual(dentry_pinned_count, num_dirs)

        # Floor division: the cache size config is an integral count
        cache_size = num_dirs // 10
        self.mount_a.set_cache_size(cache_size)

        def trimmed():
            # True once both total and pinned dentry counts have been
            # trimmed down to the configured cache size
            dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
            log.info("waiting, dentry_count, dentry_pinned_count: {0}, {1}".format(
                dentry_count, dentry_pinned_count
            ))
            if dentry_count > cache_size or dentry_pinned_count > cache_size:
                return False

            return True

        self.wait_until_true(trimmed, 30)

    @needs_trimming
    def test_client_cache_size(self):
        self._test_client_cache_size(False)
        self._test_client_cache_size(True)