"""
Teuthology task for exercising CephFS client recovery
"""

import logging
from textwrap import dedent
import time
import distutils.version as version
import re
import os

from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase
from teuthology.packaging import get_package_version
from unittest import SkipTest


log = logging.getLogger(__name__)


# Arbitrary timeouts for operations involving restarting
# an MDS or waiting for it to come up
MDS_RESTART_GRACE = 60


class TestClientNetworkRecovery(CephFSTestCase):
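    """
    Check that a client survives temporary loss of connectivity to the MDS:
    I/O blocks while the session is stale and completes once the network
    comes back.
    """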
    REQUIRE_KCLIENT_REMOTE = True
    REQUIRE_ONE_CLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

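    # The settings named in LOAD_SETTINGS are expected to be read from the
    # running cluster's configuration by the CephFSTestCase machinery at
    # setup time and bound to the placeholder attributes below.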
    LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]

    # Environment references
    mds_session_timeout = None
    mds_reconnect_timeout = None
    ms_max_backoff = None

    def test_network_death(self):
        """
        Simulate software freeze or temporary network failure.

        Check that the client blocks I/O during failure, and completes
        I/O after failure.
        """

        # We only need one client
        self.mount_b.umount_wait()

        # Initially our one client session should be visible
        client_id = self.mount_a.get_global_id()
        ls_data = self._session_list()
        self.assert_session_count(1, ls_data)
        self.assertEqual(ls_data[0]['id'], client_id)
        self.assert_session_state(client_id, "open")

        # ...and capable of doing I/O without blocking
        self.mount_a.create_files()

        # ...but if we turn off the network
        self.fs.set_clients_block(True)

        # ...and try to start an I/O
        write_blocked = self.mount_a.write_background()

        # ...then it should block
        self.assertFalse(write_blocked.finished)
        self.assert_session_state(client_id, "open")
        time.sleep(self.mds_session_timeout * 1.5)  # Long enough for MDS to consider session stale
        self.assertFalse(write_blocked.finished)
        self.assert_session_state(client_id, "stale")

        # ...until we re-enable I/O
        self.fs.set_clients_block(False)

        # ...when it should complete promptly
        a = time.time()
        self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2)
        write_blocked.wait()  # Already know we're finished, wait() to raise exception on errors
        recovery_time = time.time() - a
        log.info("recovery time: {0}".format(recovery_time))
        self.assert_session_state(client_id, "open")


class TestClientRecovery(CephFSTestCase):
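    """
    Check that clients and the MDS recover cleanly from MDS restarts, client
    deaths and evictions: sessions are reconnected or timed out, capabilities
    and file locks are handed over, and fsync'd metadata survives.
    """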
    REQUIRE_KCLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

    LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]

    # Environment references
    mds_session_timeout = None
    mds_reconnect_timeout = None
    ms_max_backoff = None

    def test_basic(self):
        # Check that two clients come up healthy and see each other's files
        # =====================================================
        self.mount_a.create_files()
        self.mount_a.check_files()
        self.mount_a.umount_wait()

        self.mount_b.check_files()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

        # Check that the admin socket interface is correctly reporting
        # two sessions
        # =====================================================
        ls_data = self._session_list()
        self.assert_session_count(2, ls_data)

        self.assertSetEqual(
            set([l['id'] for l in ls_data]),
            {self.mount_a.get_global_id(), self.mount_b.get_global_id()}
        )

    def test_restart(self):
        # Check that after an MDS restart both clients reconnect and continue
        # to handle I/O
        # =====================================================
        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        self.mount_a.create_destroy()
        self.mount_b.create_destroy()

    def _session_num_caps(self, client_id):
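        """
        Return the number of caps held by the given client's session, as
        reported by the MDS over its admin socket.
        """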
        ls_data = self.fs.mds_asok(['session', 'ls'])
        return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps'])

    def test_reconnect_timeout(self):
        # Reconnect timeout
        # =================
        # Check that if I stop an MDS and a client goes away, the MDS waits
        # for the reconnect period
        self.fs.mds_stop()
        self.fs.mds_fail()

        mount_a_client_id = self.mount_a.get_global_id()
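        # A forced unmount kills the client without letting it cleanly close
        # its MDS session, so the MDS still has to account for this session
        # during the reconnect phase (see the session checks below).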
        self.mount_a.umount_wait(force=True)

        self.fs.mds_restart()

        self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
        # Check that the MDS locally reports its state correctly
        status = self.fs.mds_asok(['status'])
        self.assertIn("reconnect_status", status)

        ls_data = self._session_list()
        self.assert_session_count(2, ls_data)

        # The session for the dead client should have the 'reconnect' flag set
        self.assertTrue(self.get_session(mount_a_client_id)['reconnecting'])

        # Wait for the reconnect state to clear; this should take the
        # reconnect timeout period.
        in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2)
        # Check that the period we waited to enter active is within a factor
        # of two of the reconnect timeout.
        self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout / 2,
                           "Should have been in reconnect phase for {0} but only took {1}".format(
                               self.mds_reconnect_timeout, in_reconnect_for
                           ))

        self.assert_session_count(1)

        # Check that the client that timed out during reconnect can
        # mount again and do I/O
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        self.mount_a.create_destroy()

        self.assert_session_count(2)

    def test_reconnect_eviction(self):
        # Eviction during reconnect
        # =========================
        mount_a_client_id = self.mount_a.get_global_id()

        self.fs.mds_stop()
        self.fs.mds_fail()

        # The mount goes away while the MDS is offline
        self.mount_a.kill()

        self.fs.mds_restart()

        # Enter reconnect phase
        self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
        self.assert_session_count(2)

        # Evict the stuck client
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
        self.assert_session_count(1)

        # Observe that we proceed to the active phase without waiting for the full reconnect timeout
        evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
        # Once we evict the troublemaker, the reconnect phase should complete
        # in well under the reconnect timeout.
        self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5,
                        "reconnect did not complete soon enough after eviction, took {0}".format(
                            evict_til_active
                        ))

        # We killed the mount earlier, so we must clean it up before using it again
        self.mount_a.kill_cleanup()

        # Bring the client back
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        self.mount_a.create_destroy()

    def test_stale_caps(self):
        # Capability release from stale session
        # =====================================
        cap_holder = self.mount_a.open_background()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.kill()

        try:
            # Now, after mds_session_timeout seconds, the waiter should
            # complete its operation when the MDS marks the holder's
            # session stale.
            cap_waiter = self.mount_b.write_background()
            a = time.time()
            cap_waiter.wait()
            b = time.time()

            # Should have succeeded
            self.assertEqual(cap_waiter.exitstatus, 0)

            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))
            self.assertTrue(self.mds_session_timeout / 2.0 <= cap_waited <= self.mds_session_timeout * 2.0,
                            "Capability handover took {0}, expected approx {1}".format(
                                cap_waited, self.mds_session_timeout
                            ))

            cap_holder.stdin.close()
            try:
                cap_holder.wait()
            except (CommandFailedError, ConnectionLostError):
                # We killed it (and possibly its node), so it raises an error
                pass
        finally:
            # teardown() doesn't quite handle this case cleanly, so help it out
            self.mount_a.kill_cleanup()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_evicted_caps(self):
        # Eviction while holding a capability
        # ===================================

        # Take out a write capability on a file on client A,
        # and then immediately kill it.
        cap_holder = self.mount_a.open_background()
        mount_a_client_id = self.mount_a.get_global_id()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.kill()

        try:
            # The waiter should get stuck waiting for the capability
            # held on the MDS by the now-dead client A
            cap_waiter = self.mount_b.write_background()
            time.sleep(5)
            self.assertFalse(cap_waiter.finished)

            self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
            # Now, because I evicted the old holder of the capability, it should
            # immediately get handed over to the waiter
            a = time.time()
            cap_waiter.wait()
            b = time.time()
            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))
            # This is the check that it happened 'now' rather than waiting
            # for the session timeout
            self.assertLess(cap_waited, self.mds_session_timeout / 2.0,
                            "Capability handover took {0}, expected less than {1}".format(
                                cap_waited, self.mds_session_timeout / 2.0
                            ))

            cap_holder.stdin.close()
            try:
                cap_holder.wait()
            except (CommandFailedError, ConnectionLostError):
                # We killed it (and possibly its node), so it raises an error
                pass
        finally:
            self.mount_a.kill_cleanup()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_trim_caps(self):
        # Trim capabilities when the client reconnects to the MDS
        # ========================================================

        count = 500
        # Create lots of files
        for i in range(count):
            self.mount_a.run_shell(["touch", "f{0}".format(i)])

        # Populate mount_b's cache
        self.mount_b.run_shell(["ls", "-l"])

        client_id = self.mount_b.get_global_id()
        num_caps = self._session_num_caps(client_id)
        self.assertGreaterEqual(num_caps, count)

        # Restart the MDS. The client should trim its cache when reconnecting to the MDS
        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        num_caps = self._session_num_caps(client_id)
        self.assertLess(num_caps, count,
                        "should have fewer than {0} capabilities, have {1}".format(
                            count, num_caps
                        ))

    def _is_flockable(self):
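        """
        Return True if both clients' fuse packages are new enough (>= 2.9)
        to support flock; otherwise log the versions and return False.
        """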
        a_version_str = get_package_version(self.mount_a.client_remote, "fuse")
        b_version_str = get_package_version(self.mount_b.client_remote, "fuse")
        flock_version_str = "2.9"

        version_regex = re.compile(r"[0-9\.]+")
        a_result = version_regex.match(a_version_str)
        self.assertTrue(a_result)
        b_result = version_regex.match(b_version_str)
        self.assertTrue(b_result)
        a_version = version.StrictVersion(a_result.group())
        b_version = version.StrictVersion(b_result.group())
        flock_version = version.StrictVersion(flock_version_str)

        if a_version >= flock_version and b_version >= flock_version:
            log.info("flock locks are available")
            return True
        else:
            log.info("not testing flock locks, machines have versions {av} and {bv}".format(
                av=a_version_str, bv=b_version_str))
            return False

    def test_filelock(self):
        """
        Check that a file lock doesn't get lost after an MDS restart
361 """
362
363 flockable = self._is_flockable()
364 lock_holder = self.mount_a.lock_background(do_flock=flockable)
365
366 self.mount_b.wait_for_visible("background_file-2")
367 self.mount_b.check_filelock(do_flock=flockable)
368
369 self.fs.mds_fail_restart()
370 self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
371
372 self.mount_b.check_filelock(do_flock=flockable)
373
374 # Tear down the background process
375 lock_holder.stdin.close()
376 try:
377 lock_holder.wait()
378 except (CommandFailedError, ConnectionLostError):
379 # We killed it, so it raises an error
380 pass
381
382 def test_filelock_eviction(self):
383 """
        Check that a file lock held by an evicted client is handed over to a
        waiting client.
386 """
387 if not self._is_flockable():
388 self.skipTest("flock is not available")
389
390 lock_holder = self.mount_a.lock_background()
391 self.mount_b.wait_for_visible("background_file-2")
392 self.mount_b.check_filelock()
393
394 lock_taker = self.mount_b.lock_and_release()
395 # Check the taker is waiting (doesn't get it immediately)
396 time.sleep(2)
397 self.assertFalse(lock_holder.finished)
398 self.assertFalse(lock_taker.finished)
399
400 try:
401 mount_a_client_id = self.mount_a.get_global_id()
402 self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
403
404 # Evicting mount_a should let mount_b's attempt to take the lock
405 # succeed
406 self.wait_until_true(lambda: lock_taker.finished, timeout=10)
407 finally:
408 # teardown() doesn't quite handle this case cleanly, so help it out
409 self.mount_a.kill()
410 self.mount_a.kill_cleanup()
411
412 # Bring the client back
413 self.mount_a.mount()
414 self.mount_a.wait_until_mounted()
415
    def test_dir_fsync(self):
        self._test_fsync(True)

    def test_create_fsync(self):
        self._test_fsync(False)

    def _test_fsync(self, dirfsync):
        """
        That calls to fsync guarantee visibility of metadata to another
        client immediately after the fsyncing client dies.
        """

        # Leave this guy out until he's needed
        self.mount_b.umount_wait()

        # Create dir + child dentry on client A, and fsync the dir
        path = os.path.join(self.mount_a.mountpoint, "subdir")
        self.mount_a.run_python(
            dedent("""
                import os
                import time

                path = "{path}"

                print "Starting creation..."
                start = time.time()

                os.mkdir(path)
                dfd = os.open(path, os.O_DIRECTORY)

                fd = open(os.path.join(path, "childfile"), "w")
                print "Finished creation in {{0}}s".format(time.time() - start)

                print "Starting fsync..."
                start = time.time()
                if {dirfsync}:
                    os.fsync(dfd)
                else:
                    os.fsync(fd)
                print "Finished fsync in {{0}}s".format(time.time() - start)
            """.format(path=path, dirfsync=str(dirfsync)))
        )

        # Immediately kill the MDS and then client A
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.mount_a.kill()
        self.mount_a.kill_cleanup()

        # Restart the MDS. Wait for it to come up; it'll have to time out in clientreplay
        self.fs.mds_restart()
        log.info("Waiting for reconnect...")
        self.fs.wait_for_state("up:reconnect")
        log.info("Waiting for active...")
        self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout)
        log.info("Reached active...")

        # Is the child dentry visible from mount B?
        self.mount_b.mount()
        self.mount_b.wait_until_mounted()
        self.mount_b.run_shell(["ls", "subdir/childfile"])

    def test_stale_renew(self):
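        """
        Check that a client whose session has gone stale (here because the
        ceph-fuse process was stopped for longer than the session timeout)
        does not serve stale cached metadata once it resumes.
        """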
        if not isinstance(self.mount_a, FuseMount):
            raise SkipTest("Require FUSE client to handle signal STOP/CONT")

        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["touch", "testdir/file1"])
        # populate readdir cache
        self.mount_a.run_shell(["ls", "testdir"])
        self.mount_b.run_shell(["ls", "testdir"])

        # check if readdir cache is effective
        initial_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
        self.mount_b.run_shell(["ls", "testdir"])
        current_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
        self.assertEqual(current_readdirs, initial_readdirs)

        mount_b_gid = self.mount_b.get_global_id()
        mount_b_pid = self.mount_b.get_client_pid()
        # stop ceph-fuse process of mount_b
        self.mount_b.client_remote.run(args=["sudo", "kill", "-STOP", mount_b_pid])

        self.assert_session_state(mount_b_gid, "open")
        time.sleep(self.mds_session_timeout * 1.5)  # Long enough for MDS to consider session stale
        self.assert_session_state(mount_b_gid, "stale")

        self.mount_a.run_shell(["touch", "testdir/file2"])

        # resume ceph-fuse process of mount_b
        self.mount_b.client_remote.run(args=["sudo", "kill", "-CONT", mount_b_pid])
        # Is the new file visible from mount_b? (caps become invalid after the session goes stale)
        self.mount_b.run_shell(["ls", "testdir/file2"])

    def test_unmount_for_evicted_client(self):
511 """Test if client hangs on unmount after evicting the client."""
        mount_a_client_id = self.mount_a.get_global_id()
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])

        self.mount_a.umount_wait(require_clean=True, timeout=30)