import time
import signal
import json
import logging
from unittest import case, SkipTest
from random import randint

from cephfs_test_case import CephFSTestCase
from teuthology.exceptions import CommandFailedError
from teuthology import misc as teuthology
from tasks.cephfs.fuse_mount import FuseMount

log = logging.getLogger(__name__)


class TestClusterResize(CephFSTestCase):
    CLIENTS_REQUIRED = 1
    MDSS_REQUIRED = 3

    def grow(self, n):
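        """
        Raise max_mds to n and wait until the active set grows to n ranks
        that include all of the original ranks; return the resulting status.
        """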
        grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))

        fscid = self.fs.id
        status = self.fs.status()
        log.info("status = {0}".format(status))

        original_ranks = set([info['gid'] for info in status.get_ranks(fscid)])
        original_standbys = set([info['gid'] for info in status.get_standbys()])

        oldmax = self.fs.get_var('max_mds')
        self.assertTrue(n > oldmax)
        self.fs.set_max_mds(n)

        log.info("Waiting for cluster to grow.")
        status = self.fs.wait_for_daemons(timeout=60+grace*2)
        ranks = set([info['gid'] for info in status.get_ranks(fscid)])
        self.assertTrue(original_ranks.issubset(ranks) and len(ranks) == n)
        return status

    def shrink(self, n):
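        """
        Lower max_mds to n and wait until the active set shrinks to n ranks
        drawn from the original ranks; return the resulting status.
        """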
        grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))

        fscid = self.fs.id
        status = self.fs.status()
        log.info("status = {0}".format(status))

        original_ranks = set([info['gid'] for info in status.get_ranks(fscid)])
        original_standbys = set([info['gid'] for info in status.get_standbys()])

        oldmax = self.fs.get_var('max_mds')
        self.assertTrue(n < oldmax)
        self.fs.set_max_mds(n)

        # Wait until the monitor finishes stopping ranks >= n
        log.info("Waiting for cluster to shrink.")
        status = self.fs.wait_for_daemons(timeout=60+grace*2)
        ranks = set([info['gid'] for info in status.get_ranks(fscid)])
        self.assertTrue(ranks.issubset(original_ranks) and len(ranks) == n)
        return status


    def test_grow(self):
        """
        That the MDS cluster grows after increasing max_mds.
        """

        # Need all my standbys up as well as the active daemons
        # self.wait_for_daemon_start() necessary?

        self.grow(2)
        self.grow(3)


    def test_shrink(self):
        """
        That the MDS cluster shrinks automatically after decreasing max_mds.
        """

        self.grow(3)
        self.shrink(1)

    def test_up_less_than_max(self):
        """
        That a health warning is generated when max_mds is greater than active count.
        """

        status = self.fs.status()
        mdss = [info['gid'] for info in status.get_all()]
        self.fs.set_max_mds(len(mdss)+1)
        self.wait_for_health("MDS_UP_LESS_THAN_MAX", 30)
        self.shrink(2)
        self.wait_for_health_clear(30)

    def test_down_health(self):
        """
        That marking a FS down does not generate a health warning
        """

        self.mount_a.umount_wait()

        self.fs.set_down()
        try:
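            # Expect this wait to time out: the handler below treats a
            # timeout as success, i.e. no health warning appeared while the
            # FS was down.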
            self.wait_for_health("", 30)
            raise RuntimeError("got health warning?")
        except RuntimeError as e:
            if "Timed out after" in str(e):
                pass
            else:
                raise

    def test_down_twice(self):
        """
        That marking a FS down twice does not wipe old_max_mds.
        """

        self.mount_a.umount_wait()

        self.grow(2)
        self.fs.set_down()
        self.fs.wait_for_daemons()
        self.fs.set_down(False)
        self.assertEqual(self.fs.get_var("max_mds"), 2)
        self.fs.wait_for_daemons(timeout=60)

    def test_down_grow(self):
        """
        That setting max_mds undoes down.
        """

        self.mount_a.umount_wait()

        self.fs.set_down()
        self.fs.wait_for_daemons()
        self.grow(2)
        self.fs.wait_for_daemons()

    def test_down(self):
        """
        That down setting toggles and sets max_mds appropriately.
        """

        self.mount_a.umount_wait()

        self.fs.set_down()
        self.fs.wait_for_daemons()
        self.assertEqual(self.fs.get_var("max_mds"), 0)
        self.fs.set_down(False)
        self.assertEqual(self.fs.get_var("max_mds"), 1)
        self.fs.wait_for_daemons()
        self.assertEqual(self.fs.get_var("max_mds"), 1)

    def test_hole(self):
        """
        Test that a hole cannot be created in the FS ranks.
        """

        fscid = self.fs.id

        self.grow(2)

        self.fs.set_max_mds(1)
        log.info("status = {0}".format(self.fs.status()))

        self.fs.set_max_mds(3)
        # Don't wait for rank 1 to stop

        self.fs.set_max_mds(2)
        # Prevent another MDS from taking rank 1
        # XXX This is a little racy because rank 1 may have stopped and a
        # standby assigned to rank 1 before joinable=0 is set.
        self.fs.set_joinable(False)  # XXX keep in mind changing max_mds clears this flag

        try:
            status = self.fs.wait_for_daemons(timeout=90)
            raise RuntimeError("should not be able to successfully shrink cluster!")
        except:
            # could not shrink to max_mds=2 and reach 2 actives (because joinable=False)
            status = self.fs.status()
            ranks = set([info['rank'] for info in status.get_ranks(fscid)])
            self.assertTrue(ranks == set([0]))
        finally:
            log.info("status = {0}".format(status))

    def test_thrash(self):
        """
        Test that thrashing max_mds does not fail.
        """

        max_mds = 2
        for i in range(0, 100):
            self.fs.set_max_mds(max_mds)
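            # cycle the next target through 1, 2, 3 (2 -> 1 -> 3 -> 2 -> ...)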
            max_mds = (max_mds+1)%3+1

        self.fs.wait_for_daemons(timeout=90)

class TestFailover(CephFSTestCase):
    CLIENTS_REQUIRED = 1
    MDSS_REQUIRED = 2

    def test_simple(self):
        """
        That when the active MDS is killed, a standby MDS is promoted into
        its rank after the grace period.

        This is just a simple unit test, the harder cases are covered
        in thrashing tests.
        """

        # Need all my standbys up as well as the active daemons
        self.wait_for_daemon_start()

        (original_active, ) = self.fs.get_active_names()
        original_standbys = self.mds_cluster.get_standby_daemons()

        # Kill the rank 0 daemon's physical process
        self.fs.mds_stop(original_active)

        grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))

        # Wait until the monitor promotes his replacement
        def promoted():
            active = self.fs.get_active_names()
            return active and active[0] in original_standbys

        log.info("Waiting for promotion of one of the original standbys {0}".format(
            original_standbys))
        self.wait_until_true(
            promoted,
            timeout=grace*2)

        # Start the original rank 0 daemon up again, see that he becomes a standby
        self.fs.mds_restart(original_active)
        self.wait_until_true(
            lambda: original_active in self.mds_cluster.get_standby_daemons(),
            timeout=60  # Approximately long enough for MDS to start and mon to notice
        )

    def test_client_abort(self):
        """
        That a client will respect fuse_require_active_mds and error out
        when the cluster appears to be unavailable.
        """

        if not isinstance(self.mount_a, FuseMount):
            raise SkipTest("Requires FUSE client to inject client metadata")

        require_active = self.fs.get_config("fuse_require_active_mds", service_type="mon").lower() == "true"
        if not require_active:
            raise case.SkipTest("fuse_require_active_mds is not set")

        grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))

        # Check it's not laggy to begin with
        (original_active, ) = self.fs.get_active_names()
        self.assertNotIn("laggy_since", self.fs.status().get_mds(original_active))

        self.mounts[0].umount_wait()

        # Control: that we can mount and unmount usually, while the cluster is healthy
        self.mounts[0].mount()
        self.mounts[0].wait_until_mounted()
        self.mounts[0].umount_wait()

        # Stop the daemon processes
        self.fs.mds_stop()

        # Wait for everyone to go laggy
        def laggy():
            mdsmap = self.fs.get_mds_map()
            for info in mdsmap['info'].values():
                if "laggy_since" not in info:
                    return False

            return True

        self.wait_until_true(laggy, grace * 2)
        with self.assertRaises(CommandFailedError):
            self.mounts[0].mount()

    def test_standby_count_wanted(self):
        """
        That cluster health warnings are generated by insufficient standbys available.
        """

        # Need all my standbys up as well as the active daemons
        self.wait_for_daemon_start()

        grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))

        standbys = self.mds_cluster.get_standby_daemons()
        self.assertGreaterEqual(len(standbys), 1)
        self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)))

        # Kill a standby and check for warning
        victim = standbys.pop()
        self.fs.mds_stop(victim)
        log.info("waiting for insufficient standby daemon warning")
        self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)

        # restart the standby, see that he becomes a standby, check health clears
        self.fs.mds_restart(victim)
        self.wait_until_true(
            lambda: victim in self.mds_cluster.get_standby_daemons(),
            timeout=60  # Approximately long enough for MDS to start and mon to notice
        )
        self.wait_for_health_clear(timeout=30)

        # Set it one greater than standbys ever seen
        standbys = self.mds_cluster.get_standby_daemons()
        self.assertGreaterEqual(len(standbys), 1)
        self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1))
        log.info("waiting for insufficient standby daemon warning")
        self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)

        # Set it to 0
        self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0')
        self.wait_for_health_clear(timeout=30)

    def test_discontinuous_mdsmap(self):
        """
        That discontinuous mdsmap does not affect failover.
        See http://tracker.ceph.com/issues/24856.
        """
        self.fs.set_max_mds(2)
        status = self.fs.wait_for_daemons()

        self.mount_a.umount_wait()

        grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
        monc_timeout = float(self.fs.get_config("mon_client_ping_timeout", service_type="mds"))

        mds_0 = self.fs.get_rank(rank=0, status=status)
        self.fs.rank_freeze(True, rank=0)  # prevent failover
        self.fs.rank_signal(signal.SIGSTOP, rank=0, status=status)
        self.wait_until_true(
            lambda: "laggy_since" in self.fs.get_rank(),
            timeout=grace * 2
        )

        self.fs.rank_fail(rank=1)
        self.fs.wait_for_state('up:resolve', rank=1, timeout=30)

        # Make sure mds_0's monitor connection gets reset
        time.sleep(monc_timeout * 2)

        # Continue rank 0, it will get discontinuous mdsmap
        self.fs.rank_signal(signal.SIGCONT, rank=0)
        self.wait_until_true(
            lambda: "laggy_since" not in self.fs.get_rank(rank=0),
            timeout=grace * 2
        )

        # mds.b will be stuck at 'reconnect' state if snapserver gets confused
        # by discontinuous mdsmap
        self.fs.wait_for_state('up:active', rank=1, timeout=30)
        self.assertEqual(mds_0['gid'], self.fs.get_rank(rank=0)['gid'])
        self.fs.rank_freeze(False, rank=0)

class TestStandbyReplay(CephFSTestCase):
    MDSS_REQUIRED = 4

    def _confirm_no_replay(self):
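        """
        Assert that no standby-replay daemons are running and return the
        current cluster status.
        """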
        status = self.fs.status()
        standby_count = len(list(status.get_standbys()))
        self.assertEqual(0, len(list(self.fs.get_replays(status=status))))
        return status

    def _confirm_single_replay(self, full=True, status=None):
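        """
        Check that each rank in the mdsmap 'in' set has at most one
        standby-replay follower (exactly one when full=True) and that no
        replay is left unaccounted for; return the refreshed status.
        """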
        status = self.fs.wait_for_daemons(status=status)
        ranks = sorted(self.fs.get_mds_map(status=status)['in'])
        replays = list(self.fs.get_replays(status=status))
        checked_replays = set()
        for rank in ranks:
            has_replay = False
            for replay in replays:
                if replay['rank'] == rank:
                    self.assertFalse(has_replay)
                    has_replay = True
                    checked_replays.add(replay['gid'])
            if full and not has_replay:
                raise RuntimeError("rank "+str(rank)+" has no standby-replay follower")
        self.assertEqual(checked_replays, set(info['gid'] for info in replays))
        return status

    def _check_replay_takeover(self, status, rank=0):
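        """
        Verify that the new active MDS for `rank` is the previous
        standby-replay daemon if there was one, and otherwise came from the
        standby pool (or is a brand new gid after a restart).
        """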
        replay = self.fs.get_replay(rank=rank, status=status)
        new_status = self.fs.wait_for_daemons()
        new_active = self.fs.get_rank(rank=rank, status=new_status)
        if replay:
            self.assertEqual(replay['gid'], new_active['gid'])
        else:
            # double check takeover came from a standby (or some new daemon via restart)
            found = False
            for info in status.get_standbys():
                if info['gid'] == new_active['gid']:
                    found = True
                    break
            if not found:
                for info in status.get_all():
                    self.assertNotEqual(info['gid'], new_active['gid'])
        return new_status

    def test_standby_replay_singleton(self):
        """
        That only one MDS becomes standby-replay.
        """

        self._confirm_no_replay()
        self.fs.set_allow_standby_replay(True)
        time.sleep(30)
        self._confirm_single_replay()

    def test_standby_replay_singleton_fail(self):
        """
        That failures don't violate singleton constraint.
        """

        self._confirm_no_replay()
        self.fs.set_allow_standby_replay(True)
        status = self._confirm_single_replay()

        for i in range(10):
            time.sleep(randint(1, 5))
            self.fs.rank_restart(status=status)
            status = self._check_replay_takeover(status)
            status = self._confirm_single_replay(status=status)

        for i in range(10):
            time.sleep(randint(1, 5))
            self.fs.rank_fail()
            status = self._check_replay_takeover(status)
            status = self._confirm_single_replay(status=status)

    def test_standby_replay_singleton_fail_multimds(self):
        """
        That failures don't violate singleton constraint with multiple actives.
        """

        status = self._confirm_no_replay()
        new_max_mds = randint(2, len(list(status.get_standbys())))
        self.fs.set_max_mds(new_max_mds)
        self.fs.wait_for_daemons()  # wait for actives to come online!
        self.fs.set_allow_standby_replay(True)
        status = self._confirm_single_replay(full=False)

        for i in range(10):
            time.sleep(randint(1, 5))
            victim = randint(0, new_max_mds-1)
            self.fs.rank_restart(rank=victim, status=status)
            status = self._check_replay_takeover(status, rank=victim)
            status = self._confirm_single_replay(status=status, full=False)

        for i in range(10):
            time.sleep(randint(1, 5))
            victim = randint(0, new_max_mds-1)
            self.fs.rank_fail(rank=victim)
            status = self._check_replay_takeover(status, rank=victim)
            status = self._confirm_single_replay(status=status, full=False)

    def test_standby_replay_failure(self):
        """
        That the failure of a standby-replay daemon happens cleanly
        and doesn't interrupt anything else.
        """

        status = self._confirm_no_replay()
        self.fs.set_max_mds(1)
        self.fs.set_allow_standby_replay(True)
        status = self._confirm_single_replay()

        for i in range(10):
            time.sleep(randint(1, 5))
            victim = self.fs.get_replay(status=status)
            self.fs.mds_restart(mds_id=victim['name'])
            status = self._confirm_single_replay(status=status)

    def test_rank_stopped(self):
        """
        That when a rank is STOPPED, standby replays for
        that rank get torn down
        """

        status = self._confirm_no_replay()
        standby_count = len(list(status.get_standbys()))
        self.fs.set_max_mds(2)
        self.fs.set_allow_standby_replay(True)
        status = self._confirm_single_replay()

        self.fs.set_max_mds(1)  # stop rank 1

        status = self._confirm_single_replay()
        self.assertEqual(standby_count, len(list(status.get_standbys())))


class TestMultiFilesystems(CephFSTestCase):
    CLIENTS_REQUIRED = 2
    MDSS_REQUIRED = 4

    # We'll create our own filesystems and start our own daemons
    REQUIRE_FILESYSTEM = False

    def setUp(self):
        super(TestMultiFilesystems, self).setUp()
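        # Creating the second filesystem below requires the enable_multiple
        # flag to be set first.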
        self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set",
                                                     "enable_multiple", "true",
                                                     "--yes-i-really-mean-it")

    def _setup_two(self):
        fs_a = self.mds_cluster.newfs("alpha")
        fs_b = self.mds_cluster.newfs("bravo")

        self.mds_cluster.mds_restart()

        # Wait for both filesystems to go healthy
        fs_a.wait_for_daemons()
        fs_b.wait_for_daemons()

        # Reconfigure client auth caps
        for mount in self.mounts:
            self.mds_cluster.mon_manager.raw_cluster_cmd_result(
                'auth', 'caps', "client.{0}".format(mount.client_id),
                'mds', 'allow',
                'mon', 'allow r',
                'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
                    fs_a.get_data_pool_name(), fs_b.get_data_pool_name()))

        return fs_a, fs_b

    def test_clients(self):
        fs_a, fs_b = self._setup_two()

        # Mount a client on fs_a
        self.mount_a.mount(mount_fs_name=fs_a.name)
        self.mount_a.write_n_mb("pad.bin", 1)
        self.mount_a.write_n_mb("test.bin", 2)
        a_created_ino = self.mount_a.path_to_ino("test.bin")
        self.mount_a.create_files()

        # Mount a client on fs_b
        self.mount_b.mount(mount_fs_name=fs_b.name)
        self.mount_b.write_n_mb("test.bin", 1)
        b_created_ino = self.mount_b.path_to_ino("test.bin")
        self.mount_b.create_files()

        # Check that a non-default filesystem mount survives an MDS
        # failover (i.e. that map subscription is continuous, not
        # just the first time), reproduces #16022
        old_fs_b_mds = fs_b.get_active_names()[0]
        self.mds_cluster.mds_stop(old_fs_b_mds)
        self.mds_cluster.mds_fail(old_fs_b_mds)
        fs_b.wait_for_daemons()
        background = self.mount_b.write_background()
        # Raise exception if the write doesn't finish (i.e. if client
        # has not kept up with MDS failure)
        try:
            self.wait_until_true(lambda: background.finished, timeout=30)
        except RuntimeError:
            # The mount is stuck, we'll have to force it to fail cleanly
            background.stdin.close()
            self.mount_b.umount_wait(force=True)
            raise

        self.mount_a.umount_wait()
        self.mount_b.umount_wait()

        # See that the client's files went into the correct pool
        self.assertTrue(fs_a.data_objects_present(a_created_ino, 1024 * 1024))
        self.assertTrue(fs_b.data_objects_present(b_created_ino, 1024 * 1024))

    def test_standby(self):
        fs_a, fs_b = self._setup_two()

        # Assert that the remaining two MDS daemons are now standbys
        a_daemons = fs_a.get_active_names()
        b_daemons = fs_b.get_active_names()
        self.assertEqual(len(a_daemons), 1)
        self.assertEqual(len(b_daemons), 1)
        original_a = a_daemons[0]
        original_b = b_daemons[0]
        expect_standby_daemons = set(self.mds_cluster.mds_ids) - (set(a_daemons) | set(b_daemons))

        # Need all my standbys up as well as the active daemons
        self.wait_for_daemon_start()
        self.assertEqual(expect_standby_daemons, self.mds_cluster.get_standby_daemons())

        # Kill fs_a's active MDS, see a standby take over
        self.mds_cluster.mds_stop(original_a)
        self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_a)
        self.wait_until_equal(lambda: len(fs_a.get_active_names()), 1, 30,
                              reject_fn=lambda v: v > 1)
        # Assert that it's a *different* daemon that has now appeared in the map for fs_a
        self.assertNotEqual(fs_a.get_active_names()[0], original_a)

        # Kill fs_b's active MDS, see a standby take over
        self.mds_cluster.mds_stop(original_b)
        self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_b)
        self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30,
                              reject_fn=lambda v: v > 1)
        # Assert that it's a *different* daemon that has now appeared in the map for fs_b
        self.assertNotEqual(fs_b.get_active_names()[0], original_b)

        # Both of the original active daemons should be gone, and all standbys used up
        self.assertEqual(self.mds_cluster.get_standby_daemons(), set())

        # Restart the ones I killed, see them reappear as standbys
        self.mds_cluster.mds_restart(original_a)
        self.mds_cluster.mds_restart(original_b)
        self.wait_until_true(
            lambda: {original_a, original_b} == self.mds_cluster.get_standby_daemons(),
            timeout=30
        )

    def test_grow_shrink(self):
        # Usual setup...
        fs_a, fs_b = self._setup_two()

        # Increase max_mds on fs_b, see a standby take up the role
        fs_b.set_max_mds(2)
        self.wait_until_equal(lambda: len(fs_b.get_active_names()), 2, 30,
                              reject_fn=lambda v: v > 2 or v < 1)

        # Increase max_mds on fs_a, see a standby take up the role
        fs_a.set_max_mds(2)
        self.wait_until_equal(lambda: len(fs_a.get_active_names()), 2, 30,
                              reject_fn=lambda v: v > 2 or v < 1)

        # Shrink fs_b back to 1, see a daemon go back to standby
        fs_b.set_max_mds(1)
        self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30,
                              reject_fn=lambda v: v > 2 or v < 1)

        # Grow fs_a up to 3, see the former fs_b daemon join it.
        fs_a.set_max_mds(3)
        self.wait_until_equal(lambda: len(fs_a.get_active_names()), 3, 60,
                              reject_fn=lambda v: v > 3 or v < 2)