update ceph source to reef 18.2.0

diff --git a/ceph/qa/tasks/cephfs/test_failover.py b/ceph/qa/tasks/cephfs/test_failover.py
index 6872319cf3c0dde7f3cea175fb48137b079f905f..6149c6bd62614a894107a986d613c7edd43a7832 100644
--- a/ceph/qa/tasks/cephfs/test_failover.py
+++ b/ceph/qa/tasks/cephfs/test_failover.py
@@ -2,9 +2,9 @@ import time
 import signal
 import logging
 import operator
-from random import randint
+from random import randint, choice
 
-from cephfs_test_case import CephFSTestCase
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
 from teuthology.exceptions import CommandFailedError
 from tasks.cephfs.fuse_mount import FuseMount
 
@@ -24,7 +24,7 @@ class TestClusterAffinity(CephFSTestCase):
         current = sorted(current, key=operator.itemgetter('name'))
         log.info("current = %s", current)
         self.assertEqual(len(current), len(target))
-        for i in xrange(len(current)):
+        for i in range(len(current)):
             for attr in target[i]:
                 self.assertIn(attr, current[i])
                 self.assertEqual(target[i][attr], current[i][attr])
@@ -52,7 +52,7 @@ class TestClusterAffinity(CephFSTestCase):
             except AssertionError as e:
                 log.debug("%s", e)
                 return False
-        status = self.wait_until_true(takeover, 30)
+        self.wait_until_true(takeover, 30)
 
     def test_join_fs_runtime(self):
         """
@@ -103,7 +103,6 @@ class TestClusterAffinity(CephFSTestCase):
         """
         That a vanilla standby is preferred over others with mds_join_fs set to another fs.
         """
-        self.fs.set_allow_multifs()
         fs2 = self.mds_cluster.newfs(name="cephfs2")
         status, target = self._verify_init()
         active = self.fs.get_active_names(status=status)[0]
@@ -125,11 +124,9 @@ class TestClusterAffinity(CephFSTestCase):
         That a standby with mds_join_fs set to another fs is still used if necessary.
         """
         status, target = self._verify_init()
-        active = self.fs.get_active_names(status=status)[0]
         standbys = [info['name'] for info in status.get_standbys()]
         for mds in standbys:
             self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2')
-        self.fs.set_allow_multifs()
         fs2 = self.mds_cluster.newfs(name="cephfs2")
         for mds in standbys:
             self._change_target_state(target, mds, {'join_fscid': fs2.id})
@@ -170,51 +167,9 @@ class TestClusterAffinity(CephFSTestCase):
         self._reach_target(target)
 
 class TestClusterResize(CephFSTestCase):
-    CLIENTS_REQUIRED = 1
+    CLIENTS_REQUIRED = 0
     MDSS_REQUIRED = 3
 
-    def grow(self, n):
-        grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
-
-        fscid = self.fs.id
-        status = self.fs.status()
-        log.info("status = {0}".format(status))
-
-        original_ranks = set([info['gid'] for info in status.get_ranks(fscid)])
-        _ = set([info['gid'] for info in status.get_standbys()])
-
-        oldmax = self.fs.get_var('max_mds')
-        self.assertTrue(n > oldmax)
-        self.fs.set_max_mds(n)
-
-        log.info("Waiting for cluster to grow.")
-        status = self.fs.wait_for_daemons(timeout=60+grace*2)
-        ranks = set([info['gid'] for info in status.get_ranks(fscid)])
-        self.assertTrue(original_ranks.issubset(ranks) and len(ranks) == n)
-        return status
-
-    def shrink(self, n):
-        grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
-
-        fscid = self.fs.id
-        status = self.fs.status()
-        log.info("status = {0}".format(status))
-
-        original_ranks = set([info['gid'] for info in status.get_ranks(fscid)])
-        _ = set([info['gid'] for info in status.get_standbys()])
-
-        oldmax = self.fs.get_var('max_mds')
-        self.assertTrue(n < oldmax)
-        self.fs.set_max_mds(n)
-
-        # Wait until the monitor finishes stopping ranks >= n
-        log.info("Waiting for cluster to shink.")
-        status = self.fs.wait_for_daemons(timeout=60+grace*2)
-        ranks = set([info['gid'] for info in status.get_ranks(fscid)])
-        self.assertTrue(ranks.issubset(original_ranks) and len(ranks) == n)
-        return status
-
-
     def test_grow(self):
         """
         That the MDS cluster grows after increasing max_mds.
@@ -223,8 +178,8 @@ class TestClusterResize(CephFSTestCase):
         # Need all my standbys up as well as the active daemons
         # self.wait_for_daemon_start() necessary?
 
-        self.grow(2)
-        self.grow(3)
+        self.fs.grow(2)
+        self.fs.grow(3)
 
 
     def test_shrink(self):
@@ -232,8 +187,8 @@ class TestClusterResize(CephFSTestCase):
         That the MDS cluster shrinks automatically after decreasing max_mds.
         """
 
-        self.grow(3)
-        self.shrink(1)
+        self.fs.grow(3)
+        self.fs.shrink(1)
 
     def test_up_less_than_max(self):
         """
@@ -244,7 +199,7 @@ class TestClusterResize(CephFSTestCase):
         mdss = [info['gid'] for info in status.get_all()]
         self.fs.set_max_mds(len(mdss)+1)
         self.wait_for_health("MDS_UP_LESS_THAN_MAX", 30)
-        self.shrink(2)
+        self.fs.shrink(2)
         self.wait_for_health_clear(30)
 
     def test_down_health(self):
@@ -252,8 +207,6 @@ class TestClusterResize(CephFSTestCase):
         That marking a FS down does not generate a health warning
         """
 
-        self.mount_a.umount_wait()
-
         self.fs.set_down()
         try:
             self.wait_for_health("", 30)
@@ -269,9 +222,7 @@ class TestClusterResize(CephFSTestCase):
         That marking a FS down twice does not wipe old_max_mds.
         """
 
-        self.mount_a.umount_wait()
-
-        self.grow(2)
+        self.fs.grow(2)
         self.fs.set_down()
         self.fs.wait_for_daemons()
         self.fs.set_down(False)
@@ -283,11 +234,9 @@ class TestClusterResize(CephFSTestCase):
         That setting max_mds undoes down.
         """
 
-        self.mount_a.umount_wait()
-
         self.fs.set_down()
         self.fs.wait_for_daemons()
-        self.grow(2)
+        self.fs.grow(2)
         self.fs.wait_for_daemons()
 
     def test_down(self):
@@ -295,8 +244,6 @@ class TestClusterResize(CephFSTestCase):
         That down setting toggles and sets max_mds appropriately.
         """
 
-        self.mount_a.umount_wait()
-
         self.fs.set_down()
         self.fs.wait_for_daemons()
         self.assertEqual(self.fs.get_var("max_mds"), 0)
@@ -312,28 +259,25 @@ class TestClusterResize(CephFSTestCase):
 
         fscid = self.fs.id
 
-        self.grow(2)
+        self.fs.grow(2)
 
+        # Now add a delay which should slow down how quickly rank 1 stops
+        self.config_set('mds', 'ms_inject_delay_max', '5.0')
+        self.config_set('mds', 'ms_inject_delay_probability', '1.0')
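+        # ms_inject_delay_probability=1.0 with ms_inject_delay_max=5.0 should
+        # make the messenger delay every message by up to 5s, giving rank 1 a
+        # slow, observable stopping phase for the max_mds flapping below.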
         self.fs.set_max_mds(1)
         log.info("status = {0}".format(self.fs.status()))
 
-        self.fs.set_max_mds(3)
         # Don't wait for rank 1 to stop
+        self.fs.set_max_mds(3)
+        log.info("status = {0}".format(self.fs.status()))
 
+        # Now check that the mons didn't try to promote a standby to rank 2
         self.fs.set_max_mds(2)
-        # Prevent another MDS from taking rank 1
-        # XXX This is a little racy because rank 1 may have stopped and a
-        #     standby assigned to rank 1 before joinable=0 is set.
-        self.fs.set_joinable(False) # XXX keep in mind changing max_mds clears this flag
-
+        status = self.fs.status()
         try:
             status = self.fs.wait_for_daemons(timeout=90)
-            raise RuntimeError("should not be able to successfully shrink cluster!")
-        except:
-            # could not shrink to max_mds=2 and reach 2 actives (because joinable=False)
-            status = self.fs.status()
             ranks = set([info['rank'] for info in status.get_ranks(fscid)])
-            self.assertTrue(ranks == set([0]))
+            self.assertEqual(ranks, set([0, 1]))
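+            # With max_mds back at 2, only ranks 0 and 1 should be active; a
+            # rank 2 here would mean the mons acted on the transient max_mds=3
+            # and promoted a standby.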
         finally:
             log.info("status = {0}".format(status))
 
@@ -353,6 +297,27 @@ class TestFailover(CephFSTestCase):
     CLIENTS_REQUIRED = 1
     MDSS_REQUIRED = 2
 
+    def test_repeated_boot(self):
+        """
+        That multiple boot messages do not result in the MDS getting evicted.
+        """
+
+        interval = 10
+        self.config_set("mon", "paxos_propose_interval", interval)
+
+        mds = choice(list(self.fs.status().get_all()))
+
+        with self.assert_cluster_log(f"daemon mds.{mds['name']} restarted", present=False):
+            # Avoid a beacon to the monitors with down:dne by restarting:
+            self.fs.mds_fail(mds_id=mds['name'])
+            # `ceph mds fail` won't return until the FSMap is committed, double-check:
+            self.assertIsNone(self.fs.status().get_mds_gid(mds['gid']))
+            time.sleep(2) # for mds to restart and accept asok commands
+            status1 = self.fs.mds_asok(['status'], mds_id=mds['name'])
+            time.sleep(interval*1.5)
+            status2 = self.fs.mds_asok(['status'], mds_id=mds['name'])
+            self.assertEqual(status1['id'], status2['id'])
+
     def test_simple(self):
         """
         That when the active MDS is killed, a standby MDS is promoted into
@@ -362,27 +327,20 @@ class TestFailover(CephFSTestCase):
         in thrashing tests.
         """
 
-        # Need all my standbys up as well as the active daemons
-        self.wait_for_daemon_start()
-
         (original_active, ) = self.fs.get_active_names()
         original_standbys = self.mds_cluster.get_standby_daemons()
 
         # Kill the rank 0 daemon's physical process
         self.fs.mds_stop(original_active)
 
-        grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
-
         # Wait until the monitor promotes his replacement
         def promoted():
-            active = self.fs.get_active_names()
-            return active and active[0] in original_standbys
+            ranks = list(self.fs.get_ranks())
+            return len(ranks) > 0 and ranks[0]['name'] in original_standbys
 
         log.info("Waiting for promotion of one of the original standbys {0}".format(
             original_standbys))
-        self.wait_until_true(
-            promoted,
-            timeout=grace*2)
+        self.wait_until_true(promoted, timeout=self.fs.beacon_timeout)
 
         # Start the original rank 0 daemon up again, see that he becomes a standby
         self.fs.mds_restart(original_active)
@@ -404,8 +362,6 @@ class TestFailover(CephFSTestCase):
         if not require_active:
             self.skipTest("fuse_require_active_mds is not set")
 
-        grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
-
         # Check it's not laggy to begin with
         (original_active, ) = self.fs.get_active_names()
         self.assertNotIn("laggy_since", self.fs.status().get_mds(original_active))
@@ -413,8 +369,7 @@ class TestFailover(CephFSTestCase):
         self.mounts[0].umount_wait()
 
         # Control: that we can mount and unmount usually, while the cluster is healthy
-        self.mounts[0].mount()
-        self.mounts[0].wait_until_mounted()
+        self.mounts[0].mount_wait()
         self.mounts[0].umount_wait()
 
         # Stop the daemon processes
@@ -429,9 +384,9 @@ class TestFailover(CephFSTestCase):
 
             return True
 
-        self.wait_until_true(laggy, grace * 2)
+        self.wait_until_true(laggy, self.fs.beacon_timeout)
         with self.assertRaises(CommandFailedError):
-            self.mounts[0].mount()
+            self.mounts[0].mount_wait()
 
     def test_standby_count_wanted(self):
         """
@@ -441,8 +396,6 @@ class TestFailover(CephFSTestCase):
         # Need all my standbys up as well as the active daemons
         self.wait_for_daemon_start()
 
-        grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
-
         standbys = self.mds_cluster.get_standby_daemons()
         self.assertGreaterEqual(len(standbys), 1)
         self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)))
@@ -450,8 +403,7 @@ class TestFailover(CephFSTestCase):
         # Kill a standby and check for warning
         victim = standbys.pop()
         self.fs.mds_stop(victim)
-        log.info("waiting for insufficient standby daemon warning")
-        self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)
+        self.wait_for_health("MDS_INSUFFICIENT_STANDBY", self.fs.beacon_timeout)
 
         # restart the standby, see that he becomes a standby, check health clears
         self.fs.mds_restart(victim)
@@ -465,8 +417,7 @@ class TestFailover(CephFSTestCase):
         standbys = self.mds_cluster.get_standby_daemons()
         self.assertGreaterEqual(len(standbys), 1)
         self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1))
-        log.info("waiting for insufficient standby daemon warning")
-        self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)
+        self.wait_for_health("MDS_INSUFFICIENT_STANDBY", self.fs.beacon_timeout)
 
         # Set it to 0
         self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0')
@@ -482,7 +433,6 @@ class TestFailover(CephFSTestCase):
 
         self.mount_a.umount_wait()
 
-        grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
         monc_timeout = float(self.fs.get_config("mon_client_ping_timeout", service_type="mds"))
 
         mds_0 = self.fs.get_rank(rank=0, status=status)
@@ -490,7 +440,7 @@ class TestFailover(CephFSTestCase):
         self.fs.rank_signal(signal.SIGSTOP, rank=0, status=status)
         self.wait_until_true(
             lambda: "laggy_since" in self.fs.get_rank(),
-            timeout=grace * 2
+            timeout=self.fs.beacon_timeout
         )
 
         self.fs.rank_fail(rank=1)
@@ -503,7 +453,7 @@ class TestFailover(CephFSTestCase):
         self.fs.rank_signal(signal.SIGCONT, rank=0)
         self.wait_until_true(
             lambda: "laggy_since" not in self.fs.get_rank(rank=0),
-            timeout=grace * 2
+            timeout=self.fs.beacon_timeout
         )
 
         # mds.b will be stuck at 'reconnect' state if snapserver gets confused
@@ -512,7 +462,18 @@ class TestFailover(CephFSTestCase):
         self.assertEqual(mds_0['gid'], self.fs.get_rank(rank=0)['gid'])
         self.fs.rank_freeze(False, rank=0)
 
+    def test_connect_bootstrapping(self):
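+        # Presumed intent: without connection bootstrapping, rank 0 can get
+        # stuck in up:resolve after a failover (tracker 53194); with
+        # mds_connect_bootstrapping on and a huge mds_sleep_rank_change delay
+        # injected, the failover below should still complete.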
+        self.config_set("mds", "mds_sleep_rank_change", 10000000.0)
+        self.config_set("mds", "mds_connect_bootstrapping", True)
+        self.fs.set_max_mds(2)
+        self.fs.wait_for_daemons()
+        self.fs.rank_fail(rank=0)
+        # rank 0 will get stuck in up:resolve, see https://tracker.ceph.com/issues/53194
+        self.fs.wait_for_daemons()
+
+
 class TestStandbyReplay(CephFSTestCase):
+    CLIENTS_REQUIRED = 0
     MDSS_REQUIRED = 4
 
     def _confirm_no_replay(self):
@@ -570,6 +531,34 @@ class TestStandbyReplay(CephFSTestCase):
         time.sleep(30)
         self._confirm_single_replay()
 
+    def test_standby_replay_damaged(self):
+        """
+        That a standby-replay daemon can correctly cause its rank to be marked damaged.
+        """
+
+        self._confirm_no_replay()
+        self.config_set("mds", "mds_standby_replay_damaged", True)
+        self.fs.set_allow_standby_replay(True)
+        self.wait_until_true(
+            lambda: len(self.fs.get_damaged()) > 0,
+            timeout=30
+        )
+        status = self.fs.status()
+        self.assertListEqual([], list(self.fs.get_ranks(status=status)))
+        self.assertListEqual([0], self.fs.get_damaged(status=status))
+
+    def test_standby_replay_disable(self):
+        """
+        That turning off allow_standby_replay fails all standby-replay daemons.
+        """
+
+        self._confirm_no_replay()
+        self.fs.set_allow_standby_replay(True)
+        time.sleep(30)
+        self._confirm_single_replay()
+        self.fs.set_allow_standby_replay(False)
+        self._confirm_no_replay()
+
     def test_standby_replay_singleton_fail(self):
         """
         That failures don't violate singleton constraint.
@@ -634,6 +623,25 @@ class TestStandbyReplay(CephFSTestCase):
             self.fs.mds_restart(mds_id=victim['name'])
             status = self._confirm_single_replay(status=status)
 
+    def test_standby_replay_prepare_beacon(self):
+        """
+        That MDSMonitor::prepare_beacon handles standby-replay daemons
+        correctly without removing the standby. (Note, usually a standby-replay
+        beacon will just be replied to by MDSMonitor::preprocess_beacon.)
+        """
+
+        status = self._confirm_no_replay()
+        self.fs.set_max_mds(1)
+        self.fs.set_allow_standby_replay(True)
+        status = self._confirm_single_replay()
+        replays = list(status.get_replays(self.fs.id))
+        self.assertEqual(len(replays), 1)
+        self.config_set('mds.'+replays[0]['name'], 'mds_inject_health_dummy', True)
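+        # Injecting a dummy health metric presumably changes the beacon so it
+        # can no longer be answered by preprocess_beacon and must go through
+        # prepare_beacon, exercising the path described in the docstring.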
+        time.sleep(10) # give the mons time to react; the standby-replay daemon should not be removed
+        status = self._confirm_single_replay()
+        replays2 = list(status.get_replays(self.fs.id))
+        self.assertEqual(replays[0]['gid'], replays2[0]['gid'])
+
     def test_rank_stopped(self):
         """
         That when a rank is STOPPED, standby replays for
@@ -690,14 +698,14 @@ class TestMultiFilesystems(CephFSTestCase):
         fs_a, fs_b = self._setup_two()
 
         # Mount a client on fs_a
-        self.mount_a.mount(mount_fs_name=fs_a.name)
+        self.mount_a.mount_wait(cephfs_name=fs_a.name)
         self.mount_a.write_n_mb("pad.bin", 1)
         self.mount_a.write_n_mb("test.bin", 2)
         a_created_ino = self.mount_a.path_to_ino("test.bin")
         self.mount_a.create_files()
 
         # Mount a client on fs_b
-        self.mount_b.mount(mount_fs_name=fs_b.name)
+        self.mount_b.mount_wait(cephfs_name=fs_b.name)
         self.mount_b.write_n_mb("test.bin", 1)
         b_created_ino = self.mount_b.path_to_ino("test.bin")
         self.mount_b.create_files()