import json
import logging
import errno
import re

from teuthology.contextutil import MaxWhileTries
from teuthology.exceptions import CommandFailedError
from teuthology.orchestra.run import wait
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
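
# Possible outcomes recorded for each damage mutation in the tests below,
# compared against per-mutation expectations at the end of the run.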
DAMAGED_ON_START = "damaged_on_start"
DAMAGED_ON_LS = "damaged_on_ls"
CRASHED = "server crashed"
NO_DAMAGE = "no damage"
READONLY = "readonly"
FAILED_CLIENT = "client failed"
FAILED_SERVER = "server failed"

# An EIO in response to a stat from the client
EIO_ON_LS = "eio"

# An EIO, but nothing in damage table (not ever what we expect)
EIO_NO_DAMAGE = "eio without damage entry"


log = logging.getLogger(__name__)


class TestDamage(CephFSTestCase):
    def _simple_workload_write(self):
        self.mount_a.run_shell(["mkdir", "subdir"])
        self.mount_a.write_n_mb("subdir/sixmegs", 6)
        return self.mount_a.stat("subdir/sixmegs")

    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    @for_teuthology
    def test_object_deletion(self):
        """
        That the MDS has a clean 'damaged' response to loss of any single metadata object
        """

        self._simple_workload_write()

        # Hmm, actually it would be nice to permute whether the metadata pool
        # state contains sessions or not, but for the moment close this session
        # to avoid waiting through reconnect on every MDS start.
        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        self.fs.rados(['export', '/tmp/metadata.bin'])
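
        # For reference (based on the usual CephFS metadata layout, not
        # verified by this test): "1.00000000" is the root dirfrag,
        # "100.00000000" is rank 0's 'mydir' (which holds the stray dirs),
        # "60x.00000000" are stray dirfrags, "400.00000000" is the
        # JournalPointer, and "500.00000000" is the PurgeQueue header.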

        def is_ignored(obj_id, dentry=None):
            """
            A filter to avoid redundantly mutating many similar objects (e.g.
            stray dirfrags) or similar dentries (e.g. stray dir dentries)
            """
            if re.match("60.\.00000000", obj_id) and obj_id != "600.00000000":
                return True

            if dentry and obj_id == "100.00000000":
                if re.match("stray.+_head", dentry) and dentry != "stray0_head":
                    return True

            return False

        def get_path(obj_id, dentry=None):
            """
            What filesystem path does this object or dentry correspond to? i.e.
            what should I poke to see EIO after damaging it?
            """
            if obj_id == "1.00000000" and dentry == "subdir_head":
                return "./subdir"
            elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
                return "./subdir/sixmegs"

            # None means ls will do an "ls -R" in hope of seeing some errors
            return None
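
        # Take an inventory of every metadata object so that each of them can
        # be damaged in turn: omap headers, omap key/vals, and object data.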
        objects = self.fs.rados(["ls"]).split("\n")
        objects = [o for o in objects if not is_ignored(o)]

        # Find all objects with an OMAP header
        omap_header_objs = []
        for o in objects:
            header = self.fs.rados(["getomapheader", o])
            # The rados CLI wraps the header output in a hex-printed style
            header_bytes = int(re.match("header \((.+) bytes\)", header).group(1))
            if header_bytes > 0:
                omap_header_objs.append(o)

        # Find all OMAP key/vals
        omap_keys = []
        for o in objects:
            keys_str = self.fs.rados(["listomapkeys", o])
            if keys_str:
                for key in keys_str.split("\n"):
                    if not is_ignored(o, key):
                        omap_keys.append((o, key))

        # Find objects that have data in their bodies
        data_objects = []
        for obj_id in objects:
            stat_out = self.fs.rados(["stat", obj_id])
            size = int(re.match(".+, size (.+)$", stat_out).group(1))
            if size > 0:
                data_objects.append(obj_id)

        # Define the various forms of damage we will inflict
        class MetadataMutation(object):
            def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
                self.obj_id = obj_id_
                self.desc = desc_
                self.mutate_fn = mutate_fn_
                self.expectation = expectation_
                if ls_path is None:
                    self.ls_path = "."
                else:
                    self.ls_path = ls_path

            def __eq__(self, other):
                return self.desc == other.desc

            def __hash__(self):
                return hash(self.desc)
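
        # Fixed garbage payload used by all of the corruption mutations below.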
        junk = "deadbeef" * 10
        mutations = []

        # Removals
        for o in objects:
            if o in [
                # JournalPointers are auto-replaced if missing (same path as upgrade)
                "400.00000000",
                # Missing dirfrags for non-system dirs result in empty directory
                "10000000000.00000000",
                # PurgeQueue is auto-created if not found on startup
                "500.00000000"
            ]:
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            log.info("Expectation on rm '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(MetadataMutation(
                o,
                "Delete {0}".format(o),
                lambda o=o: self.fs.rados(["rm", o]),
                expectation
            ))

        # Blatant corruptions
        for obj_id in data_objects:
            if obj_id == "500.00000000":
                # purge queue corruption results in read-only FS
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk),
                    READONLY
                ))
            else:
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk),
                    DAMAGED_ON_START
                ))

        # Truncations
        for o in data_objects:
            if o == "500.00000000":
                # The PurgeQueue is allowed to be empty: Journaler interprets
                # an empty header object as an empty journal.
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            mutations.append(MetadataMutation(
                o,
                "Truncate {0}".format(o),
                lambda o=o: self.fs.rados(["truncate", o, "0"]),
                expectation
            ))

        # OMAP value corruptions
        for o, k in omap_keys:
            if o.startswith("100."):
                # Anything in rank 0's 'mydir'
                expectation = DAMAGED_ON_START
            else:
                expectation = EIO_ON_LS

            mutations.append(MetadataMutation(
                o,
                "Corrupt omap key {0}:{1}".format(o, k),
                lambda o=o, k=k: self.fs.rados(["setomapval", o, k, junk]),
                expectation,
                get_path(o, k)
            ))

        # OMAP header corruptions
        for o in omap_header_objs:
            if re.match("60.\.00000000", o) \
                    or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
                expectation = DAMAGED_ON_START
            else:
                expectation = NO_DAMAGE

            log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(MetadataMutation(
                o,
                "Corrupt omap header on {0}".format(o),
                lambda o=o: self.fs.rados(["setomapheader", o, junk]),
                expectation
            ))
        results = {}

        for mutation in mutations:
            log.info("Applying mutation '{0}'".format(mutation.desc))

            # Reset MDS state
            self.mount_a.umount_wait(force=True)
            self.fs.mds_stop()
            self.fs.mds_fail()
            self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

            # Reset RADOS pool state
            self.fs.rados(['import', '/tmp/metadata.bin'])

            # Inject the mutation
            mutation.mutate_fn()

            # Try starting the MDS
            self.fs.mds_restart()

            # How long we'll wait between starting a daemon and expecting
            # it to make it through startup, and potentially declare itself
            # damaged to the mon cluster.
            startup_timeout = 60

            if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
                if mutation.expectation == DAMAGED_ON_START:
                    # The MDS may pass through active before making it to damaged
                    try:
                        self.wait_until_true(
                            lambda: self.is_marked_damaged(0), startup_timeout)
                    except RuntimeError:
                        pass

                # Wait for MDS to either come up or go into damaged state
                try:
                    self.wait_until_true(
                        lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(),
                        startup_timeout)
                except RuntimeError:
                    crashed = False
                    # Didn't make it to healthy or damaged, did it crash?
                    for daemon_id, daemon in self.fs.mds_daemons.items():
                        if daemon.proc and daemon.proc.finished:
                            crashed = True
                            log.error("Daemon {0} crashed!".format(daemon_id))
                            daemon.proc = None  # So that subsequent stop() doesn't raise error
                    if not crashed:
                        # Didn't go healthy, didn't go damaged, didn't crash, so what?
                        raise
                    else:
                        log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
                        results[mutation] = CRASHED
                        continue

                if self.is_marked_damaged(0):
                    log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_START
                    continue
                else:
                    log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(
                        mutation.desc))
            else:
                try:
                    self.wait_until_true(self.fs.are_daemons_healthy, 60)
                except RuntimeError:
                    log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(
                        mutation.desc))
                    if self.is_marked_damaged(0):
                        results[mutation] = DAMAGED_ON_START
                    else:
                        results[mutation] = FAILED_SERVER
                    continue

                log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))

            # MDS is up, should go damaged on ls or client mount
            self.mount_a.mount()
            self.mount_a.wait_until_mounted()
            if mutation.ls_path == ".":
                proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
            else:
                proc = self.mount_a.stat(mutation.ls_path, wait=False)

            if mutation.expectation == DAMAGED_ON_LS:
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
                    log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_LS
                except RuntimeError:
                    if self.fs.are_daemons_healthy():
                        log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
                            mutation.desc))
                        results[mutation] = NO_DAMAGE
                    else:
                        log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
                        results[mutation] = FAILED_SERVER
            elif mutation.expectation == READONLY:
                proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False)
                try:
                    proc.wait()
                except CommandFailedError:
                    stderr = proc.stderr.getvalue()
                    log.info(stderr)
                    if "Read-only file system".lower() in stderr.lower():
                        pass
                    else:
                        raise
            else:
                try:
                    wait([proc], 20)
                    log.info("Result: Mutation '{0}' did not cause DAMAGED state".format(mutation.desc))
                    results[mutation] = NO_DAMAGE
                except MaxWhileTries:
                    log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
                    results[mutation] = FAILED_CLIENT
                except CommandFailedError as e:
                    if e.exitstatus == errno.EIO:
                        log.info("Result: EIO on client")
                        results[mutation] = EIO_ON_LS
                    else:
                        log.info("Result: unexpected error {0} on client".format(e))
                        results[mutation] = FAILED_CLIENT

            if mutation.expectation == EIO_ON_LS:
                # EIOs mean something handled by DamageTable: assert that it has
                # been populated
                damage = json.loads(
                    self.fs.mon_manager.raw_cluster_cmd(
                        'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                        "damage", "ls", '--format=json-pretty'))
                if len(damage) == 0:
                    results[mutation] = EIO_NO_DAMAGE
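
        # Compare what actually happened against each mutation's expectation;
        # any mismatch fails the whole test.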
        failures = [(mutation, result) for (mutation, result) in results.items()
                    if mutation.expectation != result]

        if failures:
            log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
            for mutation, result in failures:
                log.error("  Expected '{0}' actually '{1}' from '{2}'".format(
                    mutation.expectation, result, mutation.desc
                ))
            raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
        else:
            log.info("All {0} mutations had expected outcomes".format(len(mutations)))

    def test_damaged_dentry(self):
        # Damage to dentries is interesting because it leaves the
        # directory's `complete` flag in a subtle state where
        # we have marked the dir complete in order that folks
        # can access it, but in actual fact there is a dentry
        # missing
        self.mount_a.run_shell(["mkdir", "subdir/"])

        self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
        self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])

        subdir_ino = self.mount_a.path_to_ino("subdir")

        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        # Corrupt a dentry
        junk = "deadbeef" * 10
        dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Start up and try to list it
        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        dentries = self.mount_a.ls("subdir/")

        # The damaged guy should have disappeared
        self.assertEqual(dentries, ["file_undamaged"])

        # I should get ENOENT if I try and read it normally, because
        # the dir is considered complete
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected ENOENT")

        # The fact that there is damage should have been recorded
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        damage_id = damage[0]['id']

        # If I try to create a dentry with the same name as the damaged guy
        # then that should be forbidden
        try:
            self.mount_a.touch("subdir/file_to_be_damaged")
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.EIO)
        else:
            raise AssertionError("Expected EIO")

        # Attempting that touch will clear the client's complete flag, now
        # when I stat it I'll get EIO instead of ENOENT
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            if isinstance(self.mount_a, FuseMount):
                self.assertEqual(e.exitstatus, errno.EIO)
            else:
                # Kernel client handles this case differently
                self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected EIO")

        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "2")

        self.mount_a.umount_wait()

        # Now repair the stats
        scrub_json = self.fs.mds_asok(["scrub_path", "/subdir", "repair"])
        log.info(json.dumps(scrub_json, indent=2))

        self.assertEqual(scrub_json["passed_validation"], False)
        self.assertEqual(scrub_json["raw_stats"]["checked"], True)
        self.assertEqual(scrub_json["raw_stats"]["passed"], False)

        # Check that the file count is now correct
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "1")

        # Clean up the omap object
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Clean up the damagetable entry
        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", "{did}".format(did=damage_id))

        # Now I should be able to create a file with the same name as the
        # damaged guy if I want.
        self.mount_a.touch("subdir/file_to_be_damaged")

    def test_open_ino_errors(self):
        """
        That errors encountered during opening inos are properly propagated
        """
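
        # Opening a hardlink requires the MDS to resolve the link target's
        # inode via its backtrace, so corrupting the backtrace (case 1) or
        # removing the target's dirfrag (case 2) should surface as EIO when
        # the hardlink is touched.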
        self.mount_a.run_shell(["mkdir", "dir1"])
        self.mount_a.run_shell(["touch", "dir1/file1"])
        self.mount_a.run_shell(["mkdir", "dir2"])
        self.mount_a.run_shell(["touch", "dir2/file2"])
        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
        self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])

        file1_ino = self.mount_a.path_to_ino("dir1/file1")
        file2_ino = self.mount_a.path_to_ino("dir2/file2")
        dir2_ino = self.mount_a.path_to_ino("dir2")

        # Ensure everything is written to backing store
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Drop everything from the MDS cache
        self.mds_cluster.mds_stop()
        self.fs.journal_tool(['journal', 'reset'], 0)
        self.mds_cluster.mds_fail_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()

        # Case 1: un-decodeable backtrace

        # Validate that the backtrace is present and decodable
        self.fs.read_backtrace(file1_ino)
        # Go corrupt the backtrace of alpha/target (used for resolving
        # hardlinks)
        self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "backtrace")
        self.assertEqual(damage[0]['ino'], file1_ino)

        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", str(damage[0]['id']))

        # Case 2: missing dirfrag for the target inode
        self.fs.rados(["rm", "{0:x}.00000000".format(dir2_ino)])

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 2)
        if damage[0]['damage_type'] == "backtrace":
            self.assertEqual(damage[0]['ino'], file2_ino)
            self.assertEqual(damage[1]['damage_type'], "dir_frag")
            self.assertEqual(damage[1]['ino'], dir2_ino)
        else:
            self.assertEqual(damage[0]['damage_type'], "dir_frag")
            self.assertEqual(damage[0]['ino'], dir2_ino)
            self.assertEqual(damage[1]['damage_type'], "backtrace")
            self.assertEqual(damage[1]['ino'], file2_ino)

        for entry in damage:
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "rm", str(entry['id']))