# ceph/qa/tasks/cephfs/test_damage.py
from io import BytesIO, StringIO

import errno
import json
import logging
import re
import time

from teuthology.contextutil import MaxWhileTries
from teuthology.exceptions import CommandFailedError
from teuthology.orchestra.run import wait
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology

DAMAGED_ON_START = "damaged_on_start"
DAMAGED_ON_LS = "damaged_on_ls"
CRASHED = "server crashed"
NO_DAMAGE = "no damage"
READONLY = "readonly"
FAILED_CLIENT = "client failed"
FAILED_SERVER = "server failed"

# An EIO in response to a stat from the client
EIO_ON_LS = "eio"

# An EIO, but nothing in damage table (not ever what we expect)
EIO_NO_DAMAGE = "eio without damage entry"


log = logging.getLogger(__name__)
class TestDamage(CephFSTestCase):
    def _simple_workload_write(self):
        self.mount_a.run_shell(["mkdir", "subdir"])
        self.mount_a.write_n_mb("subdir/sixmegs", 6)
        return self.mount_a.stat("subdir/sixmegs")

    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']
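    # For reference: 'damaged' in the MDS map is a list of rank numbers, e.g.
    # [0] once rank 0 has declared itself damaged, so the membership test
    # above is all that is needed.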
    @for_teuthology
    def test_object_deletion(self):
        """
        That the MDS has a clean 'damaged' response to loss of any single metadata object
        """

        self._simple_workload_write()

        # Hmm, actually it would be nice to permute whether the metadata pool
        # state contains sessions or not, but for the moment close this session
        # to avoid waiting through reconnect on every MDS start.
        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.fail()

        serialized = self.fs.radosmo(['export', '-'])
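        # radosm()/radosmo() are assumed here to wrap the `rados` CLI against
        # the filesystem's metadata pool, with radosmo() also capturing the
        # command's output -- so `serialized` holds a full pool export that is
        # re-imported below to reset state between mutations.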
        def is_ignored(obj_id, dentry=None):
            """
            A filter to avoid redundantly mutating many similar objects (e.g.
            stray dirfrags) or similar dentries (e.g. stray dir dentries)
            """
            if re.match(r"60.\.00000000", obj_id) and obj_id != "600.00000000":
                return True

            if dentry and obj_id == "100.00000000":
                if re.match("stray.+_head", dentry) and dentry != "stray0_head":
                    return True

            return False
        def get_path(obj_id, dentry=None):
            """
            What filesystem path does this object or dentry correspond to? i.e.
            what should I poke to see EIO after damaging it?
            """
            if obj_id == "1.00000000" and dentry == "subdir_head":
                return "./subdir"
            elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
                return "./subdir/sixmegs"

            # None means ls will do an "ls -R" in hope of seeing some errors
            return None

        objects = self.fs.radosmo(["ls"], stdout=StringIO()).strip().split("\n")
        objects = [o for o in objects if not is_ignored(o)]
        # Find all objects with an OMAP header
        omap_header_objs = []
        for o in objects:
            header = self.fs.radosmo(["getomapheader", o], stdout=StringIO())
            # The rados CLI wraps the header output in a hex-printed style
            header_bytes = int(re.match(r"header \((.+) bytes\)", header).group(1))
            if header_bytes > 0:
                omap_header_objs.append(o)
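        # For reference, the parsed output is expected to look roughly like
        #   header (274 bytes) :
        #   00000000  02 01 ...
        # and a zero-byte header means there is nothing worth corrupting.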
        # Find all OMAP key/vals
        omap_keys = []
        for o in objects:
            keys_str = self.fs.radosmo(["listomapkeys", o], stdout=StringIO())
            if keys_str:
                for key in keys_str.strip().split("\n"):
                    if not is_ignored(o, key):
                        omap_keys.append((o, key))
        # Find objects that have data in their bodies
        data_objects = []
        for obj_id in objects:
            stat_out = self.fs.radosmo(["stat", obj_id], stdout=StringIO())
            size = int(re.match(".+, size (.+)$", stat_out).group(1))
            if size > 0:
                data_objects.append(obj_id)
        # Define the various forms of damage we will inflict
        class MetadataMutation(object):
            def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
                self.obj_id = obj_id_
                self.desc = desc_
                self.mutate_fn = mutate_fn_
                self.expectation = expectation_
                if ls_path is None:
                    self.ls_path = "."
                else:
                    self.ls_path = ls_path

            def __eq__(self, other):
                return self.desc == other.desc

            def __hash__(self):
                return hash(self.desc)
        junk = "deadbeef" * 10
        mutations = []

        # Removals
        for o in objects:
            if o in [
                    # JournalPointers are auto-replaced if missing (same path as upgrade)
                    "400.00000000",
                    # Missing dirfrags for non-system dirs result in empty directory
                    "10000000000.00000000",
                    # PurgeQueue is auto-created if not found on startup
                    "500.00000000",
                    # open file table is auto-created if not found on startup
                    "mds0_openfiles.0"
            ]:
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            log.info("Expectation on rm '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(MetadataMutation(
                o,
                "Delete {0}".format(o),
                lambda o=o: self.fs.radosm(["rm", o]),
                expectation
            ))
        # Blatant corruptions
        for obj_id in data_objects:
            if obj_id == "500.00000000":
                # purge queue corruption results in read-only FS
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.radosm(["put", o, "-"], stdin=StringIO(junk)),
                    READONLY
                ))
            else:
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.radosm(["put", o, "-"], stdin=StringIO(junk)),
                    DAMAGED_ON_START
                ))
        # Truncations
        for o in data_objects:
            if o == "500.00000000":
                # The PurgeQueue is allowed to be empty: Journaler interprets
                # an empty header object as an empty journal.
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            mutations.append(MetadataMutation(
                o,
                "Truncate {0}".format(o),
                lambda o=o: self.fs.radosm(["truncate", o, "0"]),
                expectation
            ))
        # OMAP value corruptions
        for o, k in omap_keys:
            if o.startswith("100."):
                # Anything in rank 0's 'mydir'
                expectation = DAMAGED_ON_START
            else:
                expectation = EIO_ON_LS

            mutations.append(MetadataMutation(
                o,
                "Corrupt omap key {0}:{1}".format(o, k),
                lambda o=o, k=k: self.fs.radosm(["setomapval", o, k, junk]),
                expectation,
                get_path(o, k)
            ))
        # OMAP header corruptions
        for o in omap_header_objs:
            if re.match(r"60.\.00000000", o) \
                    or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
                expectation = DAMAGED_ON_START
            else:
                expectation = NO_DAMAGE

            log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(MetadataMutation(
                o,
                "Corrupt omap header on {0}".format(o),
                lambda o=o: self.fs.radosm(["setomapheader", o, junk]),
                expectation
            ))
        results = {}

        for mutation in mutations:
            log.info("Applying mutation '{0}'".format(mutation.desc))

            # Reset MDS state
            self.mount_a.umount_wait(force=True)
            self.fs.fail()
            self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

            # Reset RADOS pool state
            self.fs.radosm(['import', '-'], stdin=BytesIO(serialized))

            # Inject the mutation
            mutation.mutate_fn()

            # Try starting the MDS
            self.fs.set_joinable()

            # How long we'll wait between starting a daemon and expecting
            # it to make it through startup, and potentially declare itself
            # damaged to the mon cluster.
            startup_timeout = 60
            if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
                if mutation.expectation == DAMAGED_ON_START:
                    # The MDS may pass through active before making it to damaged
                    try:
                        self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout)
                    except RuntimeError:
                        pass

                # Wait for MDS to either come up or go into damaged state
                try:
                    self.wait_until_true(
                        lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(),
                        startup_timeout)
                except RuntimeError:
                    crashed = False
                    # Didn't make it to healthy or damaged, did it crash?
                    for daemon_id, daemon in self.fs.mds_daemons.items():
                        if daemon.proc and daemon.proc.finished:
                            crashed = True
                            log.error("Daemon {0} crashed!".format(daemon_id))
                            daemon.proc = None  # So that subsequent stop() doesn't raise error

                    if not crashed:
                        # Didn't go healthy, didn't go damaged, didn't crash, so what?
                        raise
                    else:
                        log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
                        results[mutation] = CRASHED
                        continue
                if self.is_marked_damaged(0):
                    log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_START
                    continue
                else:
                    log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc))
            else:
                try:
                    self.wait_until_true(self.fs.are_daemons_healthy, 60)
                except RuntimeError:
                    log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc))
                    if self.is_marked_damaged(0):
                        results[mutation] = DAMAGED_ON_START
                    else:
                        results[mutation] = FAILED_SERVER
                    continue

                log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))
            # MDS is up, should go damaged on ls or client mount
            self.mount_a.mount_wait()
            if mutation.ls_path == ".":
                proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
            else:
                proc = self.mount_a.stat(mutation.ls_path, wait=False)

            if mutation.expectation == DAMAGED_ON_LS:
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
                    log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_LS
                except RuntimeError:
                    if self.fs.are_daemons_healthy():
                        log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
                            mutation.desc))
                        results[mutation] = NO_DAMAGE
                    else:
                        log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
                        results[mutation] = FAILED_SERVER
            elif mutation.expectation == READONLY:
                proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False)
                try:
                    proc.wait()
                except CommandFailedError:
                    stderr = proc.stderr.getvalue()
                    log.info(stderr)
                    if "Read-only file system".lower() not in stderr.lower():
                        raise
            else:
                try:
                    wait([proc], 20)
                    log.info("Result: Mutation '{0}' did not cause DAMAGED state".format(mutation.desc))
                    results[mutation] = NO_DAMAGE
                except MaxWhileTries:
                    log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
                    results[mutation] = FAILED_CLIENT
                except CommandFailedError as e:
                    if e.exitstatus == errno.EIO:
                        log.info("Result: EIO on client")
                        results[mutation] = EIO_ON_LS
                    else:
                        log.info("Result: unexpected error {0} on client".format(e))
                        results[mutation] = FAILED_CLIENT
            if mutation.expectation == EIO_ON_LS:
                # EIOs mean something handled by DamageTable: assert that it has
                # been populated
                damage = json.loads(
                    self.fs.mon_manager.raw_cluster_cmd(
                        'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty'))
                if len(damage) == 0:
                    results[mutation] = EIO_NO_DAMAGE
        failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result]
        if failures:
            log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
            for mutation, result in failures:
                log.error("  Expected '{0}' actually '{1}' from '{2}'".format(
                    mutation.expectation, result, mutation.desc
                ))
            raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
        else:
            log.info("All {0} mutations had expected outcomes".format(len(mutations)))
    def test_damaged_dentry(self):
        # Damage to dentries is interesting because it leaves the
        # directory's `complete` flag in a subtle state where
        # we have marked the dir complete in order that folks
        # can access it, but in actual fact there is a dentry
        # missing
        self.mount_a.run_shell(["mkdir", "subdir/"])

        self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
        self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])

        subdir_ino = self.mount_a.path_to_ino("subdir")

        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.fail()

        # Corrupt a dentry
        junk = "deadbeef" * 10
        dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
        self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])
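        # A dirfrag object is named "<ino in hex>.<frag id>", and each dentry
        # in it is an OMAP key named "<name>_head"; overwriting the value with
        # junk makes this single dentry undecodeable without touching its
        # siblings.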
        # Start up and try to list it
        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        self.mount_a.mount_wait()
        dentries = self.mount_a.ls("subdir/")

        # The damaged guy should have disappeared
        self.assertEqual(dentries, ["file_undamaged"])

        # I should get ENOENT if I try and read it normally, because
        # the dir is considered complete
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected ENOENT")
        # The fact that there is damage should have been recorded
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        damage_id = damage[0]['id']
        # If I try to create a dentry with the same name as the damaged guy
        # then that should be forbidden
        try:
            self.mount_a.touch("subdir/file_to_be_damaged")
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.EIO)
        else:
            raise AssertionError("Expected EIO")

        # Attempting that touch will clear the client's complete flag, now
        # when I stat it I'll get EIO instead of ENOENT
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            if isinstance(self.mount_a, FuseMount):
                self.assertEqual(e.exitstatus, errno.EIO)
            else:
                # Old kernel client handles this case differently
                self.assertIn(e.exitstatus, [errno.ENOENT, errno.EIO])
        else:
            raise AssertionError("Expected EIO")

        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "2")
        self.mount_a.umount_wait()

        # Now repair the stats
        scrub_json = self.fs.run_scrub(["start", "/subdir", "repair"])
        log.info(json.dumps(scrub_json, indent=2))

        self.assertNotEqual(scrub_json, None)
        self.assertEqual(scrub_json["return_code"], 0)
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=scrub_json["scrub_tag"]), True)

        # Check that the file count is now correct
        self.mount_a.mount_wait()
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "1")
        # Clean up the omap object
        self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Clean up the damagetable entry
        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", "{did}".format(did=damage_id))

        # Now I should be able to create a file with the same name as the
        # damaged guy if I want.
        self.mount_a.touch("subdir/file_to_be_damaged")
    def test_open_ino_errors(self):
        """
        That errors encountered during opening inos are properly propagated
        """

        self.mount_a.run_shell(["mkdir", "dir1"])
        self.mount_a.run_shell(["touch", "dir1/file1"])
        self.mount_a.run_shell(["mkdir", "dir2"])
        self.mount_a.run_shell(["touch", "dir2/file2"])
        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
        self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])

        file1_ino = self.mount_a.path_to_ino("dir1/file1")
        file2_ino = self.mount_a.path_to_ino("dir2/file2")
        dir2_ino = self.mount_a.path_to_ino("dir2")

        # Ensure everything is written to backing store
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Drop everything from the MDS cache
        self.fs.fail()
        self.fs.journal_tool(['journal', 'reset'], 0)
        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        self.mount_a.mount_wait()
        # Case 1: un-decodeable backtrace

        # Validate that the backtrace is present and decodable
        self.fs.read_backtrace(file1_ino)
        # Go corrupt the backtrace of alpha/target (used for resolving
        # hardlinks)
        self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")
        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "backtrace")
        self.assertEqual(damage[0]['ino'], file1_ino)

        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", str(damage[0]['id']))
        # Case 2: missing dirfrag for the target inode
        self.fs.radosm(["rm", "{0:x}.00000000".format(dir2_ino)])

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 2)
        if damage[0]['damage_type'] == "backtrace":
            self.assertEqual(damage[0]['ino'], file2_ino)
            self.assertEqual(damage[1]['damage_type'], "dir_frag")
            self.assertEqual(damage[1]['ino'], dir2_ino)
        else:
            self.assertEqual(damage[0]['damage_type'], "dir_frag")
            self.assertEqual(damage[0]['ino'], dir2_ino)
            self.assertEqual(damage[1]['damage_type'], "backtrace")
            self.assertEqual(damage[1]['ino'], file2_ino)

        for entry in damage:
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "rm", str(entry['id']))
    def test_dentry_first_existing(self):
        """
        That the MDS won't abort when the dentry is already known to be damaged.
        """

        def verify_corrupt():
            info = self.fs.read_cache("/a", 0)
            log.debug('%s', info)
            self.assertEqual(len(info), 1)
            dirfrags = info[0]['dirfrags']
            self.assertEqual(len(dirfrags), 1)
            dentries = dirfrags[0]['dentries']
            self.assertEqual([dn['path'] for dn in dentries if dn['is_primary']], ['a/c'])
            self.assertEqual(dentries[0]['snap_first'], 18446744073709551606) # SNAP_HEAD

        self.mount_a.run_shell_payload("mkdir -p a/b")
        self.fs.flush()
        self.config_set("mds", "mds_abort_on_newly_corrupt_dentry", False)
        self.config_set("mds", "mds_inject_rename_corrupt_dentry_first", "1.0")
        time.sleep(5) # for conf to percolate
        self.mount_a.run_shell_payload("mv a/b a/c; sync .")
        self.mount_a.umount()
        verify_corrupt()
        self.fs.fail()
        self.config_rm("mds", "mds_inject_rename_corrupt_dentry_first")
        self.config_set("mds", "mds_abort_on_newly_corrupt_dentry", False)
        self.fs.set_joinable()
        status = self.fs.status()
        verify_corrupt()
        self.assertFalse(self.fs.status().hadfailover(status))
    def test_dentry_first_preflush(self):
        """
        That the MDS won't write a dentry with new damage to CDentry::first
        to the journal.
        """

        rank0 = self.fs.get_rank()
        self.fs.rank_freeze(True, rank=0)
        self.mount_a.run_shell_payload("mkdir -p a/{b,c}/d")
        self.fs.flush()
        self.config_set("mds", "mds_inject_rename_corrupt_dentry_first", "1.0")
        time.sleep(5) # for conf to percolate
        p = self.mount_a.run_shell_payload("timeout 60 mv a/b a/z", wait=False)
        self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(), timeout=self.fs.beacon_timeout)
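        # Here mds_abort_on_newly_corrupt_dentry presumably remains at its
        # default (true), so the rank aborts rather than journal the bad
        # dentry; because the rank is frozen it is reported as laggy_since
        # instead of being failed over, and the abort is what produces the
        # coredump removed below.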
        self.config_rm("mds", "mds_inject_rename_corrupt_dentry_first")
        self.fs.rank_freeze(False, rank=0)
        self.delete_mds_coredump(rank0['name'])
        self.fs.mds_restart(rank0['name'])
        self.fs.wait_for_daemons()
        p.wait()
        self.mount_a.run_shell_payload("stat a/ && find a/")
    def test_dentry_first_precommit(self):
        """
        That the MDS won't write a dentry with new damage to CDentry::first
        to the directory object.
        """

        fscid = self.fs.id
        self.mount_a.run_shell_payload("mkdir -p a/{b,c}/d; sync .")
        self.mount_a.umount() # allow immediate scatter write back
        self.fs.flush()
        # now just twiddle some inode metadata on a regular file
        self.mount_a.mount_wait()
        self.mount_a.run_shell_payload("chmod 711 a/b/d; sync .")
        self.mount_a.umount() # avoid journaling session related things
        # okay, now cause the dentry to get damaged after loading from the journal
        self.fs.fail()
        self.config_set("mds", "mds_inject_journal_corrupt_dentry_first", "1.0")
        time.sleep(5) # for conf to percolate
        self.fs.set_joinable()
        self.fs.wait_for_daemons()
        rank0 = self.fs.get_rank()
        self.fs.rank_freeze(True, rank=0)
        # so now we want to trigger commit but this will crash, so:
        c = ['--connect-timeout=60', 'tell', f"mds.{fscid}:0", "flush", "journal"]
        p = self.ceph_cluster.mon_manager.run_cluster_cmd(args=c, wait=False, timeoutcmd=30)
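        # "flush journal" forces the frozen rank to commit dirty dirfrags back
        # to their directory objects -- exactly the write the injected
        # corruption should make it abort on -- so the tell is issued
        # asynchronously, with timeouts, rather than awaited inline.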
        self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(), timeout=self.fs.beacon_timeout)
        self.config_rm("mds", "mds_inject_journal_corrupt_dentry_first")
        self.fs.rank_freeze(False, rank=0)
        self.delete_mds_coredump(rank0['name'])
        self.fs.mds_restart(rank0['name'])
        self.fs.wait_for_daemons()
        try:
            p.wait()
        except CommandFailedError as e:
            log.info(e)
        else:
            self.fail("flush journal should fail!")
        self.mount_a.mount_wait()
        self.mount_a.run_shell_payload("stat a/ && find a/")