import json
import logging
import errno
import re

from teuthology.contextutil import MaxWhileTries
from teuthology.exceptions import CommandFailedError
from teuthology.orchestra.run import wait
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
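
# Possible outcomes recorded for each damage mutation in the tests below,
# compared against per-mutation expectations at the end of the run.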
DAMAGED_ON_START = "damaged_on_start"
DAMAGED_ON_LS = "damaged_on_ls"
CRASHED = "server crashed"
NO_DAMAGE = "no damage"
READONLY = "readonly"
FAILED_CLIENT = "client failed"
FAILED_SERVER = "server failed"

# An EIO in response to a stat from the client
EIO_ON_LS = "eio"

# An EIO, but nothing in damage table (not ever what we expect)
EIO_NO_DAMAGE = "eio without damage entry"


log = logging.getLogger(__name__)


class TestDamage(CephFSTestCase):
    def _simple_workload_write(self):
        self.mount_a.run_shell(["mkdir", "subdir"])
        self.mount_a.write_n_mb("subdir/sixmegs", 6)
        return self.mount_a.stat("subdir/sixmegs")

    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    @for_teuthology
    def test_object_deletion(self):
        """
        That the MDS has a clean 'damaged' response to loss of any single metadata object
        """

        self._simple_workload_write()

        # Hmm, actually it would be nice to permute whether the metadata pool
        # state contains sessions or not, but for the moment close this session
        # to avoid waiting through reconnect on every MDS start.
        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        self.fs.rados(['export', '/tmp/metadata.bin'])
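
        # For reference (based on the usual CephFS metadata layout, not
        # verified by this test): "1.00000000" is the root dirfrag,
        # "100.00000000" is rank 0's 'mydir' (which holds the stray dirs),
        # "60x.00000000" are stray dirfrags, "400.00000000" is the
        # JournalPointer, and "500.00000000" is the PurgeQueue header.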

        def is_ignored(obj_id, dentry=None):
            """
            A filter to avoid redundantly mutating many similar objects (e.g.
            stray dirfrags) or similar dentries (e.g. stray dir dentries)
            """
            if re.match("60.\.00000000", obj_id) and obj_id != "600.00000000":
                return True

            if dentry and obj_id == "100.00000000":
                if re.match("stray.+_head", dentry) and dentry != "stray0_head":
                    return True

            return False

        def get_path(obj_id, dentry=None):
            """
            What filesystem path does this object or dentry correspond to? i.e.
            what should I poke to see EIO after damaging it?
            """
            if obj_id == "1.00000000" and dentry == "subdir_head":
                return "./subdir"
            elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
                return "./subdir/sixmegs"

            # None means ls will do an "ls -R" in hope of seeing some errors
            return None
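
        # Take an inventory of every metadata object so that each of them can
        # be damaged in turn: omap headers, omap key/vals, and object data.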
        objects = self.fs.rados(["ls"]).split("\n")
        objects = [o for o in objects if not is_ignored(o)]

        # Find all objects with an OMAP header
        omap_header_objs = []
        for o in objects:
            header = self.fs.rados(["getomapheader", o])
            # The rados CLI wraps the header output in a hex-printed style
            header_bytes = int(re.match("header \((.+) bytes\)", header).group(1))
            if header_bytes > 0:
                omap_header_objs.append(o)

        # Find all OMAP key/vals
        omap_keys = []
        for o in objects:
            keys_str = self.fs.rados(["listomapkeys", o])
            if keys_str:
                for key in keys_str.split("\n"):
                    if not is_ignored(o, key):
                        omap_keys.append((o, key))

        # Find objects that have data in their bodies
        data_objects = []
        for obj_id in objects:
            stat_out = self.fs.rados(["stat", obj_id])
            size = int(re.match(".+, size (.+)$", stat_out).group(1))
            if size > 0:
                data_objects.append(obj_id)

        # Define the various forms of damage we will inflict
        class MetadataMutation(object):
            def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
                self.obj_id = obj_id_
                self.desc = desc_
                self.mutate_fn = mutate_fn_
                self.expectation = expectation_
                if ls_path is None:
                    self.ls_path = "."
                else:
                    self.ls_path = ls_path

            def __eq__(self, other):
                return self.desc == other.desc

            def __hash__(self):
                return hash(self.desc)
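
        # Fixed garbage payload used by all of the corruption mutations below.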
        junk = "deadbeef" * 10
        mutations = []

        # Removals
        for o in objects:
            if o in [
                # JournalPointers are auto-replaced if missing (same path as upgrade)
                "400.00000000",
                # Missing dirfrags for non-system dirs result in empty directory
                "10000000000.00000000",
                # PurgeQueue is auto-created if not found on startup
                "500.00000000"
            ]:
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            log.info("Expectation on rm '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(MetadataMutation(
                o,
                "Delete {0}".format(o),
                lambda o=o: self.fs.rados(["rm", o]),
                expectation
            ))

        # Blatant corruptions
        for obj_id in data_objects:
            if obj_id == "500.00000000":
                # purge queue corruption results in read-only FS
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk),
                    READONLY
                ))
            else:
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk),
                    DAMAGED_ON_START
                ))

        # Truncations
        for o in data_objects:
            if o == "500.00000000":
                # The PurgeQueue is allowed to be empty: Journaler interprets
                # an empty header object as an empty journal.
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            mutations.append(MetadataMutation(
                o,
                "Truncate {0}".format(o),
                lambda o=o: self.fs.rados(["truncate", o, "0"]),
                expectation
            ))

        # OMAP value corruptions
        for o, k in omap_keys:
            if o.startswith("100."):
                # Anything in rank 0's 'mydir'
                expectation = DAMAGED_ON_START
            else:
                expectation = EIO_ON_LS

            mutations.append(MetadataMutation(
                o,
                "Corrupt omap key {0}:{1}".format(o, k),
                lambda o=o, k=k: self.fs.rados(["setomapval", o, k, junk]),
                expectation,
                get_path(o, k)
            ))

        # OMAP header corruptions
        for o in omap_header_objs:
            if re.match("60.\.00000000", o) \
                    or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
                expectation = DAMAGED_ON_START
            else:
                expectation = NO_DAMAGE

            log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(MetadataMutation(
                o,
                "Corrupt omap header on {0}".format(o),
                lambda o=o: self.fs.rados(["setomapheader", o, junk]),
                expectation
            ))
        results = {}

        for mutation in mutations:
            log.info("Applying mutation '{0}'".format(mutation.desc))

            # Reset MDS state
            self.mount_a.umount_wait(force=True)
            self.fs.mds_stop()
            self.fs.mds_fail()
            self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

            # Reset RADOS pool state
            self.fs.rados(['import', '/tmp/metadata.bin'])

            # Inject the mutation
            mutation.mutate_fn()

            # Try starting the MDS
            self.fs.mds_restart()

            # How long we'll wait between starting a daemon and expecting
            # it to make it through startup, and potentially declare itself
            # damaged to the mon cluster.
            startup_timeout = 60

            if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
                if mutation.expectation == DAMAGED_ON_START:
                    # The MDS may pass through active before making it to damaged
                    try:
                        self.wait_until_true(
                            lambda: self.is_marked_damaged(0), startup_timeout)
                    except RuntimeError:
                        pass

                # Wait for MDS to either come up or go into damaged state
                try:
                    self.wait_until_true(
                        lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(),
                        startup_timeout)
                except RuntimeError:
                    crashed = False
                    # Didn't make it to healthy or damaged, did it crash?
                    for daemon_id, daemon in self.fs.mds_daemons.items():
                        if daemon.proc and daemon.proc.finished:
                            crashed = True
                            log.error("Daemon {0} crashed!".format(daemon_id))
                            daemon.proc = None  # So that subsequent stop() doesn't raise error
                    if not crashed:
                        # Didn't go healthy, didn't go damaged, didn't crash, so what?
                        raise
                    else:
                        log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
                        results[mutation] = CRASHED
                        continue

                if self.is_marked_damaged(0):
                    log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_START
                    continue
                else:
                    log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(
                        mutation.desc))
            else:
                try:
                    self.wait_until_true(self.fs.are_daemons_healthy, 60)
                except RuntimeError:
                    log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(
                        mutation.desc))
                    if self.is_marked_damaged(0):
                        results[mutation] = DAMAGED_ON_START
                    else:
                        results[mutation] = FAILED_SERVER
                    continue

                log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))

            # MDS is up, should go damaged on ls or client mount
            self.mount_a.mount()
            self.mount_a.wait_until_mounted()
            if mutation.ls_path == ".":
                proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
            else:
                proc = self.mount_a.stat(mutation.ls_path, wait=False)

            if mutation.expectation == DAMAGED_ON_LS:
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
                    log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_LS
                except RuntimeError:
                    if self.fs.are_daemons_healthy():
                        log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
                            mutation.desc))
                        results[mutation] = NO_DAMAGE
                    else:
                        log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
                        results[mutation] = FAILED_SERVER
            elif mutation.expectation == READONLY:
                proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False)
                try:
                    proc.wait()
                except CommandFailedError:
                    stderr = proc.stderr.getvalue()
                    log.info(stderr)
                    if "Read-only file system".lower() in stderr.lower():
                        pass
                    else:
                        raise
            else:
                try:
                    wait([proc], 20)
                    log.info("Result: Mutation '{0}' did not cause DAMAGED state".format(mutation.desc))
                    results[mutation] = NO_DAMAGE
                except MaxWhileTries:
                    log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
                    results[mutation] = FAILED_CLIENT
                except CommandFailedError as e:
                    if e.exitstatus == errno.EIO:
                        log.info("Result: EIO on client")
                        results[mutation] = EIO_ON_LS
                    else:
                        log.info("Result: unexpected error {0} on client".format(e))
                        results[mutation] = FAILED_CLIENT

            if mutation.expectation == EIO_ON_LS:
                # EIOs mean something handled by DamageTable: assert that it has
                # been populated
                damage = json.loads(
                    self.fs.mon_manager.raw_cluster_cmd(
                        'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                        "damage", "ls", '--format=json-pretty'))
                if len(damage) == 0:
                    results[mutation] = EIO_NO_DAMAGE
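
        # Compare what actually happened against each mutation's expectation;
        # any mismatch fails the whole test.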
        failures = [(mutation, result) for (mutation, result) in results.items()
                    if mutation.expectation != result]

        if failures:
            log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
            for mutation, result in failures:
                log.error("  Expected '{0}' actually '{1}' from '{2}'".format(
                    mutation.expectation, result, mutation.desc
                ))
            raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
        else:
            log.info("All {0} mutations had expected outcomes".format(len(mutations)))

    def test_damaged_dentry(self):
        # Damage to dentries is interesting because it leaves the
        # directory's `complete` flag in a subtle state where
        # we have marked the dir complete in order that folks
        # can access it, but in actual fact there is a dentry
        # missing
        self.mount_a.run_shell(["mkdir", "subdir/"])

        self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
        self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])

        subdir_ino = self.mount_a.path_to_ino("subdir")

        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        # Corrupt a dentry
        junk = "deadbeef" * 10
        dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Start up and try to list it
        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        dentries = self.mount_a.ls("subdir/")

        # The damaged guy should have disappeared
        self.assertEqual(dentries, ["file_undamaged"])

        # I should get ENOENT if I try and read it normally, because
        # the dir is considered complete
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected ENOENT")

        # The fact that there is damage should have been recorded
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        damage_id = damage[0]['id']

        # If I try to create a dentry with the same name as the damaged guy
        # then that should be forbidden
        try:
            self.mount_a.touch("subdir/file_to_be_damaged")
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.EIO)
        else:
            raise AssertionError("Expected EIO")

        # Attempting that touch will clear the client's complete flag, now
        # when I stat it I'll get EIO instead of ENOENT
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            if isinstance(self.mount_a, FuseMount):
                self.assertEqual(e.exitstatus, errno.EIO)
            else:
                # Kernel client handles this case differently
                self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected EIO")

        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "2")

        self.mount_a.umount_wait()

        # Now repair the stats
        scrub_json = self.fs.mds_asok(["scrub_path", "/subdir", "repair"])
        log.info(json.dumps(scrub_json, indent=2))

        self.assertEqual(scrub_json["passed_validation"], False)
        self.assertEqual(scrub_json["raw_stats"]["checked"], True)
        self.assertEqual(scrub_json["raw_stats"]["passed"], False)

        # Check that the file count is now correct
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "1")

        # Clean up the omap object
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Clean up the damagetable entry
        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", "{did}".format(did=damage_id))

        # Now I should be able to create a file with the same name as the
        # damaged guy if I want.
        self.mount_a.touch("subdir/file_to_be_damaged")

    def test_open_ino_errors(self):
        """
        That errors encountered during opening inos are properly propagated
        """
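
        # Opening a hardlink requires the MDS to resolve the link target's
        # inode via its backtrace, so corrupting the backtrace (case 1) or
        # removing the target's dirfrag (case 2) should surface as EIO when
        # the hardlink is touched.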
        self.mount_a.run_shell(["mkdir", "dir1"])
        self.mount_a.run_shell(["touch", "dir1/file1"])
        self.mount_a.run_shell(["mkdir", "dir2"])
        self.mount_a.run_shell(["touch", "dir2/file2"])
        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
        self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])

        file1_ino = self.mount_a.path_to_ino("dir1/file1")
        file2_ino = self.mount_a.path_to_ino("dir2/file2")
        dir2_ino = self.mount_a.path_to_ino("dir2")

        # Ensure everything is written to backing store
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Drop everything from the MDS cache
        self.mds_cluster.mds_stop()
        self.fs.journal_tool(['journal', 'reset'], 0)
        self.mds_cluster.mds_fail_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()

        # Case 1: un-decodeable backtrace

        # Validate that the backtrace is present and decodable
        self.fs.read_backtrace(file1_ino)
        # Go corrupt the backtrace of alpha/target (used for resolving
        # hardlinks)
        self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "backtrace")
        self.assertEqual(damage[0]['ino'], file1_ino)

        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", str(damage[0]['id']))

        # Case 2: missing dirfrag for the target inode
        self.fs.rados(["rm", "{0:x}.00000000".format(dir2_ino)])

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 2)
        if damage[0]['damage_type'] == "backtrace":
            self.assertEqual(damage[0]['ino'], file2_ino)
            self.assertEqual(damage[1]['damage_type'], "dir_frag")
            self.assertEqual(damage[1]['ino'], dir2_ino)
        else:
            self.assertEqual(damage[0]['damage_type'], "dir_frag")
            self.assertEqual(damage[0]['ino'], dir2_ino)
            self.assertEqual(damage[1]['damage_type'], "backtrace")
            self.assertEqual(damage[1]['ino'], file2_ino)

        for entry in damage:
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "rm", str(entry['id']))