]>
git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/cephfs/test_damage.py
5 from teuthology
.contextutil
import MaxWhileTries
6 from teuthology
.exceptions
import CommandFailedError
7 from teuthology
.orchestra
.run
import wait
8 from tasks
.cephfs
.fuse_mount
import FuseMount
9 from tasks
.cephfs
.cephfs_test_case
import CephFSTestCase
, for_teuthology
# Symbolic labels for the possible outcomes of damaging a metadata object.
# Each mutation in test_object_deletion is expected to produce exactly one
# of these, and the observed result is compared against the expectation.
# NOTE(review): this chunk is a gitweb text extraction — the upstream line
# numbers are fused into the statements, and the EIO_ON_LS and READONLY
# constants (referenced later in this file) are missing from the extraction.
# Recover them from upstream ceph qa/tasks/cephfs/test_damage.py.
11 DAMAGED_ON_START
= "damaged_on_start"
12 DAMAGED_ON_LS
= "damaged_on_ls"
13 CRASHED
= "server crashed"
14 NO_DAMAGE
= "no damage"
16 FAILED_CLIENT
= "client failed"
17 FAILED_SERVER
= "server failed"
19 # An EIO in response to a stat from the client
22 # An EIO, but nothing in damage table (not ever what we expect)
23 EIO_NO_DAMAGE
= "eio without damage entry"
# Module-level logger, named after this module per the logging convention.
26 log
= logging
.getLogger(__name__
)
29 class TestDamage(CephFSTestCase
):
def _simple_workload_write(self):
    """Create a small data file through the client mount and stat it.

    Makes ``subdir`` on mount_a, writes a 6 MB file at
    ``subdir/sixmegs``, and returns the result of ``mount_a.stat()``
    for that file.
    """
    client = self.mount_a
    client.run_shell(["mkdir", "subdir"])
    client.write_n_mb("subdir/sixmegs", 6)
    return client.stat("subdir/sixmegs")
def is_marked_damaged(self, rank):
    """Return True when `rank` is listed as damaged in the current MDS map."""
    damaged_ranks = self.fs.get_mds_map()['damaged']
    return rank in damaged_ranks
40 def test_object_deletion(self
):
# NOTE(review): gitweb text extraction — upstream line numbers are fused into
# the statements and many original lines are missing from this chunk
# (the returns of is_ignored/get_path, the `mutations = []` / `results = {}`
# initialisers, several else: branches, and the EIO_ON_LS / READONLY
# constants).  Recover the block from upstream ceph
# qa/tasks/cephfs/test_damage.py before editing any logic here.
42 That the MDS has a clean 'damaged' response to loss of any single metadata object
# Seed the filesystem with a small known workload before damaging anything.
45 self
._simple
_workload
_write
()
47 # Hmm, actually it would be nice to permute whether the metadata pool
48 # state contains sessions or not, but for the moment close this session
49 # to avoid waiting through reconnect on every MDS start.
50 self
.mount_a
.umount_wait()
51 for mds_name
in self
.fs
.get_active_names():
52 self
.fs
.mds_asok(["flush", "journal"], mds_name
)
# Snapshot the whole metadata pool so each mutation can start from a
# pristine copy (re-imported at the top of the mutation loop below).
57 self
.fs
.rados(['export', '/tmp/metadata.bin'])
# Helper: skip near-duplicate objects/dentries so the mutation list stays
# small (e.g. keep only one stray dirfrag and one stray dentry).
59 def is_ignored(obj_id
, dentry
=None):
61 A filter to avoid redundantly mutating many similar objects (e.g.
62 stray dirfrags) or similar dentries (e.g. stray dir dentries)
64 if re
.match("60.\.00000000", obj_id
) and obj_id
!= "600.00000000":
67 if dentry
and obj_id
== "100.00000000":
68 if re
.match("stray.+_head", dentry
) and dentry
!= "stray0_head":
# Helper: map a metadata object (and optionally a dentry) to the client
# path that should be poked to provoke EIO after the damage is injected.
73 def get_path(obj_id
, dentry
=None):
75 What filesystem path does this object or dentry correspond to? i.e.
76 what should I poke to see EIO after damaging it?
79 if obj_id
== "1.00000000" and dentry
== "subdir_head":
81 elif obj_id
== "10000000000.00000000" and dentry
== "sixmegs_head":
82 return "./subdir/sixmegs"
84 # None means ls will do an "ls -R" in hope of seeing some errors
# Enumerate every object in the metadata pool, minus the ignored ones.
87 objects
= self
.fs
.rados(["ls"]).split("\n")
88 objects
= [o
for o
in objects
if not is_ignored(o
)]
90 # Find all objects with an OMAP header
93 header
= self
.fs
.rados(["getomapheader", o
])
94 # The rados CLI wraps the header output in a hex-printed style
95 header_bytes
= int(re
.match("header \((.+) bytes\)", header
).group(1))
97 omap_header_objs
.append(o
)
99 # Find all OMAP key/vals
102 keys_str
= self
.fs
.rados(["listomapkeys", o
])
104 for key
in keys_str
.split("\n"):
105 if not is_ignored(o
, key
):
106 omap_keys
.append((o
, key
))
108 # Find objects that have data in their bodies
110 for obj_id
in objects
:
111 stat_out
= self
.fs
.rados(["stat", obj_id
])
112 size
= int(re
.match(".+, size (.+)$", stat_out
).group(1))
114 data_objects
.append(obj_id
)
116 # Define the various forms of damage we will inflict
# One planned act of damage: which object to mutate, a human-readable
# description, a callable that performs the mutation, the expected
# outcome constant, and (optionally) a path to `ls`/stat afterwards.
117 class MetadataMutation(object):
118 def __init__(self
, obj_id_
, desc_
, mutate_fn_
, expectation_
, ls_path
=None):
119 self
.obj_id
= obj_id_
121 self
.mutate_fn
= mutate_fn_
122 self
.expectation
= expectation_
126 self
.ls_path
= ls_path
# Mutations compare/hash by description so they can be dict keys.
128 def __eq__(self
, other
):
129 return self
.desc
== other
.desc
132 return hash(self
.desc
)
# Garbage payload used by all of the corruption mutations below.
134 junk
= "deadbeef" * 10
# Deletions: objects whose removal is tolerated map to NO_DAMAGE,
# everything else is expected to mark the MDS rank damaged on startup.
140 # JournalPointers are auto-replaced if missing (same path as upgrade)
142 # Missing dirfrags for non-system dirs result in empty directory
143 "10000000000.00000000",
144 # PurgeQueue is auto-created if not found on startup
146 # open file table is auto-created if not found on startup
149 expectation
= NO_DAMAGE
151 expectation
= DAMAGED_ON_START
153 log
.info("Expectation on rm '{0}' will be '{1}'".format(
157 mutations
.append(MetadataMutation(
159 "Delete {0}".format(o
),
160 lambda o
=o
: self
.fs
.rados(["rm", o
]),
164 # Blatant corruptions
165 for obj_id
in data_objects
:
166 if obj_id
== "500.00000000":
167 # purge queue corruption results in read-only FS
168 mutations
.append(MetadataMutation(
170 "Corrupt {0}".format(obj_id
),
171 lambda o
=obj_id
: self
.fs
.rados(["put", o
, "-"], stdin_data
=junk
),
175 mutations
.append(MetadataMutation(
177 "Corrupt {0}".format(obj_id
),
178 lambda o
=obj_id
: self
.fs
.rados(["put", o
, "-"], stdin_data
=junk
),
# Truncations of data objects to zero length.
183 for o
in data_objects
:
184 if o
== "500.00000000":
185 # The PurgeQueue is allowed to be empty: Journaler interprets
186 # an empty header object as an empty journal.
187 expectation
= NO_DAMAGE
189 expectation
= DAMAGED_ON_START
194 "Truncate {0}".format(o
),
195 lambda o
=o
: self
.fs
.rados(["truncate", o
, "0"]),
199 # OMAP value corruptions
200 for o
, k
in omap_keys
:
201 if o
.startswith("100."):
202 # Anything in rank 0's 'mydir'
203 expectation
= DAMAGED_ON_START
205 expectation
= EIO_ON_LS
210 "Corrupt omap key {0}:{1}".format(o
, k
),
211 lambda o
=o
,k
=k
: self
.fs
.rados(["setomapval", o
, k
, junk
]),
217 # OMAP header corruptions
218 for o
in omap_header_objs
:
219 if re
.match("60.\.00000000", o
) \
220 or o
in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
221 expectation
= DAMAGED_ON_START
223 expectation
= NO_DAMAGE
225 log
.info("Expectation on corrupt header '{0}' will be '{1}'".format(
232 "Corrupt omap header on {0}".format(o
),
233 lambda o
=o
: self
.fs
.rados(["setomapheader", o
, junk
]),
# Main loop: for each planned mutation, restore the pool from the export
# taken above, inject the damage, restart the MDS, and record the observed
# outcome into `results` keyed by the mutation.
240 for mutation
in mutations
:
241 log
.info("Applying mutation '{0}'".format(mutation
.desc
))
244 self
.mount_a
.umount_wait(force
=True)
# Clear any 'damaged' flag left over from the previous iteration.
247 self
.fs
.mon_manager
.raw_cluster_cmd('mds', 'repaired', '0')
249 # Reset RADOS pool state
250 self
.fs
.rados(['import', '/tmp/metadata.bin'])
252 # Inject the mutation
255 # Try starting the MDS
256 self
.fs
.mds_restart()
258 # How long we'll wait between starting a daemon and expecting
259 # it to make it through startup, and potentially declare itself
260 # damaged to the mon cluster.
263 if mutation
.expectation
not in (EIO_ON_LS
, DAMAGED_ON_LS
, NO_DAMAGE
):
264 if mutation
.expectation
== DAMAGED_ON_START
:
265 # The MDS may pass through active before making it to damaged
267 self
.wait_until_true(lambda: self
.is_marked_damaged(0), startup_timeout
)
271 # Wait for MDS to either come up or go into damaged state
273 self
.wait_until_true(lambda: self
.is_marked_damaged(0) or self
.fs
.are_daemons_healthy(), startup_timeout
)
276 # Didn't make it to healthy or damaged, did it crash?
277 for daemon_id
, daemon
in self
.fs
.mds_daemons
.items():
278 if daemon
.proc
and daemon
.proc
.finished
:
280 log
.error("Daemon {0} crashed!".format(daemon_id
))
281 daemon
.proc
= None # So that subsequent stop() doesn't raise error
283 # Didn't go health, didn't go damaged, didn't crash, so what?
286 log
.info("Result: Mutation '{0}' led to crash".format(mutation
.desc
))
287 results
[mutation
] = CRASHED
289 if self
.is_marked_damaged(0):
290 log
.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation
.desc
))
291 results
[mutation
] = DAMAGED_ON_START
294 log
.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation
.desc
))
297 self
.wait_until_true(self
.fs
.are_daemons_healthy
, 60)
299 log
.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation
.desc
))
300 if self
.is_marked_damaged(0):
301 results
[mutation
] = DAMAGED_ON_START
303 results
[mutation
] = FAILED_SERVER
305 log
.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation
.desc
))
307 # MDS is up, should go damaged on ls or client mount
308 self
.mount_a
.mount_wait()
309 if mutation
.ls_path
== ".":
310 proc
= self
.mount_a
.run_shell(["ls", "-R", mutation
.ls_path
], wait
=False)
312 proc
= self
.mount_a
.stat(mutation
.ls_path
, wait
=False)
314 if mutation
.expectation
== DAMAGED_ON_LS
:
316 self
.wait_until_true(lambda: self
.is_marked_damaged(0), 60)
317 log
.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation
.desc
))
318 results
[mutation
] = DAMAGED_ON_LS
320 if self
.fs
.are_daemons_healthy():
321 log
.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
323 results
[mutation
] = NO_DAMAGE
325 log
.error("Result: Failed to go damaged on mutation '{0}'".format(mutation
.desc
))
326 results
[mutation
] = FAILED_SERVER
327 elif mutation
.expectation
== READONLY
:
328 proc
= self
.mount_a
.run_shell(["mkdir", "foo"], wait
=False)
331 except CommandFailedError
:
332 stderr
= proc
.stderr
.getvalue()
334 if "Read-only file system".lower() in stderr
.lower():
341 log
.info("Result: Mutation '{0}' did not caused DAMAGED state".format(mutation
.desc
))
342 results
[mutation
] = NO_DAMAGE
343 except MaxWhileTries
:
344 log
.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation
.desc
))
345 results
[mutation
] = FAILED_CLIENT
346 except CommandFailedError
as e
:
347 if e
.exitstatus
== errno
.EIO
:
348 log
.info("Result: EIO on client")
349 results
[mutation
] = EIO_ON_LS
351 log
.info("Result: unexpected error {0} on client".format(e
))
352 results
[mutation
] = FAILED_CLIENT
# An EIO must be backed by an entry in the MDS damage table; an EIO with
# an empty damage table is recorded as EIO_NO_DAMAGE (a failure).
354 if mutation
.expectation
== EIO_ON_LS
:
355 # EIOs mean something handled by DamageTable: assert that it has
358 self
.fs
.mon_manager
.raw_cluster_cmd(
359 'tell', 'mds.{0}'.format(self
.fs
.get_active_names()[0]), "damage", "ls", '--format=json-pretty'))
361 results
[mutation
] = EIO_NO_DAMAGE
# Summarise: any mutation whose observed result differs from its
# expectation counts as a failure and aborts the test.
363 failures
= [(mutation
, result
) for (mutation
, result
) in results
.items() if mutation
.expectation
!= result
]
365 log
.error("{0} mutations had unexpected outcomes:".format(len(failures
)))
366 for mutation
, result
in failures
:
367 log
.error(" Expected '{0}' actually '{1}' from '{2}'".format(
368 mutation
.expectation
, result
, mutation
.desc
370 raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures
)))
372 log
.info("All {0} mutations had expected outcomes".format(len(mutations
)))
374 def test_damaged_dentry(self
):
# NOTE(review): gitweb text extraction — upstream line numbers are fused into
# the statements and several original lines (`try:` headers, `else:` branches
# and the `damage = json.loads(` assignment) are missing from this chunk.
# Recover the block from upstream ceph qa/tasks/cephfs/test_damage.py before
# editing any logic here.
375 # Damage to dentrys is interesting because it leaves the
376 # directory's `complete` flag in a subtle state where
377 # we have marked the dir complete in order that folks
378 # can access it, but in actual fact there is a dentry
# Create one dentry to keep intact and one to damage.
380 self
.mount_a
.run_shell(["mkdir", "subdir/"])
382 self
.mount_a
.run_shell(["touch", "subdir/file_undamaged"])
383 self
.mount_a
.run_shell(["touch", "subdir/file_to_be_damaged"])
385 subdir_ino
= self
.mount_a
.path_to_ino("subdir")
# Flush journals so the dirfrag object exists in the metadata pool.
387 self
.mount_a
.umount_wait()
388 for mds_name
in self
.fs
.get_active_names():
389 self
.fs
.mds_asok(["flush", "journal"], mds_name
)
# Overwrite the target dentry's omap value with garbage in the dirfrag
# object (named <hex ino>.00000000).
395 junk
= "deadbeef" * 10
396 dirfrag_obj
= "{0:x}.00000000".format(subdir_ino
)
397 self
.fs
.rados(["setomapval", dirfrag_obj
, "file_to_be_damaged_head", junk
])
399 # Start up and try to list it
400 self
.fs
.mds_restart()
401 self
.fs
.wait_for_daemons()
403 self
.mount_a
.mount_wait()
404 dentries
= self
.mount_a
.ls("subdir/")
406 # The damaged guy should have disappeared
407 self
.assertEqual(dentries
, ["file_undamaged"])
409 # I should get ENOENT if I try and read it normally, because
410 # the dir is considered complete
412 self
.mount_a
.stat("subdir/file_to_be_damaged", wait
=True)
413 except CommandFailedError
as e
:
414 self
.assertEqual(e
.exitstatus
, errno
.ENOENT
)
416 raise AssertionError("Expected ENOENT")
418 # The fact that there is damaged should have bee recorded
420 self
.fs
.mon_manager
.raw_cluster_cmd(
421 'tell', 'mds.{0}'.format(self
.fs
.get_active_names()[0]),
422 "damage", "ls", '--format=json-pretty'))
423 self
.assertEqual(len(damage
), 1)
424 damage_id
= damage
[0]['id']
426 # If I try to create a dentry with the same name as the damaged guy
427 # then that should be forbidden
429 self
.mount_a
.touch("subdir/file_to_be_damaged")
430 except CommandFailedError
as e
:
431 self
.assertEqual(e
.exitstatus
, errno
.EIO
)
433 raise AssertionError("Expected EIO")
435 # Attempting that touch will clear the client's complete flag, now
436 # when I stat it I'll get EIO instead of ENOENT
438 self
.mount_a
.stat("subdir/file_to_be_damaged", wait
=True)
439 except CommandFailedError
as e
:
440 if isinstance(self
.mount_a
, FuseMount
):
441 self
.assertEqual(e
.exitstatus
, errno
.EIO
)
443 # Kernel client handles this case differently
444 self
.assertEqual(e
.exitstatus
, errno
.ENOENT
)
446 raise AssertionError("Expected EIO")
# The damaged dentry still counts against the directory's file stats
# until repaired below.
448 nfiles
= self
.mount_a
.getfattr("./subdir", "ceph.dir.files")
449 self
.assertEqual(nfiles
, "2")
451 self
.mount_a
.umount_wait()
453 # Now repair the stats
454 scrub_json
= self
.fs
.rank_tell(["scrub", "start", "/subdir", "repair"])
455 log
.info(json
.dumps(scrub_json
, indent
=2))
457 self
.assertEqual(scrub_json
["passed_validation"], False)
458 self
.assertEqual(scrub_json
["raw_stats"]["checked"], True)
459 self
.assertEqual(scrub_json
["raw_stats"]["passed"], False)
461 # Check that the file count is now correct
462 self
.mount_a
.mount_wait()
463 nfiles
= self
.mount_a
.getfattr("./subdir", "ceph.dir.files")
464 self
.assertEqual(nfiles
, "1")
466 # Clean up the omap object
467 self
.fs
.rados(["setomapval", dirfrag_obj
, "file_to_be_damaged_head", junk
])
469 # Clean up the damagetable entry
470 self
.fs
.mon_manager
.raw_cluster_cmd(
471 'tell', 'mds.{0}'.format(self
.fs
.get_active_names()[0]),
472 "damage", "rm", "{did}".format(did
=damage_id
))
474 # Now I should be able to create a file with the same name as the
475 # damaged guy if I want.
476 self
.mount_a
.touch("subdir/file_to_be_damaged")
478 def test_open_ino_errors(self
):
# NOTE(review): gitweb text extraction — upstream line numbers are fused into
# the statements and several original lines (`try:` headers, the
# `damage = json.loads(` assignments and the `for entry in damage:` loop
# header near the end) are missing from this chunk.  Recover the block from
# upstream ceph qa/tasks/cephfs/test_damage.py before editing any logic here.
480 That errors encountered during opening inos are properly propagated
# Two hardlinked files: resolving testdir/hardlinkN forces the MDS to
# open the target inode by ino (backtrace lookup).
483 self
.mount_a
.run_shell(["mkdir", "dir1"])
484 self
.mount_a
.run_shell(["touch", "dir1/file1"])
485 self
.mount_a
.run_shell(["mkdir", "dir2"])
486 self
.mount_a
.run_shell(["touch", "dir2/file2"])
487 self
.mount_a
.run_shell(["mkdir", "testdir"])
488 self
.mount_a
.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
489 self
.mount_a
.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])
491 file1_ino
= self
.mount_a
.path_to_ino("dir1/file1")
492 file2_ino
= self
.mount_a
.path_to_ino("dir2/file2")
493 dir2_ino
= self
.mount_a
.path_to_ino("dir2")
495 # Ensure everything is written to backing store
496 self
.mount_a
.umount_wait()
497 self
.fs
.mds_asok(["flush", "journal"])
499 # Drop everything from the MDS cache
500 self
.mds_cluster
.mds_stop()
501 self
.fs
.journal_tool(['journal', 'reset'], 0)
502 self
.mds_cluster
.mds_fail_restart()
503 self
.fs
.wait_for_daemons()
505 self
.mount_a
.mount_wait()
507 # Case 1: un-decodeable backtrace
509 # Validate that the backtrace is present and decodable
510 self
.fs
.read_backtrace(file1_ino
)
511 # Go corrupt the backtrace of alpha/target (used for resolving
# Overwrite the "parent" xattr (the backtrace) with undecodable junk.
513 self
.fs
._write
_data
_xattr
(file1_ino
, "parent", "rhubarb")
515 # Check that touching the hardlink gives EIO
516 ran
= self
.mount_a
.run_shell(["stat", "testdir/hardlink1"], wait
=False)
519 except CommandFailedError
:
520 self
.assertTrue("Input/output error" in ran
.stderr
.getvalue())
522 # Check that an entry is created in the damage table
524 self
.fs
.mon_manager
.raw_cluster_cmd(
525 'tell', 'mds.{0}'.format(self
.fs
.get_active_names()[0]),
526 "damage", "ls", '--format=json-pretty'))
527 self
.assertEqual(len(damage
), 1)
528 self
.assertEqual(damage
[0]['damage_type'], "backtrace")
529 self
.assertEqual(damage
[0]['ino'], file1_ino
)
# Remove the damage entry so Case 2 starts from a clean table.
531 self
.fs
.mon_manager
.raw_cluster_cmd(
532 'tell', 'mds.{0}'.format(self
.fs
.get_active_names()[0]),
533 "damage", "rm", str(damage
[0]['id']))
536 # Case 2: missing dirfrag for the target inode
538 self
.fs
.rados(["rm", "{0:x}.00000000".format(dir2_ino
)])
540 # Check that touching the hardlink gives EIO
541 ran
= self
.mount_a
.run_shell(["stat", "testdir/hardlink2"], wait
=False)
544 except CommandFailedError
:
545 self
.assertTrue("Input/output error" in ran
.stderr
.getvalue())
547 # Check that an entry is created in the damage table
# Two entries expected (backtrace + dir_frag); their ordering in the
# damage table is not guaranteed, hence the branch below.
549 self
.fs
.mon_manager
.raw_cluster_cmd(
550 'tell', 'mds.{0}'.format(self
.fs
.get_active_names()[0]),
551 "damage", "ls", '--format=json-pretty'))
552 self
.assertEqual(len(damage
), 2)
553 if damage
[0]['damage_type'] == "backtrace" :
554 self
.assertEqual(damage
[0]['ino'], file2_ino
)
555 self
.assertEqual(damage
[1]['damage_type'], "dir_frag")
556 self
.assertEqual(damage
[1]['ino'], dir2_ino
)
558 self
.assertEqual(damage
[0]['damage_type'], "dir_frag")
559 self
.assertEqual(damage
[0]['ino'], dir2_ino
)
560 self
.assertEqual(damage
[1]['damage_type'], "backtrace")
561 self
.assertEqual(damage
[1]['ino'], file2_ino
)
# Clean up: remove each recorded damage entry (loop header missing from
# this extraction — presumably `for entry in damage:`; confirm upstream).
564 self
.fs
.mon_manager
.raw_cluster_cmd(
565 'tell', 'mds.{0}'.format(self
.fs
.get_active_names()[0]),
566 "damage", "rm", str(entry
['id']))