from io import BytesIO, StringIO
import json
import logging
import errno
import re
from teuthology.contextutil import MaxWhileTries
from teuthology.exceptions import CommandFailedError
from teuthology.orchestra.run import wait
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology

DAMAGED_ON_START = "damaged_on_start"
DAMAGED_ON_LS = "damaged_on_ls"
CRASHED = "server crashed"
NO_DAMAGE = "no damage"
READONLY = "readonly"
FAILED_CLIENT = "client failed"
FAILED_SERVER = "server failed"

# An EIO in response to a stat from the client
EIO_ON_LS = "eio"

# An EIO, but nothing in the damage table (never what we expect)
EIO_NO_DAMAGE = "eio without damage entry"


log = logging.getLogger(__name__)


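# Rough guide to the rank-0 metadata object names this test pokes at,
# derived from CephFS's per-rank inode layout (illustrative, not exhaustive):
#   1.00000000         root directory dirfrag
#   100.00000000       rank 0's "mydir" (~mds0), which holds the stray dirs
#   600.00000000 ...   stray directory dirfrags (inodes 0x600-0x609)
#   200.00000000       rank 0's journal header
#   400.00000000       rank 0's JournalPointer
#   500.00000000       rank 0's PurgeQueue journal header
#   mds0_sessionmap    rank 0's session table
#   mds0_openfiles.0   rank 0's open file table
#   <ino-hex>.<frag>   ordinary dirfrags, e.g. 10000000000.00000000 is
#                      typically the first user-created directory here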
class TestDamage(CephFSTestCase):
    def _simple_workload_write(self):
        self.mount_a.run_shell(["mkdir", "subdir"])
        self.mount_a.write_n_mb("subdir/sixmegs", 6)
        return self.mount_a.stat("subdir/sixmegs")

    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    @for_teuthology  # 459s
    def test_object_deletion(self):
        """
        That the MDS has a clean 'damaged' response to loss of any single metadata object
        """

        self._simple_workload_write()

        # Hmm, actually it would be nice to permute whether the metadata pool
        # state contains sessions or not, but for the moment close this session
        # to avoid waiting through reconnect on every MDS start.
        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.fail()

        serialized = self.fs.radosmo(['export', '-'])

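        # `rados export` serializes the entire metadata pool into one blob;
        # re-importing it before each mutation (below) restores a pristine
        # starting state without rebuilding the filesystem.
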
        def is_ignored(obj_id, dentry=None):
            """
            A filter to avoid redundantly mutating many similar objects (e.g.
            stray dirfrags) or similar dentries (e.g. stray dir dentries)
            """
            if re.match(r"60.\.00000000", obj_id) and obj_id != "600.00000000":
                return True

            if dentry and obj_id == "100.00000000":
                if re.match(r"stray.+_head", dentry) and dentry != "stray0_head":
                    return True

            return False

        def get_path(obj_id, dentry=None):
            """
            What filesystem path does this object or dentry correspond to? I.e.
            what should I poke to see EIO after damaging it?
            """

            if obj_id == "1.00000000" and dentry == "subdir_head":
                return "./subdir"
            elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
                return "./subdir/sixmegs"

            # None means that ls will do an "ls -R" in the hope of seeing some errors
            return None

        objects = self.fs.radosmo(["ls"], stdout=StringIO()).strip().split("\n")
        objects = [o for o in objects if not is_ignored(o)]

        # Find all objects with an OMAP header
        omap_header_objs = []
        for o in objects:
            header = self.fs.radosmo(["getomapheader", o], stdout=StringIO())
            # The rados CLI wraps the header output in a hex-printed style
            header_bytes = int(re.match(r"header \((.+) bytes\)", header).group(1))
            if header_bytes > 0:
                omap_header_objs.append(o)

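        # (For reference, `rados getomapheader` prints a line roughly like
        # "header (274 bytes) :" followed by a hexdump; the regex above only
        # needs the byte count.)
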
        # Find all OMAP key/vals
        omap_keys = []
        for o in objects:
            keys_str = self.fs.radosmo(["listomapkeys", o], stdout=StringIO())
            if keys_str:
                for key in keys_str.strip().split("\n"):
                    if not is_ignored(o, key):
                        omap_keys.append((o, key))

        # Find objects that have data in their bodies
        data_objects = []
        for obj_id in objects:
            stat_out = self.fs.radosmo(["stat", obj_id], stdout=StringIO())
            size = int(re.match(".+, size (.+)$", stat_out).group(1))
            if size > 0:
                data_objects.append(obj_id)

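        # (`rados stat` output looks roughly like
        # "cephfs_metadata/601.00000000 mtime 2021-04-20T00:00:00.000000+0000, size 90",
        # so the regex above picks out the trailing size field.)
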
        # Define the various forms of damage we will inflict
        class MetadataMutation(object):
            def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
                self.obj_id = obj_id_
                self.desc = desc_
                self.mutate_fn = mutate_fn_
                self.expectation = expectation_
                if ls_path is None:
                    self.ls_path = "."
                else:
                    self.ls_path = ls_path

            def __eq__(self, other):
                return self.desc == other.desc

            def __hash__(self):
                return hash(self.desc)

        junk = "deadbeef" * 10
        mutations = []

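        # The mutation lambdas below use default arguments (e.g. `lambda o=o:`)
        # to bind the loop variable at definition time; a plain closure over
        # `o` would late-bind and every mutation would hit the last object.
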
        # Removals
        for o in objects:
            if o in [
                    # JournalPointers are auto-replaced if missing (same path as upgrade)
                    "400.00000000",
                    # Missing dirfrags for non-system dirs result in an empty directory
                    "10000000000.00000000",
                    # PurgeQueue is auto-created if not found on startup
                    "500.00000000",
                    # Open file table is auto-created if not found on startup
                    "mds0_openfiles.0"
            ]:
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            log.info("Expectation on rm '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(MetadataMutation(
                o,
                "Delete {0}".format(o),
                lambda o=o: self.fs.radosm(["rm", o]),
                expectation
            ))

        # Blatant corruptions
        for obj_id in data_objects:
            if obj_id == "500.00000000":
                # purge queue corruption results in read-only FS
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.radosm(["put", o, "-"], stdin=StringIO(junk)),
                    READONLY
                ))
            else:
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.radosm(["put", o, "-"], stdin=StringIO(junk)),
                    DAMAGED_ON_START
                ))

        # Truncations
        for o in data_objects:
            if o == "500.00000000":
                # The PurgeQueue is allowed to be empty: Journaler interprets
                # an empty header object as an empty journal.
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            mutations.append(
                MetadataMutation(
                    o,
                    "Truncate {0}".format(o),
                    lambda o=o: self.fs.radosm(["truncate", o, "0"]),
                    expectation
                ))

        # OMAP value corruptions
        for o, k in omap_keys:
            if o.startswith("100."):
                # Anything in rank 0's 'mydir'
                expectation = DAMAGED_ON_START
            else:
                expectation = EIO_ON_LS

            mutations.append(
                MetadataMutation(
                    o,
                    "Corrupt omap key {0}:{1}".format(o, k),
                    lambda o=o, k=k: self.fs.radosm(["setomapval", o, k, junk]),
                    expectation,
                    get_path(o, k)
                )
            )

        # OMAP header corruptions
        for o in omap_header_objs:
            if re.match(r"60.\.00000000", o) \
                    or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
                expectation = DAMAGED_ON_START
            else:
                expectation = NO_DAMAGE

            log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(
                MetadataMutation(
                    o,
                    "Corrupt omap header on {0}".format(o),
                    lambda o=o: self.fs.radosm(["setomapheader", o, junk]),
                    expectation
                )
            )

        results = {}

        for mutation in mutations:
            log.info("Applying mutation '{0}'".format(mutation.desc))

            # Reset MDS state
            self.mount_a.umount_wait(force=True)
            self.fs.fail()
            self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

            # Reset RADOS pool state
            self.fs.radosm(['import', '-'], stdin=BytesIO(serialized))

            # Inject the mutation
            mutation.mutate_fn()

            # Try starting the MDS
            self.fs.set_joinable()

            # How long we'll wait between starting a daemon and expecting
            # it to make it through startup, and potentially declare itself
            # damaged to the mon cluster.
            startup_timeout = 60

            if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
                if mutation.expectation == DAMAGED_ON_START:
                    # The MDS may pass through active before making it to damaged
                    try:
                        self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout)
                    except RuntimeError:
                        pass

                # Wait for MDS to either come up or go into damaged state
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout)
                except RuntimeError:
                    crashed = False
                    # Didn't make it to healthy or damaged, did it crash?
                    for daemon_id, daemon in self.fs.mds_daemons.items():
                        if daemon.proc and daemon.proc.finished:
                            crashed = True
                            log.error("Daemon {0} crashed!".format(daemon_id))
                            daemon.proc = None  # So that subsequent stop() doesn't raise an error
                    if not crashed:
                        # Didn't go healthy, didn't go damaged, didn't crash, so what?
                        raise
                    else:
                        log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
                        results[mutation] = CRASHED
                        continue
                if self.is_marked_damaged(0):
                    log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_START
                    continue
                else:
                    log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc))
            else:
                try:
                    self.wait_until_true(self.fs.are_daemons_healthy, 60)
                except RuntimeError:
                    log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc))
                    if self.is_marked_damaged(0):
                        results[mutation] = DAMAGED_ON_START
                    else:
                        results[mutation] = FAILED_SERVER
                    continue
                log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))

            # MDS is up, should go damaged on ls or client mount
            self.mount_a.mount_wait()
            if mutation.ls_path == ".":
                proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
            else:
                proc = self.mount_a.stat(mutation.ls_path, wait=False)

            if mutation.expectation == DAMAGED_ON_LS:
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
                    log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_LS
                except RuntimeError:
                    if self.fs.are_daemons_healthy():
                        log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
                            mutation.desc))
                        results[mutation] = NO_DAMAGE
                    else:
                        log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
                        results[mutation] = FAILED_SERVER
            elif mutation.expectation == READONLY:
                proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False)
                try:
                    proc.wait()
                except CommandFailedError:
                    stderr = proc.stderr.getvalue()
                    log.info(stderr)
                    if "read-only file system" not in stderr.lower():
                        raise
            else:
                try:
                    wait([proc], 20)
                    log.info("Result: Mutation '{0}' did not cause DAMAGED state".format(mutation.desc))
                    results[mutation] = NO_DAMAGE
                except MaxWhileTries:
                    log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
                    results[mutation] = FAILED_CLIENT
                except CommandFailedError as e:
                    if e.exitstatus == errno.EIO:
                        log.info("Result: EIO on client")
                        results[mutation] = EIO_ON_LS
                    else:
                        log.info("Result: unexpected error {0} on client".format(e))
                        results[mutation] = FAILED_CLIENT

            if mutation.expectation == EIO_ON_LS:
                # EIOs mean something handled by DamageTable: assert that it has
                # been populated
                damage = json.loads(
                    self.fs.mon_manager.raw_cluster_cmd(
                        'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty'))
                if len(damage) == 0:
                    results[mutation] = EIO_NO_DAMAGE

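            # (A `damage ls` entry is a dict roughly of the form
            # {"id": 3816905741, "damage_type": "dentry", "ino": 1099511627776,
            #  "dname": "sixmegs", ...}; an empty list means the MDS recorded
            # no damage for this mutation.)
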
        failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result]
        if failures:
            log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
            for mutation, result in failures:
                log.error("  Expected '{0}' actually '{1}' from '{2}'".format(
                    mutation.expectation, result, mutation.desc
                ))
            raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
        else:
            log.info("All {0} mutations had expected outcomes".format(len(mutations)))

    def test_damaged_dentry(self):
        # Damage to dentries is interesting because it leaves the
        # directory's `complete` flag in a subtle state: we have
        # marked the dir complete so that folks can access it, but
        # in actual fact a dentry is missing
        self.mount_a.run_shell(["mkdir", "subdir/"])

        self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
        self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])

        subdir_ino = self.mount_a.path_to_ino("subdir")

        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.fail()

        # Corrupt a dentry
        junk = "deadbeef" * 10
        dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
        self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

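        # (The dirfrag object written above is named "<ino-hex>.<frag>", with
        # frag 00000000 being the unsplit whole-directory fragment; each
        # dentry is an omap key of the form "<name>_<snap>", so
        # "file_to_be_damaged_head" is the live, non-snapshot version.)
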
        # Start up and try to list it
        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        self.mount_a.mount_wait()
        dentries = self.mount_a.ls("subdir/")

        # The damaged guy should have disappeared
        self.assertEqual(dentries, ["file_undamaged"])

        # I should get ENOENT if I try to read it normally, because
        # the dir is considered complete
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected ENOENT")

        # The fact that there is damage should have been recorded
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        damage_id = damage[0]['id']

        # If I try to create a dentry with the same name as the damaged guy
        # then that should be forbidden
        try:
            self.mount_a.touch("subdir/file_to_be_damaged")
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.EIO)
        else:
            raise AssertionError("Expected EIO")

        # Attempting that touch will clear the client's complete flag; now
        # when I stat it I'll get EIO instead of ENOENT
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            if isinstance(self.mount_a, FuseMount):
                self.assertEqual(e.exitstatus, errno.EIO)
            else:
                # Old kernel client handles this case differently
                self.assertIn(e.exitstatus, [errno.ENOENT, errno.EIO])
        else:
            raise AssertionError("Expected EIO")

        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "2")

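        # (ceph.dir.files is a virtual xattr through which the MDS reports
        # its file count for the directory; it still says "2" because only
        # the dentry was corrupted, not the directory's accounted stats.)
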
        self.mount_a.umount_wait()

        # Now repair the stats
        scrub_json = self.fs.run_scrub(["start", "/subdir", "repair"])
        log.info(json.dumps(scrub_json, indent=2))

        self.assertNotEqual(scrub_json, None)
        self.assertEqual(scrub_json["return_code"], 0)
        self.assertEqual(self.fs.wait_until_scrub_complete(tag=scrub_json["scrub_tag"]), True)

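        # (`scrub start` returns JSON roughly like
        # {"return_code": 0, "scrub_tag": "<uuid>", "mode": "asynchronous"};
        # the tag is what wait_until_scrub_complete polls for.)
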
        # Check that the file count is now correct
        self.mount_a.mount_wait()
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "1")

        # Clean up the omap object
        self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Clean up the damagetable entry
        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", "{did}".format(did=damage_id))

        # Now I should be able to create a file with the same name as the
        # damaged guy if I want.
        self.mount_a.touch("subdir/file_to_be_damaged")

    def test_open_ino_errors(self):
        """
        That errors encountered during opening inos are properly propagated
        """

        self.mount_a.run_shell(["mkdir", "dir1"])
        self.mount_a.run_shell(["touch", "dir1/file1"])
        self.mount_a.run_shell(["mkdir", "dir2"])
        self.mount_a.run_shell(["touch", "dir2/file2"])
        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
        self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])

        file1_ino = self.mount_a.path_to_ino("dir1/file1")
        file2_ino = self.mount_a.path_to_ino("dir2/file2")
        dir2_ino = self.mount_a.path_to_ino("dir2")

        # Ensure everything is written to backing store
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Drop everything from the MDS cache
        self.fs.fail()
        self.fs.journal_tool(['journal', 'reset'], 0)
        self.fs.set_joinable()
        self.fs.wait_for_daemons()

        self.mount_a.mount_wait()

        # Case 1: un-decodeable backtrace

        # Validate that the backtrace is present and decodable
        self.fs.read_backtrace(file1_ino)
        # Go corrupt the backtrace of dir1/file1 (used for resolving
        # testdir/hardlink1).
        self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")

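        # (A backtrace is an encoded ancestor list stored in the "parent"
        # xattr of the inode's first data object; resolving a hardlink opens
        # its target by ino, which follows that backtrace, so scribbling over
        # it should make the open-by-ino fail.)
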
        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "backtrace")
        self.assertEqual(damage[0]['ino'], file1_ino)

        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", str(damage[0]['id']))

        # Case 2: missing dirfrag for the target inode

        self.fs.radosm(["rm", "{0:x}.00000000".format(dir2_ino)])

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that two entries are created in the damage table; their
        # ordering is not guaranteed, so accept either
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 2)
        if damage[0]['damage_type'] == "backtrace":
            self.assertEqual(damage[0]['ino'], file2_ino)
            self.assertEqual(damage[1]['damage_type'], "dir_frag")
            self.assertEqual(damage[1]['ino'], dir2_ino)
        else:
            self.assertEqual(damage[0]['damage_type'], "dir_frag")
            self.assertEqual(damage[0]['ino'], dir2_ino)
            self.assertEqual(damage[1]['damage_type'], "backtrace")
            self.assertEqual(damage[1]['ino'], file2_ino)

        for entry in damage:
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "rm", str(entry['id']))