import json
import logging
import errno
import re
from teuthology.contextutil import MaxWhileTries
from teuthology.exceptions import CommandFailedError
from teuthology.orchestra.run import wait
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology

DAMAGED_ON_START = "damaged_on_start"
DAMAGED_ON_LS = "damaged_on_ls"
CRASHED = "server crashed"
NO_DAMAGE = "no damage"
READONLY = "readonly"
FAILED_CLIENT = "client failed"
FAILED_SERVER = "server failed"

# An EIO in response to a stat from the client
EIO_ON_LS = "eio"

# An EIO, but nothing in the damage table (never what we expect)
EIO_NO_DAMAGE = "eio without damage entry"

log = logging.getLogger(__name__)


class TestDamage(CephFSTestCase):
    def _simple_workload_write(self):
        self.mount_a.run_shell(["mkdir", "subdir"])
        self.mount_a.write_n_mb("subdir/sixmegs", 6)
        return self.mount_a.stat("subdir/sixmegs")

    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    @for_teuthology  # 459s
    def test_object_deletion(self):
        """
        That the MDS has a clean 'damaged' response to loss of any single metadata object
        """

        self._simple_workload_write()

        # It would be nice to permute whether the metadata pool state
        # contains sessions or not, but for the moment close this session
        # to avoid waiting through reconnect on every MDS start.
        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

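        # Snapshot the metadata pool so that each mutation below starts from
        # a pristine copy (restored with "rados import" in the loop).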
        self.fs.rados(['export', '/tmp/metadata.bin'])

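        # A sketch of the metadata object naming assumed by the filters
        # below: dirfrag objects are named "<inode hex>.<frag>", so
        # "1.00000000" is the root dirfrag, "100.00000000" is rank 0's
        # ~mds0 directory, and "600.00000000" through "609.00000000" are
        # rank 0's stray dirfrags.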
        def is_ignored(obj_id, dentry=None):
            """
            A filter to avoid redundantly mutating many similar objects (e.g.
            stray dirfrags) or similar dentries (e.g. stray dir dentries)
            """
            if re.match(r"60.\.00000000", obj_id) and obj_id != "600.00000000":
                return True

            if dentry and obj_id == "100.00000000":
                if re.match("stray.+_head", dentry) and dentry != "stray0_head":
                    return True

            return False

        def get_path(obj_id, dentry=None):
            """
            What filesystem path does this object or dentry correspond to? i.e.
            what should I poke to see EIO after damaging it?
            """

            if obj_id == "1.00000000" and dentry == "subdir_head":
                return "./subdir"
            elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
                return "./subdir/sixmegs"

            # None means ls will do an "ls -R" in hope of seeing some errors
            return None

        objects = self.fs.rados(["ls"]).split("\n")
        objects = [o for o in objects if not is_ignored(o)]

        # Find all objects with an OMAP header
        omap_header_objs = []
        for o in objects:
            header = self.fs.rados(["getomapheader", o])
            # The rados CLI wraps the header output in a hex-printed style
            header_bytes = int(re.match(r"header \((.+) bytes\)", header).group(1))
            if header_bytes > 0:
                omap_header_objs.append(o)

        # Find all OMAP key/vals
        omap_keys = []
        for o in objects:
            keys_str = self.fs.rados(["listomapkeys", o])
            if keys_str:
                for key in keys_str.split("\n"):
                    if not is_ignored(o, key):
                        omap_keys.append((o, key))

        # Find objects that have data in their bodies
        data_objects = []
        for obj_id in objects:
            stat_out = self.fs.rados(["stat", obj_id])
            size = int(re.match(r".+, size (.+)$", stat_out).group(1))
            if size > 0:
                data_objects.append(obj_id)

        # Define the various forms of damage we will inflict
        class MetadataMutation(object):
            def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
                self.obj_id = obj_id_
                self.desc = desc_
                self.mutate_fn = mutate_fn_
                self.expectation = expectation_
                if ls_path is None:
                    self.ls_path = "."
                else:
                    self.ls_path = ls_path

            def __eq__(self, other):
                return self.desc == other.desc

            def __hash__(self):
                return hash(self.desc)

        junk = "deadbeef" * 10
        mutations = []

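        # NB: the mutate_fn lambdas below bind their loop variable via a
        # default argument (e.g. "lambda o=o: ..."); a bare closure would
        # late-bind and every mutation would act on the last object seen.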
        # Removals
        for o in objects:
            if o in [
                # JournalPointers are auto-replaced if missing (same path as upgrade)
                "400.00000000",
                # Missing dirfrags for non-system dirs result in empty directory
                "10000000000.00000000",
                # PurgeQueue is auto-created if not found on startup
                "500.00000000"
            ]:
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            log.info("Expectation on rm '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(MetadataMutation(
                o,
                "Delete {0}".format(o),
                lambda o=o: self.fs.rados(["rm", o]),
                expectation
            ))

        # Blatant corruptions
        for obj_id in data_objects:
            if obj_id == "500.00000000":
                # purge queue corruption results in read-only FS
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk),
                    READONLY
                ))
            else:
                mutations.append(MetadataMutation(
                    obj_id,
                    "Corrupt {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk),
                    DAMAGED_ON_START
                ))

        # Truncations
        for o in data_objects:
            if o == "500.00000000":
                # The PurgeQueue is allowed to be empty: Journaler interprets
                # an empty header object as an empty journal.
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            mutations.append(
                MetadataMutation(
                    o,
                    "Truncate {0}".format(o),
                    lambda o=o: self.fs.rados(["truncate", o, "0"]),
                    expectation
                ))

        # OMAP value corruptions
        for o, k in omap_keys:
            if o.startswith("100."):
                # Anything in rank 0's 'mydir'
                expectation = DAMAGED_ON_START
            else:
                expectation = EIO_ON_LS

            mutations.append(
                MetadataMutation(
                    o,
                    "Corrupt omap key {0}:{1}".format(o, k),
                    lambda o=o, k=k: self.fs.rados(["setomapval", o, k, junk]),
                    expectation,
                    get_path(o, k)
                )
            )

        # OMAP header corruptions
        for o in omap_header_objs:
            if re.match(r"60.\.00000000", o) \
                    or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
                expectation = DAMAGED_ON_START
            else:
                expectation = NO_DAMAGE

            log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
                o, expectation
            ))

            mutations.append(
                MetadataMutation(
                    o,
                    "Corrupt omap header on {0}".format(o),
                    lambda o=o: self.fs.rados(["setomapheader", o, junk]),
                    expectation
                )
            )

        results = {}

        for mutation in mutations:
            log.info("Applying mutation '{0}'".format(mutation.desc))

            # Reset MDS state
            self.mount_a.umount_wait(force=True)
            self.fs.mds_stop()
            self.fs.mds_fail()
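            # "mds repaired" clears rank 0's damaged flag in the MDSMap so
            # that a daemon may be assigned to the rank again.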
            self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

            # Reset RADOS pool state
            self.fs.rados(['import', '/tmp/metadata.bin'])

            # Inject the mutation
            mutation.mutate_fn()

            # Try starting the MDS
            self.fs.mds_restart()

            # How long we'll wait between starting a daemon and expecting
            # it to make it through startup, and potentially declare itself
            # damaged to the mon cluster.
            startup_timeout = 60

            if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
                if mutation.expectation == DAMAGED_ON_START:
                    # The MDS may pass through active before making it to damaged
                    try:
                        self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout)
                    except RuntimeError:
                        pass

                # Wait for MDS to either come up or go into damaged state
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout)
                except RuntimeError:
                    crashed = False
                    # Didn't make it to healthy or damaged, did it crash?
                    for daemon_id, daemon in self.fs.mds_daemons.items():
                        if daemon.proc and daemon.proc.finished:
                            crashed = True
                            log.error("Daemon {0} crashed!".format(daemon_id))
                            daemon.proc = None  # So that subsequent stop() doesn't raise error
                    if not crashed:
                        # Didn't go healthy, didn't go damaged, didn't crash, so what?
                        raise
                    else:
                        log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
                        results[mutation] = CRASHED
                        continue
                if self.is_marked_damaged(0):
                    log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_START
                    continue
                else:
                    log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc))
            else:
                try:
                    self.wait_until_true(self.fs.are_daemons_healthy, 60)
                except RuntimeError:
                    log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc))
                    if self.is_marked_damaged(0):
                        results[mutation] = DAMAGED_ON_START
                    else:
                        results[mutation] = FAILED_SERVER
                    continue
                log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))

            # MDS is up, should go damaged on ls or client mount
            self.mount_a.mount()
            self.mount_a.wait_until_mounted()
            if mutation.ls_path == ".":
                proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
            else:
                proc = self.mount_a.stat(mutation.ls_path, wait=False)

            if mutation.expectation == DAMAGED_ON_LS:
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
                    log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_LS
                except RuntimeError:
                    if self.fs.are_daemons_healthy():
                        log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
                            mutation.desc))
                        results[mutation] = NO_DAMAGE
                    else:
                        log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
                        results[mutation] = FAILED_SERVER
            elif mutation.expectation == READONLY:
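                # Purge queue damage is expected to send the MDS read-only
                # rather than damaged, so a client write should fail with
                # EROFS ("Read-only file system").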
                proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False)
                try:
                    proc.wait()
                except CommandFailedError:
                    stderr = proc.stderr.getvalue()
                    log.info(stderr)
                    if "Read-only file system".lower() in stderr.lower():
                        pass
                    else:
                        raise
            else:
                try:
                    wait([proc], 20)
                    log.info("Result: Mutation '{0}' did not cause DAMAGED state".format(mutation.desc))
                    results[mutation] = NO_DAMAGE
                except MaxWhileTries:
                    log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
                    results[mutation] = FAILED_CLIENT
                except CommandFailedError as e:
                    if e.exitstatus == errno.EIO:
                        log.info("Result: EIO on client")
                        results[mutation] = EIO_ON_LS
                    else:
                        log.info("Result: unexpected error {0} on client".format(e))
                        results[mutation] = FAILED_CLIENT

            if mutation.expectation == EIO_ON_LS:
                # EIOs mean something handled by DamageTable: assert that it has
                # been populated
                damage = json.loads(
                    self.fs.mon_manager.raw_cluster_cmd(
                        'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty'))
                if len(damage) == 0:
                    results[mutation] = EIO_NO_DAMAGE

        failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result]
        if failures:
            log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
            for mutation, result in failures:
                log.error("  Expected '{0}' actually '{1}' from '{2}'".format(
                    mutation.expectation, result, mutation.desc
                ))
            raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
        else:
            log.info("All {0} mutations had expected outcomes".format(len(mutations)))

    def test_damaged_dentry(self):
        # Damage to dentries is interesting because it leaves the
        # directory's `complete` flag in a subtle state where
        # we have marked the dir complete in order that folks
        # can access it, but in actual fact there is a dentry
        # missing
        self.mount_a.run_shell(["mkdir", "subdir/"])

        self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
        self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])

        subdir_ino = self.mount_a.path_to_ino("subdir")

        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        # Corrupt a dentry
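        # (the MDS will fail to decode the garbage omap value when it loads
        # the dirfrag, and should hide the dentry and record damage rather
        # than assert out)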
        junk = "deadbeef" * 10
        dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Start up and try to list it
        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        dentries = self.mount_a.ls("subdir/")

        # The damaged guy should have disappeared
        self.assertEqual(dentries, ["file_undamaged"])

        # I should get ENOENT if I try and read it normally, because
        # the dir is considered complete
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected ENOENT")

        # The fact that there is damage should have been recorded
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        damage_id = damage[0]['id']

        # If I try to create a dentry with the same name as the damaged guy
        # then that should be forbidden
        try:
            self.mount_a.touch("subdir/file_to_be_damaged")
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.EIO)
        else:
            raise AssertionError("Expected EIO")

        # Attempting that touch will clear the client's complete flag, now
        # when I stat it I'll get EIO instead of ENOENT
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            if isinstance(self.mount_a, FuseMount):
                self.assertEqual(e.exitstatus, errno.EIO)
            else:
                # Kernel client handles this case differently
                self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected EIO")

        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "2")

        self.mount_a.umount_wait()

        # Now repair the stats
        scrub_json = self.fs.mds_asok(["scrub_path", "/subdir", "repair"])
        log.info(json.dumps(scrub_json, indent=2))

        self.assertEqual(scrub_json["passed_validation"], False)
        self.assertEqual(scrub_json["raw_stats"]["checked"], True)
        self.assertEqual(scrub_json["raw_stats"]["passed"], False)

        # Check that the file count is now correct
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "1")

        # Clean up the omap object
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Clean up the damagetable entry
        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", "{did}".format(did=damage_id))

        # Now I should be able to create a file with the same name as the
        # damaged guy if I want.
        self.mount_a.touch("subdir/file_to_be_damaged")

    def test_open_ino_errors(self):
        """
        That errors encountered during opening inos are properly propagated
        """

        self.mount_a.run_shell(["mkdir", "dir1"])
        self.mount_a.run_shell(["touch", "dir1/file1"])
        self.mount_a.run_shell(["mkdir", "dir2"])
        self.mount_a.run_shell(["touch", "dir2/file2"])
        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
        self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])

        file1_ino = self.mount_a.path_to_ino("dir1/file1")
        file2_ino = self.mount_a.path_to_ino("dir2/file2")
        dir2_ino = self.mount_a.path_to_ino("dir2")

        # Ensure everything is written to backing store
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Drop everything from the MDS cache
        self.mds_cluster.mds_stop()
        self.fs.journal_tool(['journal', 'reset'], 0)
        self.mds_cluster.mds_fail_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()

        # Case 1: un-decodeable backtrace

        # Validate that the backtrace is present and decodable
        self.fs.read_backtrace(file1_ino)
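        # (backtraces live in the "parent" xattr of the inode's first data
        # object; they are what allows the MDS to resolve a hard link's
        # target by inode number)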
        # Go corrupt the backtrace of dir1/file1 (used for resolving
        # testdir/hardlink1).
        self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "backtrace")
        self.assertEqual(damage[0]['ino'], file1_ino)

        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", str(damage[0]['id']))

        # Case 2: missing dirfrag for the target inode

        self.fs.rados(["rm", "{0:x}.00000000".format(dir2_ino)])

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 2)
        if damage[0]['damage_type'] == "backtrace":
            self.assertEqual(damage[0]['ino'], file2_ino)
            self.assertEqual(damage[1]['damage_type'], "dir_frag")
            self.assertEqual(damage[1]['ino'], dir2_ino)
        else:
            self.assertEqual(damage[0]['damage_type'], "dir_frag")
            self.assertEqual(damage[0]['ino'], dir2_ino)
            self.assertEqual(damage[1]['damage_type'], "backtrace")
            self.assertEqual(damage[1]['ino'], file2_ino)

        for entry in damage:
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "rm", str(entry['id']))