]>
git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/dump_stuck.py
237d9127fc1487589dd1d114f4cd3cc2af8ce5ee
8 from teuthology
import misc
as teuthology
11 log
= logging
.getLogger(__name__
)
def check_stuck(manager, num_inactive, num_unclean, num_stale, timeout=10):
    """
    Do checks. Make sure get_stuck_pgs returns the right amount of
    information, then compare the results with the values passed in.
    This passes if all asserts pass.

    :param manager: Ceph manager
    :param num_inactive: number of inactive PGs expected to be stuck
    :param num_unclean: number of unclean PGs expected to be stuck
    :param num_stale: number of stale PGs expected to be stuck
    :param timeout: timeout value for the get_stuck_pgs calls
    """
    inactive = manager.get_stuck_pgs('inactive', timeout)
    unclean = manager.get_stuck_pgs('unclean', timeout)
    stale = manager.get_stuck_pgs('stale', timeout)
    # Lazy %-style args: only rendered when INFO logging is enabled.
    log.info('inactive %s / %d, unclean %s / %d, stale %s / %d',
             len(inactive), num_inactive,
             len(unclean), num_unclean,
             len(stale), num_stale)
    assert len(inactive) == num_inactive
    assert len(unclean) == num_unclean
    assert len(stale) == num_stale
def task(ctx, config):
    """
    Test the dump_stuck command.

    Drives the cluster through a sequence of states (all clean, one osd
    out, one then both osds down) and uses check_stuck() to verify that
    the counts of stuck inactive/unclean/stale PGs match expectations.

    :param ctx: Context
    :param config: Configuration (must be None)
    """
    assert config is None, \
        'dump_stuck requires no configuration'
    assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \
        'dump_stuck requires exactly 2 osds'

    timeout = 60
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    manager.flush_pg_stats([0, 1])
    manager.wait_for_clean(timeout)

    # Lower the stuck threshold so PGs report as stuck quickly.
    manager.raw_cluster_cmd('tell', 'mon.a', 'injectargs', '--',
                            # '--mon-osd-report-timeout 90',
                            '--mon-pg-stuck-threshold 10')

    # all active+clean: nothing should be stuck yet
    check_stuck(
        manager,
        num_inactive=0,
        num_unclean=0,
        num_stale=0,
        )
    num_pgs = manager.get_num_pgs()

    manager.mark_out_osd(0)
    time.sleep(timeout)
    manager.flush_pg_stats([1])
    manager.wait_for_recovery(timeout)

    # all active+clean+remapped
    check_stuck(
        manager,
        num_inactive=0,
        num_unclean=0,
        num_stale=0,
        )

    manager.mark_in_osd(0)
    manager.flush_pg_stats([0, 1])
    manager.wait_for_clean(timeout)

    # back to all active+clean
    check_stuck(
        manager,
        num_inactive=0,
        num_unclean=0,
        num_stale=0,
        )

    log.info('stopping first osd')
    manager.kill_osd(0)
    manager.mark_down_osd(0)
    manager.wait_for_active(timeout)

    log.info('waiting for all to be unclean')
    starttime = time.time()
    done = False
    while not done:
        try:
            check_stuck(
                manager,
                num_inactive=0,
                num_unclean=num_pgs,
                num_stale=0,
                )
            done = True
        except AssertionError:
            # wait up to 15 minutes for all PGs to become stuck unclean
            if time.time() - starttime > 900:
                raise

    log.info('stopping second osd')
    manager.kill_osd(1)
    manager.mark_down_osd(1)

    log.info('waiting for all to be stale')
    starttime = time.time()
    done = False
    while not done:
        try:
            check_stuck(
                manager,
                num_inactive=0,
                num_unclean=num_pgs,
                num_stale=num_pgs,
                )
            done = True
        except AssertionError:
            # wait up to 15 minutes to become stale
            if time.time() - starttime > 900:
                raise

    log.info('reviving')
    for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'):
        manager.revive_osd(id_)
        manager.mark_in_osd(id_)
    # flush_pg_stats can fail until the revived osds are actually up;
    # keep retrying (best effort) until it succeeds.
    while True:
        try:
            manager.flush_pg_stats([0, 1])
            break
        except Exception:
            log.exception('osds must not be started yet, waiting...')
            time.sleep(1)

    manager.wait_for_clean(timeout)