1"""
2Thrash -- Simulate random osd failures.
3"""
4import contextlib
5import logging
9f95a23c 6from tasks import ceph_manager
7c673cae
FG
7from teuthology import misc as teuthology
8
9
10log = logging.getLogger(__name__)
11
12@contextlib.contextmanager
13def task(ctx, config):
14 """
15 "Thrash" the OSDs by randomly marking them out/down (and then back
16 in) until the task is ended. This loops, and every op_delay
17 seconds it randomly chooses to add or remove an OSD (even odds)
18 unless there are fewer than min_out OSDs out of the cluster, or
19 more than min_in OSDs in the cluster.

    All commands are run on mon0 and it stops when __exit__ is called.

    The config is optional, and is a dict containing some or all of:

    cluster: (default 'ceph') the name of the cluster to thrash

    min_in: (default 4) the minimum number of OSDs to keep in the
       cluster

    min_out: (default 0) the minimum number of OSDs to keep out of the
       cluster

    op_delay: (5) the length of time to sleep between changing an
       OSD's status

    min_dead: (0) minimum number of osds to leave down/dead.

    max_dead: (0) maximum number of osds to leave down/dead before waiting
       for clean. This should probably be num_replicas - 1.

    clean_interval: (60) the approximate length of time to loop before
       waiting until the cluster goes clean. (In reality this is used
       to probabilistically choose when to wait, and the method used
       makes it closer to -- but not identical to -- the half-life.)

    scrub_interval: (-1) the approximate length of time to loop before
       waiting until a scrub is performed while cleaning. (In reality
       this is used to probabilistically choose when to wait, and it
       only applies to the cases where cleaning is being performed.)
       -1 is used to indicate that no scrubbing will be done.

    chance_down: (0.4) the probability that the thrasher will mark an
       OSD down rather than marking it out. (The thrasher will not
       consider that OSD out of the cluster, since presently an OSD
       wrongly marked down will mark itself back up again.) This value
       can be either an integer (eg, 75) or a float probability (eg
       0.75).

    chance_test_min_size: (0) chance to run test_pool_min_size,
       which:
       - kills all but one osd
       - waits
       - kills that osd
       - revives all other osds
       - verifies that the osds fully recover

    timeout: (360) the number of seconds to wait for the cluster
       to become clean after each cluster change. If this doesn't
       happen within the timeout, an exception will be raised.

    revive_timeout: (150) number of seconds to wait for an osd asok to
       appear after attempting to revive the osd

    thrash_primary_affinity: (true) randomly adjust primary-affinity
76 chance_pgnum_grow: (0) chance to increase a pool's size
77 chance_pgpnum_fix: (0) chance to adjust pgpnum to pg for a pool
78 pool_grow_by: (10) amount to increase pgnum by
11fdf7f2
TL
79 chance_pgnum_shrink: (0) chance to decrease a pool's size
80 pool_shrink_by: (10) amount to decrease pgnum by
7c673cae
FG
81 max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd

    pause_short: (3) duration of short pause
    pause_long: (80) duration of long pause
    pause_check_after: (50) assert osd down after this long
    chance_inject_pause_short: (1) chance of injecting short stall
    chance_inject_pause_long: (0) chance of injecting long stall

    clean_wait: (0) duration to wait before resuming thrashing once clean

    sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a
       random live osd

    powercycle: (false) whether to power cycle the node instead
       of just the osd process. Note that this assumes that a single
       osd is the only important process on the node.

    bdev_inject_crash: (0) seconds to delay while inducing a synthetic crash.
       the delay lets the BlockDevice "accept" more aio operations but blocks
       any flush, and then eventually crashes (losing some or all ios). If 0,
       no bdev failure injection is enabled.

    bdev_inject_crash_probability: (.5) probability of doing a bdev failure
       injection crash vs a normal OSD kill.

    chance_test_backfill_full: (0) chance to simulate full disks stopping
       backfill

    chance_test_map_discontinuity: (0) chance to test map discontinuity
    map_discontinuity_sleep_time: (40) time to wait for map trims

    ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
    chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)

    optrack_toggle_delay: (2.0) duration to delay between toggling op tracker
       enablement to all osds

    dump_ops_enable: (true) continuously dump ops on all live osds

    noscrub_toggle_delay: (2.0) duration to delay between toggling noscrub

    disable_objectstore_tool_tests: (false) disable ceph_objectstore_tool based
       tests

    chance_thrash_cluster_full: .05

    chance_thrash_pg_upmap: 1.0
    chance_thrash_pg_upmap_items: 1.0

    aggressive_pg_num_changes: (true) whether we should bypass the careful
       throttling of pg_num and pgp_num changes in mgr's adjust_pgs() controller

    example:

    tasks:
    - ceph:
    - thrashosds:
        cluster: ceph
        chance_down: 10
        op_delay: 3
        min_in: 1
        timeout: 600
    - interactive:
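
    The same options can also come from a job's overrides section, which is
    deep-merged into this task's config (a sketch; the option values shown
    here are only illustrative):

    overrides:
      thrashosds:
        chance_down: 10
        op_delay: 3
    tasks:
    - ceph:
    - thrashosds: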
143 """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'thrashosds task only accepts a dict for configuration'
    # add default value for sighup_delay
    config['sighup_delay'] = config.get('sighup_delay', 0.1)
    # add default value for optrack_toggle_delay
    config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0)
    # add default value for dump_ops_enable
    config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
    # add default value for noscrub_toggle_delay
    config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0)
    # add default value for random_eio
    config['random_eio'] = config.get('random_eio', 0.0)
    aggro = config.get('aggressive_pg_num_changes', True)

    log.info("config is {config}".format(config=str(config)))

    overrides = ctx.config.get('overrides', {})
    log.info("overrides is {overrides}".format(overrides=str(overrides)))
    teuthology.deep_merge(config, overrides.get('thrashosds', {}))
    cluster = config.get('cluster', 'ceph')

    log.info("config is {config}".format(config=str(config)))

    if 'powercycle' in config:

        # sync everyone first to avoid collateral damage to / etc.
        log.info('Doing preliminary sync to avoid collateral damage...')
        ctx.cluster.run(args=['sync'])

        if 'ipmi_user' in ctx.teuthology_config:
            for remote in ctx.cluster.remotes.keys():
                log.debug('checking console status of %s' % remote.shortname)
                if not remote.console.check_status():
                    log.warning('Failed to get console status for %s',
                                remote.shortname)

            # check that all osd remotes have a valid console
            osds = ctx.cluster.only(teuthology.is_type('osd', cluster))
            for remote in osds.remotes.keys():
                if not remote.console.has_ipmi_credentials:
                    raise Exception(
                        'IPMI console required for powercycling, '
                        'but not available on osd role: {r}'.format(
                            r=remote.name))

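    # copy powercycle / bdev_inject_crash settings onto the manager so its
    # OSD kill/revive helpers can honor them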
    cluster_manager = ctx.managers[cluster]
    for f in ['powercycle', 'bdev_inject_crash']:
        if config.get(f):
            cluster_manager.config[f] = config.get(f)

    if aggro:
        cluster_manager.raw_cluster_cmd(
            'config', 'set', 'mgr',
            'mgr_debug_aggressive_pg_num_changes',
            'true')

    log.info('Beginning thrashosds...')
    thrash_proc = ceph_manager.OSDThrasher(
        cluster_manager,
        config,
        "OSDThrasher",
        logger=log.getChild('thrasher')
        )
    ctx.ceph[cluster].thrashers.append(thrash_proc)
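    # let the rest of the job run while the thrasher works in the background;
    # on exit, stop the thrasher and wait for the cluster to recover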
    try:
        yield
    finally:
        log.info('joining thrashosds')
        thrash_proc.do_join()
        cluster_manager.wait_for_all_osds_up()
        cluster_manager.flush_all_pg_stats()
        cluster_manager.wait_for_recovery(config.get('timeout', 360))
        if aggro:
            cluster_manager.raw_cluster_cmd(
                'config', 'rm', 'mgr',
                'mgr_debug_aggressive_pg_num_changes')