"""
Thrash -- Simulate random osd failures.
"""
import contextlib
import logging

from tasks import ceph_manager
from teuthology import misc as teuthology


log = logging.getLogger(__name__)
@contextlib.contextmanager
def task(ctx, config):
    """
    "Thrash" the OSDs by randomly marking them out/down (and then back
    in) until the task is ended. This loops, and every op_delay
    seconds it randomly chooses to add or remove an OSD (even odds)
    unless there are fewer than min_out OSDs out of the cluster, or
    more than min_in OSDs in the cluster.

    All commands are run on mon0 and it stops when __exit__ is called.

    The config is optional, and is a dict containing some or all of:

    cluster: (default 'ceph') the name of the cluster to thrash

    min_in: (default 4) the minimum number of OSDs to keep in the
       cluster

    min_out: (default 0) the minimum number of OSDs to keep out of the
       cluster

    op_delay: (5) the length of time to sleep between changing an
       OSD's status

    min_dead: (0) minimum number of osds to leave down/dead.

    max_dead: (0) maximum number of osds to leave down/dead before waiting
       for clean.  This should probably be num_replicas - 1.

    clean_interval: (60) the approximate length of time to loop before
       waiting until the cluster goes clean. (In reality this is used
       to probabilistically choose when to wait, and the method used
       makes it closer to -- but not identical to -- the half-life.)

    scrub_interval: (-1) the approximate length of time to loop before
       waiting until a scrub is performed while cleaning. (In reality
       this is used to probabilistically choose when to wait, and it
       only applies to the cases where cleaning is being performed).
       -1 is used to indicate that no scrubbing will be done.

    chance_down: (0.4) the probability that the thrasher will mark an
       OSD down rather than marking it out. (The thrasher will not
       consider that OSD out of the cluster, since presently an OSD
       wrongly marked down will mark itself back up again.) This value
       can be either an integer (eg, 75) or a float probability (eg
       0.75).

    chance_test_min_size: (0) chance to run test_pool_min_size,
       which:
       - kills all but one osd
       - waits
       - kills that osd
       - revives all other osds
       - verifies that the osds fully recover

    timeout: (360) the number of seconds to wait for the cluster
       to become clean after each cluster change. If this doesn't
       happen within the timeout, an exception will be raised.

    revive_timeout: (150) number of seconds to wait for an osd asok to
       appear after attempting to revive the osd

    thrash_primary_affinity: (true) randomly adjust primary-affinity

    chance_pgnum_grow: (0) chance to increase a pool's size
    chance_pgpnum_fix: (0) chance to adjust pgpnum to pg for a pool
    pool_grow_by: (10) amount to increase pgnum by
    chance_pgnum_shrink: (0) chance to decrease a pool's size
    pool_shrink_by: (10) amount to decrease pgnum by
    max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd

    pause_short: (3) duration of short pause
    pause_long: (80) duration of long pause
    pause_check_after: (50) assert osd down after this long
    chance_inject_pause_short: (1) chance of injecting short stall
    chance_inject_pause_long: (0) chance of injecting long stall

    clean_wait: (0) duration to wait before resuming thrashing once clean

    sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a
       random live osd

    powercycle: (false) whether to power cycle the node instead
       of just the osd process. Note that this assumes that a single
       osd is the only important process on the node.

    bdev_inject_crash: (0) seconds to delay while inducing a synthetic crash.
       the delay lets the BlockDevice "accept" more aio operations but blocks
       any flush, and then eventually crashes (losing some or all ios).  If 0,
       no bdev failure injection is enabled.

    bdev_inject_crash_probability: (.5) probability of doing a bdev failure
       injection crash vs a normal OSD kill.

    chance_test_backfill_full: (0) chance to simulate full disks stopping
       backfill

    chance_test_map_discontinuity: (0) chance to test map discontinuity
    map_discontinuity_sleep_time: (40) time to wait for map trims

    ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
    chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)

    optrack_toggle_delay: (2.0) duration to delay between toggling op tracker
       enablement to all osds

    dump_ops_enable: (true) continuously dump ops on all live osds

    noscrub_toggle_delay: (2.0) duration to delay between toggling noscrub

    disable_objectstore_tool_tests: (false) disable ceph_objectstore_tool based
       tests

    chance_thrash_cluster_full: .05

    chance_thrash_pg_upmap: 1.0
    chance_thrash_pg_upmap_items: 1.0

    aggressive_pg_num_changes: (true) whether we should bypass the careful throttling of pg_num and pgp_num changes in mgr's adjust_pgs() controller

    example:

    tasks:
    - ceph:
    - thrashosds:
        cluster: ceph
        chance_down: 10
        op_delay: 3
        min_in: 1
        timeout: 600
    - interactive:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'thrashosds task only accepts a dict for configuration'
    # add default value for sighup_delay
    config['sighup_delay'] = config.get('sighup_delay', 0.1)
    # add default value for optrack_toggle_delay
    config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0)
    # add default value for dump_ops_enable
    config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
    # add default value for noscrub_toggle_delay
    config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0)
    # add default value for random_eio
    config['random_eio'] = config.get('random_eio', 0.0)

    log.info("config is {config}".format(config=str(config)))

    overrides = ctx.config.get('overrides', {})
    log.info("overrides is {overrides}".format(overrides=str(overrides)))
    teuthology.deep_merge(config, overrides.get('thrashosds', {}))
    cluster = config.get('cluster', 'ceph')
    # NOTE: read aggro only after deep_merge so that an
    # overrides: thrashosds: aggressive_pg_num_changes setting is honored;
    # previously it was read before the merge and overrides were ignored.
    aggro = config.get('aggressive_pg_num_changes', True)

    log.info("config is {config}".format(config=str(config)))

    if 'powercycle' in config:

        # sync everyone first to avoid collateral damage to / etc.
        log.info('Doing preliminary sync to avoid collateral damage...')
        ctx.cluster.run(args=['sync'])

        if 'ipmi_user' in ctx.teuthology_config:
            for remote in ctx.cluster.remotes.keys():
                log.debug('checking console status of %s' % remote.shortname)
                if not remote.console.check_status():
                    log.warning('Failed to get console status for %s',
                                remote.shortname)

            # check that all osd remotes have a valid console
            osds = ctx.cluster.only(teuthology.is_type('osd', cluster))
            for remote in osds.remotes.keys():
                if not remote.console.has_ipmi_credentials:
                    raise Exception(
                        'IPMI console required for powercycling, '
                        'but not available on osd role: {r}'.format(
                            r=remote.name))

    cluster_manager = ctx.managers[cluster]
    # propagate selected knobs into the manager so the thrasher sees them
    for f in ['powercycle', 'bdev_inject_crash']:
        if config.get(f):
            cluster_manager.config[f] = config.get(f)

    if aggro:
        # bypass mgr's careful pg_num/pgp_num throttling for the test run
        cluster_manager.raw_cluster_cmd(
            'config', 'set', 'mgr',
            'mgr_debug_aggressive_pg_num_changes',
            'true')

    log.info('Beginning thrashosds...')
    thrash_proc = ceph_manager.OSDThrasher(
        cluster_manager,
        config,
        "OSDThrasher",
        logger=log.getChild('thrasher')
        )
    ctx.ceph[cluster].thrashers.append(thrash_proc)
    try:
        yield
    finally:
        # stop thrashing, then wait for the cluster to settle before exiting
        log.info('joining thrashosds')
        thrash_proc.do_join()
        cluster_manager.wait_for_all_osds_up()
        cluster_manager.flush_all_pg_stats()
        cluster_manager.wait_for_recovery(config.get('timeout', 360))
        if aggro:
            # restore the mgr debug option we set above
            cluster_manager.raw_cluster_cmd(
                'config', 'rm', 'mgr',
                'mgr_debug_aggressive_pg_num_changes')