"""
Thrash -- Simulate random osd failures.
"""
import contextlib
import logging
import ceph_manager
from teuthology import misc as teuthology


log = logging.getLogger(__name__)

@contextlib.contextmanager
def task(ctx, config):
    """
    "Thrash" the OSDs by randomly marking them out/down (and then back
    in) until the task is ended. This loops, and every op_delay
    seconds it randomly chooses to add or remove an OSD (even odds)
    unless there are fewer than min_out OSDs out of the cluster, or
    more than min_in OSDs in the cluster.

    All commands are run on mon0 and it stops when __exit__ is called.

    The config is optional, and is a dict containing some or all of:

    cluster: (default 'ceph') the name of the cluster to thrash

    min_in: (default 3) the minimum number of OSDs to keep in the
        cluster

    min_out: (default 0) the minimum number of OSDs to keep out of the
        cluster

    op_delay: (5) the length of time to sleep between changing an
        OSD's status

    min_dead: (0) minimum number of osds to leave down/dead.

    max_dead: (0) maximum number of osds to leave down/dead before waiting
        for clean. This should probably be num_replicas - 1.

    clean_interval: (60) the approximate length of time to loop before
        waiting until the cluster goes clean. (In reality this is used
        to probabilistically choose when to wait, and the method used
        makes it closer to -- but not identical to -- the half-life.)

    scrub_interval: (-1) the approximate length of time to loop before
        waiting until a scrub is performed while cleaning. (In reality
        this is used to probabilistically choose when to wait, and it
        only applies to the cases where cleaning is being performed.)
        -1 is used to indicate that no scrubbing will be done.

    chance_down: (0.4) the probability that the thrasher will mark an
        OSD down rather than marking it out. (The thrasher will not
        consider that OSD out of the cluster, since presently an OSD
        wrongly marked down will mark itself back up again.) This value
        can be either an integer (e.g., 75) or a float probability
        (e.g., 0.75).

    chance_test_min_size: (0) chance to run test_pool_min_size,
        which:
        - kills all but one osd
        - waits
        - kills that osd
        - revives all other osds
        - verifies that the osds fully recover

    timeout: (360) the number of seconds to wait for the cluster
        to become clean after each cluster change. If this doesn't
        happen within the timeout, an exception will be raised.

    revive_timeout: (150) number of seconds to wait for an osd asok to
        appear after attempting to revive the osd

    thrash_primary_affinity: (true) randomly adjust primary-affinity

    chance_pgnum_grow: (0) chance to increase a pool's pg_num
    chance_pgpnum_fix: (0) chance to adjust a pool's pgp_num to match its pg_num
    pool_grow_by: (10) amount to increase pg_num by
    max_pgs_per_pool_osd: (1200) don't expand pools past this many pgs per osd

    pause_short: (3) duration of short pause
    pause_long: (80) duration of long pause
    pause_check_after: (50) assert osd down after this long
    chance_inject_pause_short: (1) chance of injecting short stall
    chance_inject_pause_long: (0) chance of injecting long stall

    clean_wait: (0) duration to wait before resuming thrashing once clean

    sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a
        random live osd

    powercycle: (false) whether to power cycle the node instead
        of just the osd process. Note that this assumes that a single
        osd is the only important process on the node.

    bdev_inject_crash: (0) seconds to delay while inducing a synthetic crash.
        The delay lets the BlockDevice "accept" more aio operations but blocks
        any flush, and then eventually crashes (losing some or all ios). If 0,
        no bdev failure injection is enabled.

    bdev_inject_crash_probability: (.5) probability of doing a bdev failure
        injection crash vs. a normal OSD kill.

    chance_test_backfill_full: (0) chance to simulate full disks stopping
        backfill

    chance_test_map_discontinuity: (0) chance to test map discontinuity
    map_discontinuity_sleep_time: (40) time to wait for map trims

    ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
    chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)

    optrack_toggle_delay: (2.0) duration to delay between toggling op tracker
        enablement to all osds

    dump_ops_enable: (true) continuously dump ops on all live osds

    noscrub_toggle_delay: (2.0) duration to delay between toggling noscrub

    disable_objectstore_tool_tests: (false) disable ceph_objectstore_tool based
        tests

    chance_thrash_cluster_full: (.05) chance to briefly simulate a full
        cluster by lowering the full ratio, then restoring it

    chance_thrash_pg_upmap: (1.0) chance to set or clear a random pg-upmap
        mapping
    chance_thrash_pg_upmap_items: (1.0) chance to set or clear a random
        pg-upmap-items mapping

    example:

    tasks:
    - ceph:
    - thrashosds:
        cluster: ceph
        chance_down: 10
        op_delay: 3
        min_in: 1
        timeout: 600
    - interactive:
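
    a second, purely illustrative example (powercycle assumes working IPMI
    consoles on every osd node; the checks in the code below enforce this):

    tasks:
    - ceph:
    - thrashosds:
        powercycle: true
        timeout: 600
    - interactive: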
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'thrashosds task only accepts a dict for configuration'
    # add default value for sighup_delay
    config['sighup_delay'] = config.get('sighup_delay', 0.1)
    # add default value for optrack_toggle_delay
    config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0)
    # add default value for dump_ops_enable
    config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
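    # (kept as the string "true" rather than a bool: downstream consumers
    # are assumed to compare this setting as a string)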
    # add default value for noscrub_toggle_delay
    config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0)
    # add default value for random_eio
    config['random_eio'] = config.get('random_eio', 0.0)

    log.info("config is {config}".format(config=str(config)))

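    # values from overrides['thrashosds'] in the job yaml are deep-merged
    # on top of this task's config, so they take precedence over the
    # defaults set above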
    overrides = ctx.config.get('overrides', {})
    log.info("overrides is {overrides}".format(overrides=str(overrides)))
    teuthology.deep_merge(config, overrides.get('thrashosds', {}))
    cluster = config.get('cluster', 'ceph')

    log.info("config is {config}".format(config=str(config)))

    if 'powercycle' in config:

        # sync everyone first to avoid collateral damage to / etc.
        log.info('Doing preliminary sync to avoid collateral damage...')
        ctx.cluster.run(args=['sync'])

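        # a failed console status check below only logs a warning; the
        # hard requirement (IPMI credentials on every osd node) is
        # enforced afterwards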
        if 'ipmi_user' in ctx.teuthology_config:
            for remote in ctx.cluster.remotes.keys():
                log.debug('checking console status of %s' % remote.shortname)
                if not remote.console.check_status():
                    log.warn('Failed to get console status for %s',
                             remote.shortname)

            # check that all osd remotes have a valid console
            osds = ctx.cluster.only(teuthology.is_type('osd', cluster))
            for remote in osds.remotes.keys():
                if not remote.console.has_ipmi_credentials:
                    raise Exception(
                        'IPMI console required for powercycling, '
                        'but not available on osd role: {r}'.format(
                            r=remote.name))

    cluster_manager = ctx.managers[cluster]
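    # propagate powercycle/bdev_inject_crash settings into the manager's
    # config so the thrasher's kill/revive paths can use them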
    for f in ['powercycle', 'bdev_inject_crash']:
        if config.get(f):
            cluster_manager.config[f] = config.get(f)

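    # Thrasher starts its thrashing thread as soon as it is constructed;
    # the yield below hands control to any nested tasks while osds are
    # thrashed in the background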
    log.info('Beginning thrashosds...')
    thrash_proc = ceph_manager.Thrasher(
        cluster_manager,
        config,
        logger=log.getChild('thrasher')
        )
    try:
        yield
    finally:
        log.info('joining thrashosds')
        thrash_proc.do_join()
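        # once the thrasher has stopped, bring every osd back up and wait
        # for the cluster to fully recover before unwinding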
        cluster_manager.wait_for_all_up()
        cluster_manager.flush_all_pg_stats()
        cluster_manager.wait_for_recovery(config.get('timeout', 360))