import json
import logging
import time

from tasks.mgr.mgr_test_case import MgrTestCase


log = logging.getLogger(__name__)


class TestProgress(MgrTestCase):
    POOL = "progress_data"

    # How long we expect to wait at most between taking an OSD out
    # and seeing the progress event pop up.
    EVENT_CREATION_PERIOD = 5

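    # How long (in seconds) to seed the pool with data via "rados bench"
    # before simulating failures.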
    WRITE_PERIOD = 30

    # Generous period for OSD recovery; should be the same order of magnitude
    # as how long it took to write the data to begin with.
    RECOVERY_PERIOD = WRITE_PERIOD * 4

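    # Ask the progress mgr module for its current state ("ceph progress json")
    # and return it as a parsed dict.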
    def _get_progress(self):
        out = self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "json")
        return json.loads(out)

    def _all_events(self):
        """
        To avoid racing on completion, we almost always want to look
        for events in the total list of active and complete, so
        munge them into a single list.
        """
        p = self._get_progress()
        log.info(json.dumps(p, indent=2))
        return p['events'] + p['completed']

    def _events_in_progress(self):
        """
        Return only the events that are still in progress.
        """
        p = self._get_progress()
        log.info(json.dumps(p, indent=2))
        return p['events']

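    # Create the test pool, optionally overriding its replica count ("size").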
    def _setup_pool(self, size=None):
        self.mgr_cluster.mon_manager.create_pool(self.POOL)
        if size is not None:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                'osd', 'pool', 'set', self.POOL, 'size', str(size))

    def _write_some_data(self, t):
        """
        To adapt to test systems of varying performance, we write
        data for a defined time period, rather than to a defined
        capacity.  This will hopefully result in a similar timescale
        for PG recovery after an OSD failure.
        """

        args = [
            "rados", "-p", self.POOL, "bench", str(t), "write", "-t", "16"]

        self.mgr_cluster.admin_remote.run(args=args, wait=True)

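    # Number of OSDs in the cluster, taken from the osdmap dump.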
    def _osd_count(self):
        osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json()
        return len(osd_map['osds'])

    def setUp(self):
        super(TestProgress, self).setUp()
        # Ensure we have at least four OSDs
        if self._osd_count() < 4:
            self.skipTest("Not enough OSDs!")

        # Remove any filesystems so that we can remove their pools
        if self.mds_cluster:
            self.mds_cluster.mds_stop()
            self.mds_cluster.mds_fail()
            self.mds_cluster.delete_all_filesystems()

        # Remove all other pools
        for pool in self.mgr_cluster.mon_manager.get_osd_dump_json()['pools']:
            self.mgr_cluster.mon_manager.remove_pool(pool['pool_name'])

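        # Enable the progress module and start each test from an empty event list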
        self._load_module("progress")
        self.mgr_cluster.mon_manager.raw_cluster_cmd('progress', 'clear')

    def _simulate_failure(self, osd_ids=None):
        """
        Common lead-in to several tests: get some data in the cluster,
        then mark an OSD out to trigger the start of a progress event.

        Return the JSON representation of the failure event.
        """

        if osd_ids is None:
            osd_ids = [0]

        self._setup_pool()
        self._write_some_data(self.WRITE_PERIOD)

        for osd_id in osd_ids:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                'osd', 'out', str(osd_id))

        # Wait for a progress event to pop up
        self.wait_until_equal(lambda: len(self._all_events()), 1,
                              timeout=self.EVENT_CREATION_PERIOD)
        ev = self._all_events()[0]
        log.info(json.dumps(ev, indent=1))
        self.assertIn("Rebalancing after osd.0 marked out", ev['message'])

        return ev

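    # Mark the given OSDs back in, wait for the original "marked out" event to
    # complete, and return the follow-up "marked in" event (or None if no PGs
    # were affected by the OSDs coming back).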
    def _simulate_back_in(self, osd_ids, initial_event):

        for osd_id in osd_ids:
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                'osd', 'in', str(osd_id))

        # First event should complete promptly
        self.wait_until_true(lambda: self._is_complete(initial_event['id']),
                             timeout=self.EVENT_CREATION_PERIOD)

        try:
            # Wait for the "marked in" progress event to pop up
            self.wait_until_equal(lambda: len(self._events_in_progress()), 1,
                                  timeout=self.EVENT_CREATION_PERIOD)
        except RuntimeError as ex:
            if "Timed out after" not in str(ex):
                raise ex

            log.info("There were no PGs affected by the OSD being marked in")
            return None

        new_event = self._events_in_progress()[0]
        log.info(json.dumps(new_event, indent=1))
        self.assertIn("Rebalancing after osd.0 marked in", new_event['message'])

        return new_event

    def _is_quiet(self):
        """
        Whether any progress events are live.
        """
        return len(self._get_progress()['events']) == 0

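    # An event id must appear in exactly one of the live and completed lists;
    # the asserts below catch the progress module reporting it in both (or neither).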
    def _is_complete(self, ev_id):
        progress = self._get_progress()
        live_ids = [ev['id'] for ev in progress['events']]
        complete_ids = [ev['id'] for ev in progress['completed']]
        if ev_id in complete_ids:
            assert ev_id not in live_ids
            return True
        else:
            assert ev_id in live_ids
            return False

    def tearDown(self):
        if self.POOL in self.mgr_cluster.mon_manager.pools:
            self.mgr_cluster.mon_manager.remove_pool(self.POOL)

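        # Mark back in any OSD that a test left marked out (out OSDs carry
        # weight 0.0 in the osdmap)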
        osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json()
        for osd in osd_map['osds']:
            if osd['weight'] == 0.0:
                self.mgr_cluster.mon_manager.raw_cluster_cmd(
                    'osd', 'in', str(osd['osd']))

        super(TestProgress, self).tearDown()

    def test_osd_healthy_recovery(self):
        """
        The simple recovery case: an OSD goes down, its PGs get a new
        placement, and we wait for the PGs to become healthy in their
        new locations.
        """
        ev = self._simulate_failure()

        # Wait for progress event to ultimately reach completion
        self.wait_until_true(lambda: self._is_complete(ev['id']),
                             timeout=self.RECOVERY_PERIOD)
        self.assertTrue(self._is_quiet())

    def test_pool_removal(self):
        """
        Check that a pool removed during OSD recovery causes the
        progress event to be correctly marked complete once there
        is no more data to move.
        """
        ev = self._simulate_failure()

        self.mgr_cluster.mon_manager.remove_pool(self.POOL)

        # Event should complete promptly
        self.wait_until_true(lambda: self._is_complete(ev['id']),
                             timeout=self.EVENT_CREATION_PERIOD)
        self.assertTrue(self._is_quiet())

    def test_osd_came_back(self):
        """
        When a recovery is underway, but the out OSD then comes back in,
        recovery is no longer necessary. Another event should be created
        for the OSD being marked in, and the one that is still ongoing
        should complete.
        """
        ev1 = self._simulate_failure()

        ev2 = self._simulate_back_in([0], ev1)

        if ev2 is not None:
            # Wait for progress event to ultimately complete
            self.wait_until_true(lambda: self._is_complete(ev2['id']),
                                 timeout=self.RECOVERY_PERIOD)

        self.assertTrue(self._is_quiet())

    def test_osd_cannot_recover(self):
        """
        When the cluster cannot recover from a lost OSD, e.g.
        because there is no suitable new placement for it
        (a size=3 pool when there are only 2 OSDs left, or
        a size=3 pool when the remaining OSDs are only on 2 hosts),
        no progress event should be created.
        """

        pool_size = 3

        self._setup_pool(size=pool_size)
        self._write_some_data(self.WRITE_PERIOD)

        # Fail enough OSDs so that fewer than N_replicas OSDs
        # remain available.
        osd_count = self._osd_count()

        # First do some failures that will result in a normal rebalance
        # (Assumption: we're in a test environment that is configured
        # not to require replicas be on different hosts, like teuthology)
        for osd_id in range(0, osd_count - pool_size):
            self.mgr_cluster.mon_manager.raw_cluster_cmd(
                'osd', 'out', str(osd_id))

        # We should see an event for each of the OSDs we took out
        self.wait_until_equal(
            lambda: len(self._all_events()),
            osd_count - pool_size,
            timeout=self.EVENT_CREATION_PERIOD)

        # Those should complete cleanly
        self.wait_until_true(
            lambda: self._is_quiet(),
            timeout=self.RECOVERY_PERIOD
        )

        # Fail one last OSD, at which point the PGs have nowhere to go
        victim_osd = osd_count - pool_size
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            'osd', 'out', str(victim_osd))

        # Check that no event is created
        time.sleep(self.EVENT_CREATION_PERIOD)

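        # Only the events from the earlier, recoverable mark-outs should exist;
        # the final mark-out must not have added a new one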
        self.assertEqual(len(self._all_events()), osd_count - pool_size)