]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | import os |
2 | import json | |
3 | import time | |
4 | import errno | |
5 | import logging | |
6 | ||
7 | from tasks.cephfs.cephfs_test_case import CephFSTestCase | |
8 | from teuthology.exceptions import CommandFailedError | |
9 | from datetime import datetime, timedelta | |
10 | ||
11 | log = logging.getLogger(__name__) | |
12 | ||
def extract_schedule_and_retention_spec(spec=None):
    """Split (schedule, retention) pairs into two de-duplicated sets.

    :param spec: iterable of indexable pairs; element 0 of each pair is
                 collected as a schedule and element 1 as a retention.
                 ``None`` (the default) is treated as an empty list.
    :return: tuple ``(schedules, retentions)``, both sets.
    """
    # ``None`` replaces the original mutable ``[]`` default argument.
    pairs = [] if spec is None else spec
    schedule = {s[0] for s in pairs}
    retention = {s[1] for s in pairs}
    return (schedule, retention)
17 | ||
def seconds_upto_next_schedule(time_from, timo):
    """Return how many seconds lie between ``time_from`` and the instant
    ``timo`` seconds after the start of the minute containing ``time_from``.

    :param time_from: epoch timestamp; truncated to whole seconds.
    :param timo: offset in seconds, counted from the start of that minute.
    :return: integer number of seconds (when ``timo`` is an int).
    """
    start = int(time_from)
    # round down to the beginning of the current minute
    minute_start = int(start / 60) * 60
    target = minute_start + timo
    return target - start
21 | ||
22 | class TestSnapSchedules(CephFSTestCase): | |
23 | CLIENTS_REQUIRED = 1 | |
24 | ||
25 | TEST_VOLUME_NAME = 'snap_vol' | |
26 | TEST_DIRECTORY = 'snap_test_dir1' | |
27 | ||
28 | # this should be in sync with snap_schedule format | |
29 | SNAPSHOT_TS_FORMAT = '%Y-%m-%d-%H_%M_%S' | |
30 | ||
def check_scheduled_snapshot(self, exec_time, timo):
    """Assert that roughly ``timo`` seconds have passed since ``exec_time``.

    :param exec_time: epoch timestamp taken when the schedule was set up.
    :param timo: expected delay in seconds.
    """
    now = time.time()
    elapsed = now - exec_time
    log.debug(f'exec={exec_time}, now = {now}, timo = {timo}')
    # tolerate the snapshot showing up within [-5, +5] seconds of ``timo``
    self.assertTrue(timo - 5 <= elapsed <= timo + 5)
37 | ||
def _fs_cmd(self, *args):
    """Forward ``args`` to ``mon_manager.raw_cluster_cmd`` with ``"fs"``
    prepended and return whatever that call returns."""
    mon_manager = self.mgr_cluster.mon_manager
    return mon_manager.raw_cluster_cmd("fs", *args)
40 | ||
b3b6e05e TL |
def fs_snap_schedule_cmd(self, *args, **kwargs):
    """Run an ``fs snap-schedule`` sub-command and return its output.

    The final argument list is: the positional ``args``, then ``--fs <fs>``,
    then ``--format <format>`` when given, then the string form of every
    remaining keyword value in the order the keywords were passed. The
    keyword *names* are not sent, only their values.

    :param fs: (keyword) file system name; defaults to ``self.volname``.
    :param format: (keyword, optional) value passed after ``--format``.
    :return: result of ``self._fs_cmd``.
    """
    cmd = list(args)
    cmd += ['--fs', kwargs.pop('fs', self.volname)]
    if 'format' in kwargs:
        cmd += ['--format', kwargs.pop('format')]
    # whatever is left is passed positionally, by value only
    cmd.extend(str(val) for val in kwargs.values())
    res = self._fs_cmd('snap-schedule', *cmd)
    log.debug(f'res={res}')
    return res
52 | ||
def _create_or_reuse_test_volume(self):
    """Pick the volume the tests run against.

    Uses the first volume reported by ``fs volume ls`` when there is one;
    otherwise creates ``TEST_VOLUME_NAME`` and records that fact in
    ``self.vol_created``.
    """
    volumes = json.loads(self._fs_cmd("volume", "ls"))
    if len(volumes) > 0:
        # reuse an existing volume; vol_created keeps its current value
        self.volname = volumes[0]['name']
        return
    self.vol_created = True
    self.volname = TestSnapSchedules.TEST_VOLUME_NAME
    self._fs_cmd("volume", "create", self.volname)
61 | ||
def _enable_snap_schedule(self):
    """Issue ``mgr module enable snap_schedule`` and return the command's
    result."""
    mon_manager = self.mgr_cluster.mon_manager
    return mon_manager.raw_cluster_cmd("mgr", "module", "enable", "snap_schedule")
64 | ||
def _disable_snap_schedule(self):
    """Issue ``mgr module disable snap_schedule`` and return the command's
    result."""
    mon_manager = self.mgr_cluster.mon_manager
    return mon_manager.raw_cluster_cmd("mgr", "module", "disable", "snap_schedule")
67 | ||
def _allow_minute_granularity_snapshots(self):
    """Set ``mgr/snap_schedule/allow_m_granularity`` to ``True`` for the mgr.

    NOTE(review): presumably required for the ``'1M'``/``'2M'`` schedules the
    tests use - confirm against the snap_schedule module.
    """
    option = 'mgr/snap_schedule/allow_m_granularity'
    self.config_set('mgr', option, True)
70 | ||
20effc67 TL |
def _dump_on_update(self):
    """Set ``mgr/snap_schedule/dump_on_update`` to ``True`` for the mgr."""
    option = 'mgr/snap_schedule/dump_on_update'
    self.config_set('mgr', option, True)
73 | ||
f67539c2 TL |
def setUp(self):
    """Prepare per-test state: choose a volume, reset the callback lists and
    the known-snapshot set, then enable and configure snap_schedule."""
    super(TestSnapSchedules, self).setUp()
    self.volname, self.vol_created = None, False
    # may overwrite both attributes set on the previous line
    self._create_or_reuse_test_volume()
    self.create_cbks, self.remove_cbks = [], []
    # used to figure out which snapshots are created/deleted
    self.snapshots = set()
    for prepare in (self._enable_snap_schedule,
                    self._allow_minute_granularity_snapshots,
                    self._dump_on_update):
        prepare()
f67539c2 TL |
86 | |
def tearDown(self):
    """Delete the volume if this test created it, disable snap_schedule and
    run the parent tearDown."""
    created_here = self.vol_created
    if created_here:
        # NOTE(review): _delete_test_volume is not defined in the visible
        # part of this file - confirm a base class provides it.
        self._delete_test_volume()
    self._disable_snap_schedule()
    super(TestSnapSchedules, self).tearDown()
92 | ||
def _schedule_to_timeout(self, schedule):
    """Convert a schedule string such as ``'1M'`` or ``'2h'`` to seconds.

    :param schedule: ``<count><unit>`` where unit is ``M`` (minutes),
                     ``h`` (hours), ``d`` (days) or ``w`` (weeks).
    :return: the period in seconds.
    :raises RuntimeError: if the unit is not one of the above.
    """
    seconds_per_unit = {
        'M': 60,
        'h': 60 * 60,
        'd': 60 * 60 * 24,
        'w': 60 * 60 * 24 * 7,
    }
    unit = schedule[-1]
    # the count is parsed before the unit is validated
    count = int(schedule[0:-1])
    if unit not in seconds_per_unit:
        raise RuntimeError('schedule multiplier not recognized')
    return count * seconds_per_unit[unit]
106 | ||
def add_snap_create_cbk(self, cbk):
    """Register ``cbk`` at the end of the snapshot-creation callback list."""
    callbacks = self.create_cbks
    callbacks.append(cbk)
def remove_snap_create_cbk(self, cbk):
    """Drop the first occurrence of ``cbk`` from the snapshot-creation
    callback list."""
    callbacks = self.create_cbks
    callbacks.remove(cbk)
111 | ||
def add_snap_remove_cbk(self, cbk):
    """Register ``cbk`` at the end of the snapshot-removal callback list."""
    callbacks = self.remove_cbks
    callbacks.append(cbk)
def remove_snap_remove_cbk(self, cbk):
    """Drop the first occurrence of ``cbk`` from the snapshot-removal
    callback list."""
    callbacks = self.remove_cbks
    callbacks.remove(cbk)
116 | ||
def assert_if_not_verified(self):
    """Fail unless both callback lists are empty, i.e. every registered
    create/remove callback has been unregistered."""
    for pending in (self.create_cbks, self.remove_cbks):
        self.assertListEqual(pending, [])
f67539c2 TL |
120 | |
def verify(self, dir_path, max_trials):
    """Poll ``<dir_path>/.snap`` about once per second and feed snapshot
    changes to the registered callbacks.

    Each iteration lists the snapshot directory, diffs it against the
    previously seen set (``self.snapshots``) and offers the added/removed
    names to the create/remove callbacks. A callback that returns a truthy
    value is unregistered, and no further callbacks of that kind are tried in
    that iteration. Polling ends when no callbacks remain or after
    ``max_trials`` iterations.

    :param dir_path: directory whose ``.snap`` sub-directory is listed.
    :param max_trials: maximum number of polling iterations.
    """
    def fire_first_match(callbacks, names, unregister):
        # iterate over a copy: unregister() mutates the underlying list
        for cbk in list(callbacks):
            if cbk(list(names)):
                unregister(cbk)
                break

    snap_path = "{0}/.snap".format(dir_path)
    trials = 0
    while (self.create_cbks or self.remove_cbks) and trials < max_trials:
        snapshots = set(self.mount_a.ls(path=snap_path))
        log.info(f"snapshots: {snapshots}")
        added = snapshots - self.snapshots
        log.info(f"added: {added}")
        removed = self.snapshots - snapshots
        log.info(f"removed: {removed}")
        if added:
            fire_first_match(self.create_cbks, added, self.remove_snap_create_cbk)
        if removed:
            fire_first_match(self.remove_cbks, removed, self.remove_snap_remove_cbk)
        self.snapshots = snapshots
        trials += 1
        time.sleep(1)
146 | ||
def calc_wait_time_and_snap_name(self, snap_sched_exec_epoch, schedule):
    """Compute the wait before the expected snapshot and its timestamp name.

    :param snap_sched_exec_epoch: epoch timestamp at which the schedule was
                                  set up.
    :param schedule: schedule string accepted by ``_schedule_to_timeout``.
    :return: tuple ``(wait_seconds, timestamp)`` where ``timestamp`` is the
             UTC time ``wait_seconds`` after the epoch, formatted with
             ``SNAPSHOT_TS_FORMAT``.
    """
    period = self._schedule_to_timeout(schedule)
    # seconds until one schedule period past the start of the current minute
    wait_timo = seconds_upto_next_schedule(snap_sched_exec_epoch, period)

    # timestamp part of the expected "scheduled" snapshot name
    fire_at = datetime.utcfromtimestamp(snap_sched_exec_epoch) + timedelta(seconds=wait_timo)
    ts_name = fire_at.strftime(TestSnapSchedules.SNAPSHOT_TS_FORMAT)
    return (wait_timo, ts_name)
156 | ||
def verify_schedule(self, dir_path, schedules, retentions=None):
    """Assert that ``fs snap-schedule list`` for ``dir_path`` reports the
    expected schedules and retentions.

    :param dir_path: path whose schedule is listed (JSON format).
    :param schedules: values each expected in the ``'schedule'`` entry of the
                      parsed output.
    :param retentions: values each expected in the ``'retention'`` entry;
                       ``None`` (the default) means nothing is checked.
    """
    # ``None`` replaces the original mutable ``[]`` default argument.
    if retentions is None:
        retentions = []
    log.debug(f'expected_schedule: {schedules}, expected_retention: {retentions}')

    result = self.fs_snap_schedule_cmd('list', path=dir_path, format='json')
    json_res = json.loads(result)
    log.debug(f'json_res: {json_res}')

    # assertIn reports both operands on failure, unlike assertTrue(x in y)
    for schedule in schedules:
        self.assertIn(schedule, json_res['schedule'])
    for retention in retentions:
        self.assertIn(retention, json_res['retention'])
168 | ||
def remove_snapshots(self, dir_path):
    """Remove every entry listed under ``<dir_path>/.snap`` with ``rmdir``.

    :param dir_path: directory whose snapshots are removed.
    """
    snap_path = f'{dir_path}/.snap'

    for snapshot in self.mount_a.ls(path=snap_path):
        snapshot_path = os.path.join(snap_path, snapshot)
        log.debug(f'removing snapshot: {snapshot_path}')
        self.mount_a.run_shell(['rmdir', snapshot_path])
177 | ||
def test_non_existent_snap_schedule_list(self):
    """Listing snap schedules for a path that was never created must fail
    with ENOENT."""
    failed = False
    try:
        self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY)
    except CommandFailedError as ce:
        if ce.exitstatus != errno.ENOENT:
            raise RuntimeError('incorrect errno when listing a non-existing snap schedule')
        failed = True
    if not failed:
        # the command succeeded although no such path exists
        raise RuntimeError('expected "fs snap-schedule list" to fail')
187 | ||
def test_non_existent_schedule(self):
    """Listing snap schedules for an existing directory that has no schedule
    must fail with ENOENT."""
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

    try:
        self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY)
    except CommandFailedError as ce:
        if ce.exitstatus != errno.ENOENT:
            raise RuntimeError('incorrect errno when listing a non-existing snap schedule')
    else:
        # reached only when the command succeeded; the original message
        # ('expected ... returned fail') was garbled
        raise RuntimeError('expected "fs snap-schedule list" to fail')

    self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
201 | ||
def test_snap_schedule_list_post_schedule_remove(self):
    """After a schedule is added and removed again, listing schedules for the
    directory must fail with ENOENT."""
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

    self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1h')

    self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)

    try:
        self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY)
    except CommandFailedError as ce:
        if ce.exitstatus != errno.ENOENT:
            raise RuntimeError('incorrect errno when listing a non-existing snap schedule')
    else:
        # this branch runs when the command did NOT fail; the original
        # message claimed the opposite ("returned error")
        raise RuntimeError('expected "fs snap-schedule list" to fail')

    self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
219 | ||
def test_snap_schedule(self):
    """Test existence of a scheduled snapshot"""
    test_dir = TestSnapSchedules.TEST_DIRECTORY
    self.mount_a.run_shell(['mkdir', '-p', test_dir])

    # set a schedule on the dir
    self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M')
    exec_time = time.time()

    timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
    log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo}s...')
    to_wait = timo + 2  # some leeway to avoid false failures...

    # verify snapshot schedule
    self.verify_schedule(test_dir, ['1M'])

    def verify_added(snaps_added):
        log.debug(f'snapshots added={snaps_added}')
        self.assertEqual(len(snaps_added), 1)
        snapname = snaps_added[0]
        if not snapname.startswith('scheduled-'):
            return False
        # compare only the first 16 characters of the timestamp, which
        # leaves out the seconds field of SNAPSHOT_TS_FORMAT
        if snapname[10:26] != snap_sfx[:16]:
            return False
        self.check_scheduled_snapshot(exec_time, timo)
        return True

    self.add_snap_create_cbk(verify_added)
    self.verify(test_dir, to_wait)
    self.assert_if_not_verified()

    # remove snapshot schedule
    self.fs_snap_schedule_cmd('remove', path=test_dir)

    # remove all scheduled snapshots
    self.remove_snapshots(test_dir)

    self.mount_a.run_shell(['rmdir', test_dir])
255 | ||
def test_multi_snap_schedule(self):
    """Test existence of multiple scheduled snapshots"""
    test_dir = TestSnapSchedules.TEST_DIRECTORY
    self.mount_a.run_shell(['mkdir', '-p', test_dir])

    # set schedules on the dir
    self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M')
    self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='2M')
    exec_time = time.time()

    timo_1, snap_sfx_1 = self.calc_wait_time_and_snap_name(exec_time, '1M')
    log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_1} in ~{timo_1}s...')
    timo_2, snap_sfx_2 = self.calc_wait_time_and_snap_name(exec_time, '2M')
    log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_2} in ~{timo_2}s...')
    to_wait = timo_2 + 2  # use max timeout

    # verify snapshot schedule
    self.verify_schedule(test_dir, ['1M', '2M'])

    def make_verifier(expected_sfx, expected_timo):
        # build one creation callback per schedule; both share the same logic
        def verify_added(snaps_added):
            log.debug(f'snapshots added={snaps_added}')
            self.assertEqual(len(snaps_added), 1)
            snapname = snaps_added[0]
            if not snapname.startswith('scheduled-'):
                return False
            # compare only the first 16 characters of the timestamp
            if snapname[10:26] != expected_sfx[:16]:
                return False
            self.check_scheduled_snapshot(exec_time, expected_timo)
            return True
        return verify_added

    self.add_snap_create_cbk(make_verifier(snap_sfx_1, timo_1))
    self.add_snap_create_cbk(make_verifier(snap_sfx_2, timo_2))
    self.verify(test_dir, to_wait)
    self.assert_if_not_verified()

    # remove snapshot schedule
    self.fs_snap_schedule_cmd('remove', path=test_dir)

    # remove all scheduled snapshots
    self.remove_snapshots(test_dir)

    self.mount_a.run_shell(['rmdir', test_dir])
304 | ||
def test_snap_schedule_with_retention(self):
    """Test scheduled snapshots along with retention policy"""
    test_dir = TestSnapSchedules.TEST_DIRECTORY
    self.mount_a.run_shell(['mkdir', '-p', test_dir])

    # set a schedule on the dir
    self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M')
    self.fs_snap_schedule_cmd('retention', 'add', path=test_dir, retention_spec_or_period='1M')
    exec_time = time.time()

    timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
    log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_1}s...')
    to_wait = timo_1 + 2  # some leeway to avoid false failures...

    # verify snapshot schedule
    self.verify_schedule(test_dir, ['1M'], retentions=[{'M': 1}])

    def verify_added(snaps_added):
        log.debug(f'snapshots added={snaps_added}')
        self.assertEqual(len(snaps_added), 1)
        snapname = snaps_added[0]
        if not snapname.startswith('scheduled-'):
            return False
        # compare only the first 16 characters of the timestamp
        if snapname[10:26] != snap_sfx[:16]:
            return False
        self.check_scheduled_snapshot(exec_time, timo_1)
        return True

    self.add_snap_create_cbk(verify_added)
    self.verify(test_dir, to_wait)
    self.assert_if_not_verified()

    timo_2 = timo_1 + 60  # expected snapshot removal timeout

    def verify_removed(snaps_removed):
        log.debug(f'snapshots removed={snaps_removed}')
        self.assertEqual(len(snaps_removed), 1)
        snapname = snaps_removed[0]
        if not snapname.startswith('scheduled-'):
            return False
        if snapname[10:26] != snap_sfx[:16]:
            return False
        self.check_scheduled_snapshot(exec_time, timo_2)
        return True

    log.debug(f'expecting removal of snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_2}s...')
    to_wait = timo_2
    self.add_snap_remove_cbk(verify_removed)
    self.verify(test_dir, to_wait + 2)
    self.assert_if_not_verified()

    # remove snapshot schedule
    self.fs_snap_schedule_cmd('remove', path=test_dir)

    # remove all scheduled snapshots
    self.remove_snapshots(test_dir)

    self.mount_a.run_shell(['rmdir', test_dir])
20effc67 TL |
357 | |
def verify_snap_stats(self, dir_path):
    """Assert that the number of snapshots on disk equals the schedule's
    recorded ``created_count``.

    :param dir_path: directory with a ``'1M'`` schedule. Its first character
                     is dropped when building the path passed to
                     ``mount_a.ls`` (callers in this file pass a path with a
                     leading ``/``).
    """
    # [1:] strips the leading character so the path is relative to the mount
    snap_path = f"{dir_path}/.snap"[1:]
    snapshots = self.mount_a.ls(path=snap_path)
    fs_count = len(snapshots)
    # the original call lacked the f-prefix and logged the literal
    # text '{snapshots}'
    log.debug(f'snapshots: {snapshots}')

    result = self.fs_snap_schedule_cmd('status', path=dir_path, snap_schedule='1M', format='json')
    # only the first element of the status output is inspected
    json_res = json.loads(result)[0]
    db_count = int(json_res['created_count'])
    log.debug(f'json_res: {json_res}')

    # assertEqual reports both counts on mismatch
    self.assertEqual(fs_count, db_count)
370 | ||
def test_concurrent_snap_creates(self):
    """
    Test snap creates at the same cadence on the same fs to verify correct
    stats. A single SQLite DB connection handle cannot be used to run
    concurrent transactions; doing so results in transaction aborts. This
    test makes sure that proper care has been taken in the code to avoid
    such a situation by comparing the number of dirs created on the file
    system with the created_count in the schedule_meta table for the
    specific path.
    """
    base_dir = TestSnapSchedules.TEST_DIRECTORY
    self.mount_a.run_shell(['mkdir', '-p', base_dir])

    # absolute paths for the schedule commands; d[1:] is used for the mount
    testdirs = [os.path.join("/", base_dir, "dir" + str(d)) for d in range(10)]

    for d in testdirs:
        self.mount_a.run_shell(['mkdir', '-p', d[1:]])
        self.fs_snap_schedule_cmd('add', path=d, snap_schedule='1M')

    exec_time = time.time()
    timo_1, _ = self.calc_wait_time_and_snap_name(exec_time, '1M')

    for d in testdirs:
        self.fs_snap_schedule_cmd('activate', path=d, snap_schedule='1M')

    # we wait for 10 snaps to be taken
    wait_time = timo_1 + 10 * 60 + 15
    time.sleep(wait_time)

    # stop every schedule before any counts are compared
    for d in testdirs:
        self.fs_snap_schedule_cmd('deactivate', path=d, snap_schedule='1M')

    for d in testdirs:
        self.verify_snap_stats(d)

    for d in testdirs:
        self.fs_snap_schedule_cmd('remove', path=d, snap_schedule='1M')
        rel_dir = d[1:]
        self.remove_snapshots(rel_dir)
        self.mount_a.run_shell(['rmdir', rel_dir])