]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/cephfs/test_snap_schedules.py
import quincy beta 17.1.0
[ceph.git] / ceph / qa / tasks / cephfs / test_snap_schedules.py
CommitLineData
f67539c2
TL
import errno
import json
import logging
import os
import time
from datetime import datetime, timedelta, timezone

from teuthology.exceptions import CommandFailedError

from tasks.cephfs.cephfs_test_case import CephFSTestCase
10
# Module-level logger shared by the helpers and test cases in this file.
log = logging.getLogger(__name__)
12
def extract_schedule_and_retention_spec(spec=None):
    """Split a sequence of spec entries into schedule and retention sets.

    Each entry must be indexable; element 0 is collected into the schedule
    set and element 1 into the retention set.
    (presumably entries are ``(schedule, retention)`` pairs -- confirm
    against callers.)

    :param spec: iterable of indexable entries; ``None`` (the default) is
                 treated as an empty sequence
    :return: tuple ``(schedule_set, retention_set)``
    """
    # Materialise once so a one-shot iterable is not exhausted by the first
    # pass, and so the default is never a shared mutable object.
    entries = [] if spec is None else list(spec)
    schedule = {entry[0] for entry in entries}
    retention = {entry[1] for entry in entries}
    return (schedule, retention)
17
def seconds_upto_next_schedule(time_from, timo):
    """Return the seconds from ``time_from`` until ``timo`` past its minute start.

    ``time_from`` is truncated to an integer, rounded down to a multiple of
    60, and ``timo`` is added to that boundary; the result is the distance
    from the truncated ``time_from`` to that point.

    :param time_from: epoch time (int or float)
    :param timo: offset in seconds added to the minute boundary
    :return: number of seconds to wait
    """
    now = int(time_from)
    minute_start = int(now / 60) * 60
    target = minute_start + timo
    return target - now
21
class TestSnapSchedules(CephFSTestCase):
    """Tests driving the ``fs snap-schedule`` commands against a CephFS mount.

    The tests add schedules (and retention specs) on directories created via
    ``self.mount_a`` and then poll the directory's ``.snap`` folder to check
    that snapshots appear and disappear when expected.
    """
    CLIENTS_REQUIRED = 1

    TEST_VOLUME_NAME = 'snap_vol'
    TEST_DIRECTORY = 'snap_test_dir1'

    # this should be in sync with snap_schedule format
    SNAPSHOT_TS_FORMAT = '%Y-%m-%d-%H_%M_%S'

    def check_scheduled_snapshot(self, exec_time, timo):
        """Assert that roughly ``timo`` seconds have passed since ``exec_time``.

        :param exec_time: epoch time at which the schedule was set up
        :param timo: expected number of seconds between ``exec_time`` and now
        """
        now = time.time()
        elapsed = now - exec_time
        log.debug(f'exec={exec_time}, now = {now}, timo = {timo}')
        # tolerate snapshot existence in the range [-5,+5] seconds around timo
        self.assertAlmostEqual(elapsed, timo, delta=5)

    def _fs_cmd(self, *args):
        """Run ``fs <args...>`` through the mon manager and return its output."""
        return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args)

    def fs_snap_schedule_cmd(self, *args, **kwargs):
        """Run ``fs snap-schedule <args...>`` and return its output.

        The command line is assembled in this order: the positional ``args``,
        ``--fs <name>``, an optional ``--format <fmt>``, and finally the
        string form of every remaining keyword value in the order the
        keywords were passed (the keyword names themselves are not sent).

        :param fs: (keyword) file system name; defaults to ``self.volname``
        :param format: (keyword, optional) value passed to ``--format``
        :return: output of the command
        """
        fs = kwargs.pop('fs', self.volname)
        args += ('--fs', fs)
        if 'format' in kwargs:
            args += ('--format', kwargs.pop('format'))
        # remaining keywords are positional CLI arguments; only values matter
        args += tuple(str(val) for val in kwargs.values())
        res = self._fs_cmd('snap-schedule', *args)
        log.debug(f'res={res}')
        return res

    def _create_or_reuse_test_volume(self):
        """Use the first volume listed by ``fs volume ls`` or create one.

        Sets ``self.volname``; ``self.vol_created`` is set to True only when
        a new volume had to be created.
        """
        result = json.loads(self._fs_cmd("volume", "ls"))
        if not result:
            self.vol_created = True
            self.volname = TestSnapSchedules.TEST_VOLUME_NAME
            self._fs_cmd("volume", "create", self.volname)
        else:
            self.volname = result[0]['name']

    def _enable_snap_schedule(self):
        """Enable the ``snap_schedule`` mgr module."""
        return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "snap_schedule")

    def _disable_snap_schedule(self):
        """Disable the ``snap_schedule`` mgr module."""
        return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "snap_schedule")

    def _allow_minute_granularity_snapshots(self):
        """Set ``mgr/snap_schedule/allow_m_granularity`` to True for the mgr."""
        self.config_set('mgr', 'mgr/snap_schedule/allow_m_granularity', True)

    def _dump_on_update(self):
        """Set ``mgr/snap_schedule/dump_on_update`` to True for the mgr."""
        self.config_set('mgr', 'mgr/snap_schedule/dump_on_update', True)

    def setUp(self):
        """Pick a volume, reset callback state and enable the module."""
        super().setUp()
        self.volname = None
        self.vol_created = False
        self._create_or_reuse_test_volume()
        self.create_cbks = []
        self.remove_cbks = []
        # used to figure out which snapshots are created/deleted
        self.snapshots = set()
        self._enable_snap_schedule()
        self._allow_minute_granularity_snapshots()
        self._dump_on_update()

    def tearDown(self):
        """Delete the volume if this test created it and disable the module."""
        if self.vol_created:
            # NOTE(review): _delete_test_volume is not defined in this class;
            # presumably it is inherited -- confirm it exists on the base.
            self._delete_test_volume()
        self._disable_snap_schedule()
        super().tearDown()

    def _schedule_to_timeout(self, schedule):
        """Convert a schedule string such as ``'1M'`` or ``'2h'`` to seconds.

        The last character is the unit (``M`` = 60s, ``h`` = hour,
        ``d`` = day, ``w`` = week) and everything before it is the integer
        period.

        :raises ValueError: if the period part is not an integer
        :raises RuntimeError: if the unit character is not recognized
        """
        seconds_per_unit = {
            'M': 60,
            'h': 60 * 60,
            'd': 60 * 60 * 24,
            'w': 60 * 60 * 24 * 7,
        }
        mult = schedule[-1]
        # parse the period first so a malformed period is reported as such
        period = int(schedule[0:-1])
        if mult not in seconds_per_unit:
            raise RuntimeError('schedule multiplier not recognized')
        return period * seconds_per_unit[mult]

    def add_snap_create_cbk(self, cbk):
        """Register a callback to be fed newly appeared snapshot names."""
        self.create_cbks.append(cbk)

    def remove_snap_create_cbk(self, cbk):
        """Unregister a snapshot-created callback."""
        self.create_cbks.remove(cbk)

    def add_snap_remove_cbk(self, cbk):
        """Register a callback to be fed names of snapshots that vanished."""
        self.remove_cbks.append(cbk)

    def remove_snap_remove_cbk(self, cbk):
        """Unregister a snapshot-removed callback."""
        self.remove_cbks.remove(cbk)

    def assert_if_not_verified(self):
        """Fail if any registered callback has not been satisfied yet."""
        self.assertListEqual(self.create_cbks, [])
        self.assertListEqual(self.remove_cbks, [])

    def verify(self, dir_path, max_trials):
        """Poll ``<dir_path>/.snap`` until all callbacks are satisfied.

        Once per iteration (followed by a one second sleep) the snapshot
        listing is diffed against the previous one. Added names are offered
        to the create callbacks and removed names to the remove callbacks;
        the first callback of each kind returning a truthy value is
        unregistered. The loop stops when no callbacks remain or after
        ``max_trials`` iterations.

        :param dir_path: directory whose ``.snap`` folder is listed
        :param max_trials: maximum number of polling iterations
        """
        trials = 0
        snap_path = f'{dir_path}/.snap'
        while (self.create_cbks or self.remove_cbks) and trials < max_trials:
            snapshots = set(self.mount_a.ls(path=snap_path))
            log.info(f"snapshots: {snapshots}")
            added = snapshots - self.snapshots
            log.info(f"added: {added}")
            removed = self.snapshots - snapshots
            log.info(f"removed: {removed}")
            if added:
                # iterate over a copy: a satisfied callback is removed
                for cbk in list(self.create_cbks):
                    if cbk(list(added)):
                        self.remove_snap_create_cbk(cbk)
                        break
            if removed:
                for cbk in list(self.remove_cbks):
                    if cbk(list(removed)):
                        self.remove_snap_remove_cbk(cbk)
                        break
            self.snapshots = snapshots
            trials += 1
            time.sleep(1)

    def calc_wait_time_and_snap_name(self, snap_sched_exec_epoch, schedule):
        """Compute the wait time and the expected snapshot timestamp.

        :param snap_sched_exec_epoch: epoch time at which the schedule was set
        :param schedule: schedule string understood by _schedule_to_timeout()
        :return: tuple ``(wait_seconds, timestamp_name)`` where
                 ``timestamp_name`` is the UTC time ``wait_seconds`` after
                 ``snap_sched_exec_epoch`` rendered with SNAPSHOT_TS_FORMAT
        """
        timo = self._schedule_to_timeout(schedule)
        # calculate wait time upto the next minute
        wait_timo = seconds_upto_next_schedule(snap_sched_exec_epoch, timo)

        # expected "scheduled" snapshot name
        expected_at = (datetime.fromtimestamp(snap_sched_exec_epoch, tz=timezone.utc)
                       + timedelta(seconds=wait_timo))
        ts_name = expected_at.strftime(TestSnapSchedules.SNAPSHOT_TS_FORMAT)
        return (wait_timo, ts_name)

    def verify_schedule(self, dir_path, schedules, retentions=None):
        """Assert that ``fs snap-schedule list`` reports the expected entries.

        :param dir_path: path the schedule was set on
        :param schedules: values expected in the ``schedule`` field
        :param retentions: values expected in the ``retention`` field;
                           ``None`` (the default) means nothing is checked
        """
        retentions = [] if retentions is None else retentions
        log.debug(f'expected_schedule: {schedules}, expected_retention: {retentions}')

        result = self.fs_snap_schedule_cmd('list', path=dir_path, format='json')
        json_res = json.loads(result)
        log.debug(f'json_res: {json_res}')

        for schedule in schedules:
            self.assertIn(schedule, json_res['schedule'])
        for retention in retentions:
            self.assertIn(retention, json_res['retention'])

    def remove_snapshots(self, dir_path):
        """Remove every entry under ``<dir_path>/.snap`` with ``rmdir``."""
        snap_path = f'{dir_path}/.snap'

        snapshots = self.mount_a.ls(path=snap_path)
        for snapshot in snapshots:
            snapshot_path = os.path.join(snap_path, snapshot)
            log.debug(f'removing snapshot: {snapshot_path}')
            self.mount_a.run_shell(['rmdir', snapshot_path])

    def _expect_list_enoent(self, path):
        """Run ``fs snap-schedule list`` on ``path`` and require ENOENT.

        :raises RuntimeError: if the command succeeds, or fails with an exit
                              status other than ``errno.ENOENT``
        """
        try:
            self.fs_snap_schedule_cmd('list', path=path)
        except CommandFailedError as ce:
            if ce.exitstatus != errno.ENOENT:
                raise RuntimeError('incorrect errno when listing a non-existing snap schedule') from ce
        else:
            raise RuntimeError('expected "fs snap-schedule list" to fail')

    def _make_snap_cbk(self, action, snap_sfx, exec_time, timo):
        """Build a callback for verify() matching one scheduled snapshot.

        The callback asserts it was handed exactly one snapshot name. It
        returns True (after checking the elapsed time with
        check_scheduled_snapshot()) when that name starts with ``scheduled-``
        and its timestamp matches ``snap_sfx`` up to the minute; otherwise it
        returns False.

        :param action: word used in the debug log ('added' or 'removed')
        :param snap_sfx: expected timestamp in SNAPSHOT_TS_FORMAT
        :param exec_time: epoch time at which the schedule was set up
        :param timo: expected seconds between ``exec_time`` and the event
        """
        prefix = 'scheduled-'
        # 'YYYY-mm-dd-HH_MM' is 16 characters: compare only up to the minute
        ts_len = 16
        start = len(prefix)

        def cbk(snaps):
            log.debug(f'snapshots {action}={snaps}')
            self.assertEqual(len(snaps), 1)
            snapname = snaps[0]
            if (snapname.startswith(prefix)
                    and snapname[start:start + ts_len] == snap_sfx[:ts_len]):
                self.check_scheduled_snapshot(exec_time, timo)
                return True
            return False
        return cbk

    def test_non_existent_snap_schedule_list(self):
        """Test listing snap schedules on a non-existing filesystem path failure"""
        self._expect_list_enoent(TestSnapSchedules.TEST_DIRECTORY)

    def test_non_existent_schedule(self):
        """Test listing non-existing snap schedules failure"""
        test_dir = TestSnapSchedules.TEST_DIRECTORY
        self.mount_a.run_shell(['mkdir', '-p', test_dir])

        self._expect_list_enoent(test_dir)

        self.mount_a.run_shell(['rmdir', test_dir])

    def test_snap_schedule_list_post_schedule_remove(self):
        """Test listing snap schedules post removal of a schedule"""
        test_dir = TestSnapSchedules.TEST_DIRECTORY
        self.mount_a.run_shell(['mkdir', '-p', test_dir])

        self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1h')

        self.fs_snap_schedule_cmd('remove', path=test_dir)

        self._expect_list_enoent(test_dir)

        self.mount_a.run_shell(['rmdir', test_dir])

    def test_snap_schedule(self):
        """Test existence of a scheduled snapshot"""
        test_dir = TestSnapSchedules.TEST_DIRECTORY
        self.mount_a.run_shell(['mkdir', '-p', test_dir])

        # set a schedule on the dir
        self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M')
        exec_time = time.time()

        timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
        log.debug(f'expecting snap {test_dir}/.snap/scheduled-{snap_sfx} in ~{timo}s...')
        to_wait = timo + 2  # some leeway to avoid false failures...

        # verify snapshot schedule
        self.verify_schedule(test_dir, ['1M'])

        self.add_snap_create_cbk(self._make_snap_cbk('added', snap_sfx, exec_time, timo))
        self.verify(test_dir, to_wait)
        self.assert_if_not_verified()

        # remove snapshot schedule
        self.fs_snap_schedule_cmd('remove', path=test_dir)

        # remove all scheduled snapshots
        self.remove_snapshots(test_dir)

        self.mount_a.run_shell(['rmdir', test_dir])

    def test_multi_snap_schedule(self):
        """Test existence of multiple scheduled snapshots"""
        test_dir = TestSnapSchedules.TEST_DIRECTORY
        self.mount_a.run_shell(['mkdir', '-p', test_dir])

        # set schedules on the dir
        self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M')
        self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='2M')
        exec_time = time.time()

        timo_1, snap_sfx_1 = self.calc_wait_time_and_snap_name(exec_time, '1M')
        log.debug(f'expecting snap {test_dir}/.snap/scheduled-{snap_sfx_1} in ~{timo_1}s...')
        timo_2, snap_sfx_2 = self.calc_wait_time_and_snap_name(exec_time, '2M')
        log.debug(f'expecting snap {test_dir}/.snap/scheduled-{snap_sfx_2} in ~{timo_2}s...')
        to_wait = timo_2 + 2  # use max timeout

        # verify snapshot schedule
        self.verify_schedule(test_dir, ['1M', '2M'])

        self.add_snap_create_cbk(self._make_snap_cbk('added', snap_sfx_1, exec_time, timo_1))
        self.add_snap_create_cbk(self._make_snap_cbk('added', snap_sfx_2, exec_time, timo_2))
        self.verify(test_dir, to_wait)
        self.assert_if_not_verified()

        # remove snapshot schedule
        self.fs_snap_schedule_cmd('remove', path=test_dir)

        # remove all scheduled snapshots
        self.remove_snapshots(test_dir)

        self.mount_a.run_shell(['rmdir', test_dir])

    def test_snap_schedule_with_retention(self):
        """Test scheduled snapshots along with retention policy"""
        test_dir = TestSnapSchedules.TEST_DIRECTORY
        self.mount_a.run_shell(['mkdir', '-p', test_dir])

        # set a schedule on the dir
        self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M')
        self.fs_snap_schedule_cmd('retention', 'add', path=test_dir, retention_spec_or_period='1M')
        exec_time = time.time()

        timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
        log.debug(f'expecting snap {test_dir}/.snap/scheduled-{snap_sfx} in ~{timo_1}s...')
        to_wait = timo_1 + 2  # some leeway to avoid false failures...

        # verify snapshot schedule
        self.verify_schedule(test_dir, ['1M'], retentions=[{'M': 1}])

        self.add_snap_create_cbk(self._make_snap_cbk('added', snap_sfx, exec_time, timo_1))
        self.verify(test_dir, to_wait)
        self.assert_if_not_verified()

        timo_2 = timo_1 + 60  # expected snapshot removal timeout
        log.debug(f'expecting removal of snap {test_dir}/.snap/scheduled-{snap_sfx} in ~{timo_2}s...')
        to_wait = timo_2
        self.add_snap_remove_cbk(self._make_snap_cbk('removed', snap_sfx, exec_time, timo_2))
        self.verify(test_dir, to_wait + 2)
        self.assert_if_not_verified()

        # remove snapshot schedule
        self.fs_snap_schedule_cmd('remove', path=test_dir)

        # remove all scheduled snapshots
        self.remove_snapshots(test_dir)

        self.mount_a.run_shell(['rmdir', test_dir])

    def verify_snap_stats(self, dir_path):
        """Assert the snapshot count on disk matches the reported count.

        Compares the number of entries under the directory's ``.snap`` folder
        with ``created_count`` from ``fs snap-schedule status`` for the
        ``1M`` schedule.

        :param dir_path: directory path; its first character is dropped for
                         the listing (callers pass paths starting with '/')
        """
        snap_path = f"{dir_path}/.snap"[1:]
        snapshots = self.mount_a.ls(path=snap_path)
        fs_count = len(snapshots)
        log.debug(f'snapshots: {snapshots}')

        result = self.fs_snap_schedule_cmd('status', path=dir_path, snap_schedule='1M', format='json')
        json_res = json.loads(result)[0]
        db_count = int(json_res['created_count'])
        log.debug(f'json_res: {json_res}')

        self.assertEqual(fs_count, db_count)

    def test_concurrent_snap_creates(self):
        """
        Test snap creates at same cadence on same fs to verify correct stats.
        A single SQLite DB Connection handle cannot be used to run concurrent
        transactions and results transaction aborts. This test makes sure that
        proper care has been taken in the code to avoid such situation by
        verifying number of dirs created on the file system with the
        created_count in the schedule_meta table for the specific path.
        """
        self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

        testdirs = [os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "dir" + str(d))
                    for d in range(10)]

        for d in testdirs:
            # d is absolute for the schedule command; d[1:] is the same path
            # without the leading '/' for shell commands on the mount
            self.mount_a.run_shell(['mkdir', '-p', d[1:]])
            self.fs_snap_schedule_cmd('add', path=d, snap_schedule='1M')

        exec_time = time.time()
        timo_1, _ = self.calc_wait_time_and_snap_name(exec_time, '1M')

        for d in testdirs:
            self.fs_snap_schedule_cmd('activate', path=d, snap_schedule='1M')

        # we wait for 10 snaps to be taken
        wait_time = timo_1 + 10 * 60 + 15
        time.sleep(wait_time)

        for d in testdirs:
            self.fs_snap_schedule_cmd('deactivate', path=d, snap_schedule='1M')

        for d in testdirs:
            self.verify_snap_stats(d)

        for d in testdirs:
            self.fs_snap_schedule_cmd('remove', path=d, snap_schedule='1M')
            self.remove_snapshots(d[1:])
            self.mount_a.run_shell(['rmdir', d[1:]])
409 self.mount_a.run_shell(['rmdir', d[1:]])