# Source: git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/cephfs/test_snap_schedules.py
import errno
import json
import logging
import os
import time
from datetime import datetime, timedelta

from teuthology.exceptions import CommandFailedError

from tasks.cephfs.cephfs_test_case import CephFSTestCase
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
def extract_schedule_and_retention_spec(spec=None):
    """Split (schedule, retention) pairs into two de-duplicated sets.

    :param spec: iterable of indexable items whose element 0 is a schedule
                 and element 1 is a retention; ``None`` means "no items".
    :returns: tuple ``(schedules, retentions)`` where both members are sets.
    """
    # A None default avoids sharing one mutable list object between calls.
    entries = [] if spec is None else spec
    schedule = {entry[0] for entry in entries}
    retention = {entry[1] for entry in entries}
    return (schedule, retention)
def seconds_upto_next_schedule(time_from, timo):
    """Return the seconds from ``time_from`` until the next scheduled run.

    The run is assumed to happen ``timo`` seconds after the start of the
    minute that contains ``time_from``.

    :param time_from: epoch timestamp (int or float); fractions are dropped.
    :param timo: schedule period in seconds.
    :returns: whole number of seconds to wait.
    """
    # Work on whole seconds; the original referenced ``ts`` without ever
    # binding it, so derive it from the argument here.
    ts = int(time_from)
    minute_start = int(ts / 60) * 60
    return (minute_start + timo) - ts
class TestSnapSchedules(CephFSTestCase):
    """Tests for the mgr ``snap_schedule`` module.

    The tests add snapshot schedules to directories of the mounted file
    system (``self.mount_a``) and then check the reported schedule, the
    snapshots appearing under ``<dir>/.snap`` or the module's counters.
    """

    TEST_VOLUME_NAME = 'snap_vol'
    TEST_DIRECTORY = 'snap_test_dir1'

    # this should be in sync with snap_schedule format
    SNAPSHOT_TS_FORMAT = '%Y-%m-%d-%H_%M_%S'

    # seconds per schedule period unit (the trailing letter of e.g. '1M')
    _SECONDS_PER_UNIT = {
        'M': 60,
        'h': 60 * 60,
        'd': 60 * 60 * 24,
        'w': 60 * 60 * 24 * 7,
    }

    def check_scheduled_snapshot(self, exec_time, timo):
        """Assert that roughly ``timo`` seconds have passed since ``exec_time``."""
        now = time.time()
        delta = now - exec_time
        log.debug(f'exec={exec_time}, now = {now}, timo = {timo}')
        # tolerate snapshot existence in the range [-5,+5]
        self.assertTrue(timo - 5 <= delta <= timo + 5)

    def _fs_cmd(self, *args):
        """Run ``ceph fs <args...>`` and return whatever the cluster command returns."""
        return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args)

    def fs_snap_schedule_cmd(self, *args, **kwargs):
        """Run ``ceph fs snap-schedule <args...>`` and return its output.

        Keyword handling:
          * ``fs``     -- file system name, defaults to ``self.volname``;
                          passed as ``--fs <name>``.
          * ``format`` -- passed as ``--format <fmt>`` when given.
          * any other keyword -- its value is appended, in call order, as a
                          positional argument (the keyword name is ignored).
        """
        fs = kwargs.pop('fs', self.volname)
        args += ('--fs', fs)
        if 'format' in kwargs:
            fmt = kwargs.pop('format')
            args += ('--format', fmt)
        for val in kwargs.values():
            args += (str(val),)
        res = self._fs_cmd('snap-schedule', *args)
        log.debug(f'res={res}')
        # callers parse this output (e.g. with json.loads)
        return res

    def _create_or_reuse_test_volume(self):
        """Use the first existing volume, or create ``TEST_VOLUME_NAME`` if none exists."""
        result = json.loads(self._fs_cmd("volume", "ls"))
        if len(result) == 0:
            self.vol_created = True
            self.volname = TestSnapSchedules.TEST_VOLUME_NAME
            self._fs_cmd("volume", "create", self.volname)
        else:
            self.volname = result[0]['name']

    def _enable_snap_schedule(self):
        """Enable the ``snap_schedule`` mgr module."""
        return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "snap_schedule")

    def _disable_snap_schedule(self):
        """Disable the ``snap_schedule`` mgr module."""
        return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "snap_schedule")

    def _allow_minute_granularity_snapshots(self):
        """Set ``mgr/snap_schedule/allow_m_granularity`` to True."""
        self.config_set('mgr', 'mgr/snap_schedule/allow_m_granularity', True)

    def _dump_on_update(self):
        """Set ``mgr/snap_schedule/dump_on_update`` to True."""
        self.config_set('mgr', 'mgr/snap_schedule/dump_on_update', True)

    def setUp(self):
        """Pick or create the test volume and enable the snap_schedule module."""
        super(TestSnapSchedules, self).setUp()
        self.volname = None
        self.vol_created = False
        self._create_or_reuse_test_volume()
        # callbacks waiting for a snapshot creation / removal; see verify()
        self.create_cbks = []
        self.remove_cbks = []
        # used to figure out which snapshots are created/deleted
        self.snapshots = set()
        self._enable_snap_schedule()
        self._allow_minute_granularity_snapshots()
        self._dump_on_update()

    def tearDown(self):
        """Delete the volume if this test created it and disable the module."""
        if self.vol_created:
            # NOTE(review): _delete_test_volume is not defined in this class;
            # presumably it is provided elsewhere -- confirm.
            self._delete_test_volume()
        self._disable_snap_schedule()
        super(TestSnapSchedules, self).tearDown()

    def _schedule_to_timeout(self, schedule):
        """Convert a schedule such as ``'1M'`` or ``'2h'`` to seconds.

        :raises RuntimeError: if the trailing unit letter is not one of
                              ``M``, ``h``, ``d`` or ``w``.
        """
        mult = schedule[-1]
        period = int(schedule[0:-1])
        try:
            return period * self._SECONDS_PER_UNIT[mult]
        except KeyError:
            raise RuntimeError('schedule multiplier not recognized') from None

    def add_snap_create_cbk(self, cbk):
        """Register a callback to be offered newly created snapshots."""
        self.create_cbks.append(cbk)

    def remove_snap_create_cbk(self, cbk):
        """Unregister a snapshot-creation callback."""
        self.create_cbks.remove(cbk)

    def add_snap_remove_cbk(self, cbk):
        """Register a callback to be offered removed snapshots."""
        self.remove_cbks.append(cbk)

    def remove_snap_remove_cbk(self, cbk):
        """Unregister a snapshot-removal callback."""
        self.remove_cbks.remove(cbk)

    def assert_if_not_verified(self):
        """Fail if any registered callback has not been satisfied yet."""
        self.assertListEqual(self.create_cbks, [])
        self.assertListEqual(self.remove_cbks, [])

    def verify(self, dir_path, max_trials):
        """Poll ``<dir_path>/.snap`` until all callbacks are satisfied.

        Once per trial (about one second apart) the snapshot listing is
        compared with the previous one. Newly added names are offered to the
        creation callbacks and vanished names to the removal callbacks. A
        callback returning a truthy value is unregistered, and at most one
        callback of each kind is satisfied per trial.

        :param dir_path: directory whose ``.snap`` folder is listed.
        :param max_trials: maximum number of polls before giving up.
        """
        trials = 0
        snap_path = "{0}/.snap".format(dir_path)
        while (self.create_cbks or self.remove_cbks) and trials < max_trials:
            snapshots = set(self.mount_a.ls(path=snap_path))
            log.info(f"snapshots: {snapshots}")
            added = snapshots - self.snapshots
            log.info(f"added: {added}")
            removed = self.snapshots - snapshots
            log.info(f"removed: {removed}")
            # The callbacks assert on the list length, so only invoke them
            # when there is something to report.
            if added:
                for cbk in list(self.create_cbks):
                    if cbk(list(added)):
                        self.remove_snap_create_cbk(cbk)
                        break
            if removed:
                for cbk in list(self.remove_cbks):
                    if cbk(list(removed)):
                        self.remove_snap_remove_cbk(cbk)
                        break
            self.snapshots = snapshots
            trials += 1
            time.sleep(1)

    def calc_wait_time_and_snap_name(self, snap_sched_exec_epoch, schedule):
        """Return ``(seconds to wait, expected snapshot timestamp suffix)``."""
        timo = self._schedule_to_timeout(schedule)
        # calculate wait time upto the next minute
        wait_timo = seconds_upto_next_schedule(snap_sched_exec_epoch, timo)

        # expected "scheduled" snapshot name
        expected_at = (datetime.utcfromtimestamp(snap_sched_exec_epoch)
                       + timedelta(seconds=wait_timo))
        ts_name = expected_at.strftime(TestSnapSchedules.SNAPSHOT_TS_FORMAT)
        return (wait_timo, ts_name)

    def verify_schedule(self, dir_path, schedules, retentions=None):
        """Assert that ``snap-schedule list`` reports the given entries.

        :param dir_path: path the schedule was set on.
        :param schedules: values expected in the ``schedule`` field.
        :param retentions: values expected in the ``retention`` field
                           (``None`` means nothing to check).
        """
        retentions = [] if retentions is None else retentions
        log.debug(f'expected_schedule: {schedules}, expected_retention: {retentions}')

        result = self.fs_snap_schedule_cmd('list', path=dir_path, format='json')
        json_res = json.loads(result)
        log.debug(f'json_res: {json_res}')

        for schedule in schedules:
            self.assertTrue(schedule in json_res['schedule'])
        for retention in retentions:
            self.assertTrue(retention in json_res['retention'])

    def remove_snapshots(self, dir_path):
        """Remove (rmdir) every snapshot listed under ``<dir_path>/.snap``."""
        snap_path = f'{dir_path}/.snap'

        for snapshot in self.mount_a.ls(path=snap_path):
            snapshot_path = os.path.join(snap_path, snapshot)
            log.debug(f'removing snapshot: {snapshot_path}')
            self.mount_a.run_shell(['rmdir', snapshot_path])

    def _list_expecting_enoent(self, unexpected_success_msg):
        """Run ``snap-schedule list`` on TEST_DIRECTORY and require ENOENT.

        :param unexpected_success_msg: RuntimeError message used when the
                                       command does not fail at all.
        :raises RuntimeError: on success, or on failure with another errno.
        """
        try:
            self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY)
        except CommandFailedError as ce:
            if ce.exitstatus != errno.ENOENT:
                raise RuntimeError('incorrect errno when listing a non-existing snap schedule') from ce
        else:
            raise RuntimeError(unexpected_success_msg)

    def _snap_event_checker(self, event, snap_sfx, exec_time, timo):
        """Build a verify() callback for one expected scheduled snapshot.

        The callback requires exactly one snapshot name in its argument and
        returns True (after checking the elapsed time) when that name is
        ``scheduled-`` followed by the first 16 characters of ``snap_sfx``;
        otherwise it returns False.

        :param event: word used in the debug log ('added' or 'removed').
        """
        def checker(snaps):
            log.debug(f'snapshots {event}={snaps}')
            self.assertEqual(len(snaps), 1)
            snapname = snaps[0]
            # compare up to the minute: 'YYYY-mm-dd-HH_MM' is 16 characters
            if snapname.startswith('scheduled-') and snapname[10:26] == snap_sfx[:16]:
                self.check_scheduled_snapshot(exec_time, timo)
                return True
            return False
        return checker

    def test_non_existent_snap_schedule_list(self):
        """Test listing snap schedules on a non-existing filesystem path failure"""
        self._list_expecting_enoent('expected "fs snap-schedule list" to fail')

    def test_non_existent_schedule(self):
        """Test listing non-existing snap schedules failure"""
        self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

        self._list_expecting_enoent('expected "fs snap-schedule list" returned fail')

        self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])

    def test_snap_schedule_list_post_schedule_remove(self):
        """Test listing snap schedules post removal of a schedule"""
        self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

        self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1h')

        self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)

        self._list_expecting_enoent('"fs snap-schedule list" returned error')

        self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])

    def test_snap_schedule(self):
        """Test existence of a scheduled snapshot"""
        self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

        # set a schedule on the dir
        self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M')
        exec_time = time.time()

        timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
        log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo}s...')
        to_wait = timo + 2  # some leeway to avoid false failures...

        # verify snapshot schedule
        self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M'])

        self.add_snap_create_cbk(
            self._snap_event_checker('added', snap_sfx, exec_time, timo))
        self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait)
        self.assert_if_not_verified()

        # remove snapshot schedule
        self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)

        # remove all scheduled snapshots
        self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY)

        self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])

    def test_multi_snap_schedule(self):
        """Test existence of multiple scheduled snapshots"""
        self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

        # set schedules on the dir
        self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M')
        self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='2M')
        exec_time = time.time()

        timo_1, snap_sfx_1 = self.calc_wait_time_and_snap_name(exec_time, '1M')
        log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_1} in ~{timo_1}s...')
        timo_2, snap_sfx_2 = self.calc_wait_time_and_snap_name(exec_time, '2M')
        log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_2} in ~{timo_2}s...')
        to_wait = timo_2 + 2  # use max timeout

        # verify snapshot schedule
        self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M', '2M'])

        self.add_snap_create_cbk(
            self._snap_event_checker('added', snap_sfx_1, exec_time, timo_1))
        self.add_snap_create_cbk(
            self._snap_event_checker('added', snap_sfx_2, exec_time, timo_2))
        self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait)
        self.assert_if_not_verified()

        # remove snapshot schedule
        self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)

        # remove all scheduled snapshots
        self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY)

        self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])

    def test_snap_schedule_with_retention(self):
        """Test scheduled snapshots along with retention policy"""
        self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

        # set a schedule on the dir
        self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M')
        self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedules.TEST_DIRECTORY, retention_spec_or_period='1M')
        exec_time = time.time()

        timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
        log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_1}s...')
        to_wait = timo_1 + 2  # some leeway to avoid false failures...

        # verify snapshot schedule
        self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M'], retentions=[{'M': 1}])

        self.add_snap_create_cbk(
            self._snap_event_checker('added', snap_sfx, exec_time, timo_1))
        self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait)
        self.assert_if_not_verified()

        timo_2 = timo_1 + 60  # expected snapshot removal timeout
        log.debug(f'expecting removal of snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_2}s...')
        # The removal is expected a full period after the creation, so the
        # polling budget has to grow accordingly.
        to_wait = timo_2
        self.add_snap_remove_cbk(
            self._snap_event_checker('removed', snap_sfx, exec_time, timo_2))
        self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait + 2)
        self.assert_if_not_verified()

        # remove snapshot schedule
        self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)

        # remove all scheduled snapshots
        self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY)

        self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])

    def get_snap_stats(self, dir_path):
        """Return snapshot counts for ``dir_path``.

        :param dir_path: path with a leading character (the tests pass '/...')
                         that is stripped before listing via the mount.
        :returns: dict with ``fs_count`` (entries under ``.snap``) and
                  ``db_count`` (``created_count`` from ``snap-schedule status``
                  for the '1M' schedule).
        """
        snap_path = f"{dir_path}/.snap"[1:]
        snapshots = self.mount_a.ls(path=snap_path)
        fs_count = len(snapshots)
        log.debug(f'snapshots: {snapshots}')

        result = self.fs_snap_schedule_cmd('status', path=dir_path,
                                           snap_schedule='1M', format='json')
        json_res = json.loads(result)[0]
        db_count = int(json_res['created_count'])
        log.debug(f'json_res: {json_res}')

        return {'fs_count': fs_count, 'db_count': db_count}

    def verify_snap_stats(self, dir_path):
        """Assert that the file-system and DB snapshot counts agree."""
        snap_stats = self.get_snap_stats(dir_path)
        self.assertTrue(snap_stats['fs_count'] == snap_stats['db_count'])

    def test_concurrent_snap_creates(self):
        """Test concurrent snap creates in same file-system without db issues

        Test snap creates at same cadence on same fs to verify correct stats.
        A single SQLite DB Connection handle cannot be used to run concurrent
        transactions and results transaction aborts. This test makes sure that
        proper care has been taken in the code to avoid such situation by
        verifying number of dirs created on the file system with the
        created_count in the schedule_meta table for the specific path.
        """
        self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

        # NOTE(review): the number of test directories (10) is restored from
        # context -- confirm against the upstream test.
        testdirs = [
            os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "dir" + str(d))
            for d in range(10)
        ]

        for d in testdirs:
            # the mount helper takes paths without the leading '/'
            self.mount_a.run_shell(['mkdir', '-p', d[1:]])
            self.fs_snap_schedule_cmd('add', path=d, snap_schedule='1M')

        exec_time = time.time()
        timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')

        for d in testdirs:
            self.fs_snap_schedule_cmd('activate', path=d, snap_schedule='1M')

        # we wait for 10 snaps to be taken
        wait_time = timo_1 + 10 * 60 + 15
        time.sleep(wait_time)

        for d in testdirs:
            self.fs_snap_schedule_cmd('deactivate', path=d, snap_schedule='1M')

        for d in testdirs:
            self.verify_snap_stats(d)

        for d in testdirs:
            self.fs_snap_schedule_cmd('remove', path=d, snap_schedule='1M')
            self.remove_snapshots(d[1:])
            self.mount_a.run_shell(['rmdir', d[1:]])

    def test_snap_schedule_with_mgr_restart(self):
        """Test that snap schedule is resumed after mgr restart"""
        self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])
        testdir = os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "test_restart")
        self.mount_a.run_shell(['mkdir', '-p', testdir[1:]])
        self.fs_snap_schedule_cmd('add', path=testdir, snap_schedule='1M')

        exec_time = time.time()
        timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')

        self.fs_snap_schedule_cmd('activate', path=testdir, snap_schedule='1M')

        # we wait for 10 snaps to be taken
        wait_time = timo_1 + 10 * 60 + 15
        time.sleep(wait_time)

        old_stats = self.get_snap_stats(testdir)
        self.assertTrue(old_stats['fs_count'] == old_stats['db_count'])
        self.assertTrue(old_stats['fs_count'] > 9)

        # restart mgr
        active_mgr = self.mgr_cluster.mon_manager.get_mgr_dump()['active_name']
        log.debug(f'restarting active mgr: {active_mgr}')
        self.mgr_cluster.mon_manager.revive_mgr(active_mgr)
        time.sleep(300)  # sleep for 5 minutes
        self.fs_snap_schedule_cmd('deactivate', path=testdir, snap_schedule='1M')

        new_stats = self.get_snap_stats(testdir)
        self.assertTrue(new_stats['fs_count'] == new_stats['db_count'])
        self.assertTrue(new_stats['fs_count'] > old_stats['fs_count'])
        self.assertTrue(new_stats['db_count'] > old_stats['db_count'])

        # cleanup
        self.fs_snap_schedule_cmd('remove', path=testdir, snap_schedule='1M')
        self.remove_snapshots(testdir[1:])
        self.mount_a.run_shell(['rmdir', testdir[1:]])