]>
git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/cephfs/test_snap_schedules.py
4a9ce838ef7fc22ff2551c78ad3271eb20dd85f2
7 from tasks
.cephfs
.cephfs_test_case
import CephFSTestCase
8 from teuthology
.exceptions
import CommandFailedError
9 from datetime
import datetime
, timedelta
11 log
= logging
.getLogger(__name__
)
def extract_schedule_and_retention_spec(spec=()):
    """Split (schedule, retention) pairs into two sets.

    :param spec: iterable of indexable items; element 0 of each item is
                 collected as a schedule and element 1 as a retention
    :return: tuple ``(schedules, retentions)``, both of type ``set``
    """
    # An immutable default avoids the shared-mutable-default pitfall.
    schedule = {s[0] for s in spec}
    retention = {s[1] for s in spec}
    return (schedule, retention)
def seconds_upto_next_schedule(time_from, timo):
    """Return the seconds from ``time_from`` until ``timo`` seconds past
    the start of the minute that contains ``time_from``.

    :param time_from: epoch timestamp (int or float); truncated to an int
    :param timo: offset in seconds added to the start of that minute
    :return: integer number of seconds
    """
    # NOTE(review): the line defining `ts` was missing from the block;
    # restored as the integer part of `time_from`.
    ts = int(time_from)
    return ((int(ts / 60) * 60) + timo) - ts
22 class TestSnapSchedulesHelper(CephFSTestCase
):
25 TEST_VOLUME_NAME
= 'snap_vol'
26 TEST_DIRECTORY
= 'snap_test_dir1'
28 # this should be in sync with snap_schedule format
29 SNAPSHOT_TS_FORMAT
= '%Y-%m-%d-%H_%M_%S'
def check_scheduled_snapshot(self, exec_time, timo):
    """Assert that about ``timo`` seconds have passed since ``exec_time``.

    :param exec_time: epoch timestamp taken when the schedule was added
                      (callers in this file pass ``time.time()``)
    :param timo: expected number of seconds until the snapshot shows up
    """
    # NOTE(review): the line defining `now` was missing from the block;
    # restored as the current epoch time, matching how callers obtain
    # `exec_time`.
    now = time.time()
    delta = now - exec_time
    log.debug(f'exec={exec_time}, now = {now}, timo = {timo}')
    # tolerate snapshot existence in the range [-5,+5]
    self.assertTrue((delta <= timo + 5) and (delta >= timo - 5))
38 def _fs_cmd(self
, *args
):
39 return self
.mgr_cluster
.mon_manager
.raw_cluster_cmd("fs", *args
)
def fs_snap_schedule_cmd(self, *args, **kwargs):
    """Run an ``fs snap-schedule`` sub-command and return its output.

    :param args: positional words of the sub-command (e.g. ``'add'``)
    :param kwargs: ``fs`` selects the file system (defaults to
                   ``self.volname``); ``format`` is passed as
                   ``--format``; the values of all remaining keyword
                   arguments are appended as strings, in keyword order
    :return: the result of ``self._fs_cmd``
    """
    fs = kwargs.pop('fs', self.volname)
    # NOTE(review): `fs` was popped but never used, the loop below had no
    # body and nothing was returned although callers json-parse the
    # result.  Restored as shown -- confirm against upstream.
    args += ('--fs', fs)
    if 'format' in kwargs:
        fmt = kwargs.pop('format')
        args += ('--format', fmt)
    # Keyword names only document the call site; just the values are sent.
    for val in kwargs.values():
        args += (str(val),)
    res = self._fs_cmd('snap-schedule', *args)
    log.debug(f'res={res}')
    return res
53 def _create_or_reuse_test_volume(self
):
54 result
= json
.loads(self
._fs
_cmd
("volume", "ls"))
56 self
.vol_created
= True
57 self
.volname
= TestSnapSchedulesHelper
.TEST_VOLUME_NAME
58 self
._fs
_cmd
("volume", "create", self
.volname
)
60 self
.volname
= result
[0]['name']
62 def _enable_snap_schedule(self
):
63 return self
.mgr_cluster
.mon_manager
.raw_cluster_cmd("mgr", "module", "enable", "snap_schedule")
65 def _disable_snap_schedule(self
):
66 return self
.mgr_cluster
.mon_manager
.raw_cluster_cmd("mgr", "module", "disable", "snap_schedule")
68 def _allow_minute_granularity_snapshots(self
):
69 self
.config_set('mgr', 'mgr/snap_schedule/allow_m_granularity', True)
71 def _dump_on_update(self
):
72 self
.config_set('mgr', 'mgr/snap_schedule/dump_on_update', True)
def setUp(self):
    """Prepare a volume, callback lists and the snap_schedule module.

    NOTE(review): the ``def`` line and the initialisers for ``volname``,
    ``create_cbks`` and ``remove_cbks`` were missing from the block.
    They are restored here because the other helpers read those
    attributes -- confirm against upstream.
    """
    super(TestSnapSchedulesHelper, self).setUp()
    self.volname = None
    self.vol_created = False
    self._create_or_reuse_test_volume()
    # callbacks still waiting for a snapshot creation/removal to be seen
    self.create_cbks = []
    self.remove_cbks = []
    # used to figure out which snapshots are created/deleted
    self.snapshots = set()
    self._enable_snap_schedule()
    self._allow_minute_granularity_snapshots()
    self._dump_on_update()
def tearDown(self):
    """Remove the test volume if this test created it, then disable the
    snap_schedule module.

    NOTE(review): the ``def`` line and the ``vol_created`` guard were
    missing from the block and are restored here.
    ``_delete_test_volume`` is not defined in the visible part of the
    file -- confirm it exists on a base class.
    """
    if self.vol_created:
        self._delete_test_volume()
    self._disable_snap_schedule()
    super(TestSnapSchedulesHelper, self).tearDown()
93 def _schedule_to_timeout(self
, schedule
):
95 period
= int(schedule
[0:-1])
99 return period
* 60 * 60
101 return period
* 60 * 60 * 24
103 return period
* 60 * 60 * 24 * 7
105 raise RuntimeError('schedule multiplier not recognized')
def add_snap_create_cbk(self, cbk):
    """Append ``cbk`` to the list of pending snapshot-creation callbacks."""
    self.create_cbks.append(cbk)
def remove_snap_create_cbk(self, cbk):
    """Remove ``cbk`` from the pending snapshot-creation callbacks.

    :raises ValueError: if ``cbk`` is not registered (from ``list.remove``)
    """
    self.create_cbks.remove(cbk)
def add_snap_remove_cbk(self, cbk):
    """Append ``cbk`` to the list of pending snapshot-removal callbacks."""
    self.remove_cbks.append(cbk)
def remove_snap_remove_cbk(self, cbk):
    """Remove ``cbk`` from the pending snapshot-removal callbacks.

    :raises ValueError: if ``cbk`` is not registered (from ``list.remove``)
    """
    self.remove_cbks.remove(cbk)
def assert_if_not_verified(self):
    """Fail the test if any creation or removal callback is still pending."""
    self.assertListEqual(self.create_cbks, [])
    self.assertListEqual(self.remove_cbks, [])
def verify(self, dir_path, max_trials):
    """Poll ``<dir_path>/.snap`` and feed changes to the pending callbacks.

    Each round lists the snapshot directory, diffs it against the
    previously seen set (``self.snapshots``) and hands the added/removed
    names to the registered callbacks.  Polling stops when no callback is
    pending or after ``max_trials`` rounds.

    :param dir_path: directory whose ``.snap`` sub-directory is listed
    :param max_trials: maximum number of polling rounds

    NOTE(review): the block was missing the initialisation and increment
    of ``trials``, the guards around the callback loops, the ``break``
    statements and the per-round sleep.  They are restored below so that
    a callback is dropped once it returns a truthy value and rounds are
    one second apart -- confirm against upstream.
    """
    trials = 0
    snap_path = f'{dir_path}/.snap'
    while (len(self.create_cbks) or len(self.remove_cbks)) and trials < max_trials:
        snapshots = set(self.mount_a.ls(path=snap_path))
        log.info(f'snapshots: {snapshots}')
        added = snapshots - self.snapshots
        log.info(f'added: {added}')
        removed = self.snapshots - snapshots
        log.info(f'removed: {removed}')
        if added:
            # iterate over a copy: a matching callback is removed in-loop
            for cbk in list(self.create_cbks):
                res = cbk(list(added))
                if res:
                    self.remove_snap_create_cbk(cbk)
                    break
        if removed:
            for cbk in list(self.remove_cbks):
                res = cbk(list(removed))
                if res:
                    self.remove_snap_remove_cbk(cbk)
                    break
        self.snapshots = snapshots
        trials += 1
        time.sleep(1)
def calc_wait_time_and_snap_name(self, snap_sched_exec_epoch, schedule):
    """Compute how long to wait for a scheduled snapshot and its timestamp.

    :param snap_sched_exec_epoch: epoch time at which the schedule was set
    :param schedule: schedule string understood by
                     ``_schedule_to_timeout`` (e.g. ``'1M'``)
    :return: tuple ``(wait_timo, ts_name)``: seconds to wait, and the
             expected UTC timestamp formatted with ``SNAPSHOT_TS_FORMAT``
    """
    timo = self._schedule_to_timeout(schedule)
    # calculate wait time up to the next minute
    wait_timo = seconds_upto_next_schedule(snap_sched_exec_epoch, timo)

    # expected "scheduled" snapshot name
    expected_at = (datetime.utcfromtimestamp(snap_sched_exec_epoch)
                   + timedelta(seconds=wait_timo))
    ts_name = expected_at.strftime(TestSnapSchedulesHelper.SNAPSHOT_TS_FORMAT)
    return (wait_timo, ts_name)
def verify_schedule(self, dir_path, schedules, retentions=()):
    """Assert that ``fs snap-schedule list`` reports the expected entries.

    :param dir_path: path whose schedule is listed
    :param schedules: values that must each be contained in the
                      ``'schedule'`` field of the JSON result
    :param retentions: values that must each be contained in the
                       ``'retention'`` field of the JSON result
    """
    log.debug(f'expected_schedule: {schedules}, expected_retention: {retentions}')

    result = self.fs_snap_schedule_cmd('list', path=dir_path, format='json')
    json_res = json.loads(result)
    log.debug(f'json_res: {json_res}')

    # assertIn reports both operands on failure, unlike assertTrue(x in y)
    for schedule in schedules:
        self.assertIn(schedule, json_res['schedule'])
    for retention in retentions:
        self.assertIn(retention, json_res['retention'])
169 class TestSnapSchedules(TestSnapSchedulesHelper
):
def remove_snapshots(self, dir_path):
    """Remove every snapshot listed under ``<dir_path>/.snap``.

    :param dir_path: directory whose snapshots are removed via ``rmdir``
    """
    snap_path = f'{dir_path}/.snap'

    for snapshot in self.mount_a.ls(path=snap_path):
        snapshot_path = os.path.join(snap_path, snapshot)
        log.debug(f'removing snapshot: {snapshot_path}')
        self.mount_a.run_shell(['rmdir', snapshot_path])
def test_non_existent_snap_schedule_list(self):
    """Test listing snap schedules on a non-existing filesystem path failure"""
    # NOTE(review): the `try:` and `else:` lines were missing from the
    # block; restored so that success of the command is the failure case.
    try:
        self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY)
    except CommandFailedError as ce:
        if ce.exitstatus != errno.ENOENT:
            raise RuntimeError('incorrect errno when listing a non-existing snap schedule')
    else:
        raise RuntimeError('expected "fs snap-schedule list" to fail')
def test_non_existent_schedule(self):
    """Test listing non-existing snap schedules failure"""
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

    # NOTE(review): the `try:` and `else:` lines were missing from the
    # block; restored so that success of the command is the failure case.
    try:
        self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY)
    except CommandFailedError as ce:
        if ce.exitstatus != errno.ENOENT:
            raise RuntimeError('incorrect errno when listing a non-existing snap schedule')
    else:
        raise RuntimeError('expected "fs snap-schedule list" returned fail')

    self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
def test_snap_schedule_list_post_schedule_remove(self):
    """Test listing snap schedules post removal of a schedule"""
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

    self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1h')

    self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)

    # NOTE(review): the `try:` and `else:` lines were missing from the
    # block; restored so that listing a removed schedule must fail with
    # ENOENT.
    try:
        self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY)
    except CommandFailedError as ce:
        if ce.exitstatus != errno.ENOENT:
            raise RuntimeError('incorrect errno when listing a non-existing snap schedule')
    else:
        raise RuntimeError('"fs snap-schedule list" returned error')

    self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
def test_snap_schedule(self):
    """Test existence of a scheduled snapshot"""
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

    # set a schedule on the dir
    self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M')
    exec_time = time.time()

    timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
    log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo}s...')
    to_wait = timo + 2 # some leeway to avoid false failures...

    # verify snapshot schedule
    self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M'])

    def verify_added(snaps_added):
        # Callback for verify(): reports whether the expected snapshot
        # is the one that appeared.
        log.debug(f'snapshots added={snaps_added}')
        self.assertEqual(len(snaps_added), 1)
        snapname = snaps_added[0]
        if snapname.startswith('scheduled-'):
            # compare the timestamp part down to the minute
            if snapname[10:26] == snap_sfx[:16]:
                self.check_scheduled_snapshot(exec_time, timo)
                # NOTE(review): the return statements were missing from
                # the block; restored so verify() can retire the callback.
                return True
        return False

    self.add_snap_create_cbk(verify_added)
    self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait)
    self.assert_if_not_verified()

    # remove snapshot schedule
    self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)

    # remove all scheduled snapshots
    self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY)

    self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
def test_multi_snap_schedule(self):
    """Test existence of multiple scheduled snapshots"""
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

    # set schedules on the dir
    self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M')
    self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='2M')
    exec_time = time.time()

    timo_1, snap_sfx_1 = self.calc_wait_time_and_snap_name(exec_time, '1M')
    log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_1} in ~{timo_1}s...')
    timo_2, snap_sfx_2 = self.calc_wait_time_and_snap_name(exec_time, '2M')
    log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_2} in ~{timo_2}s...')
    to_wait = timo_2 + 2 # use max timeout

    # verify snapshot schedule
    self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M', '2M'])

    # NOTE(review): the return statements of both callbacks were missing
    # from the block; restored so verify() can retire a callback once its
    # snapshot has been seen.
    def verify_added_1(snaps_added):
        log.debug(f'snapshots added={snaps_added}')
        self.assertEqual(len(snaps_added), 1)
        snapname = snaps_added[0]
        if snapname.startswith('scheduled-'):
            if snapname[10:26] == snap_sfx_1[:16]:
                self.check_scheduled_snapshot(exec_time, timo_1)
                return True
        return False

    def verify_added_2(snaps_added):
        log.debug(f'snapshots added={snaps_added}')
        self.assertEqual(len(snaps_added), 1)
        snapname = snaps_added[0]
        if snapname.startswith('scheduled-'):
            if snapname[10:26] == snap_sfx_2[:16]:
                self.check_scheduled_snapshot(exec_time, timo_2)
                return True
        return False

    self.add_snap_create_cbk(verify_added_1)
    self.add_snap_create_cbk(verify_added_2)
    self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait)
    self.assert_if_not_verified()

    # remove snapshot schedule
    self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)

    # remove all scheduled snapshots
    self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY)

    self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
def test_snap_schedule_with_retention(self):
    """Test scheduled snapshots along with retention policy"""
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

    # set a schedule on the dir
    self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M')
    self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedules.TEST_DIRECTORY, retention_spec_or_period='1M')
    exec_time = time.time()

    timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
    log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_1}s...')
    to_wait = timo_1 + 2 # some leeway to avoid false failures...

    # verify snapshot schedule
    self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M'], retentions=[{'M':1}])

    # NOTE(review): the return statements of both callbacks were missing
    # from the block; restored so verify() can retire a callback once its
    # snapshot event has been seen.
    def verify_added(snaps_added):
        log.debug(f'snapshots added={snaps_added}')
        self.assertEqual(len(snaps_added), 1)
        snapname = snaps_added[0]
        if snapname.startswith('scheduled-'):
            if snapname[10:26] == snap_sfx[:16]:
                self.check_scheduled_snapshot(exec_time, timo_1)
                return True
        return False

    self.add_snap_create_cbk(verify_added)
    self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait)
    self.assert_if_not_verified()

    timo_2 = timo_1 + 60 # expected snapshot removal timeout

    def verify_removed(snaps_removed):
        log.debug(f'snapshots removed={snaps_removed}')
        self.assertEqual(len(snaps_removed), 1)
        snapname = snaps_removed[0]
        if snapname.startswith('scheduled-'):
            if snapname[10:26] == snap_sfx[:16]:
                self.check_scheduled_snapshot(exec_time, timo_2)
                return True
        return False

    log.debug(f'expecting removal of snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_2}s...')

    self.add_snap_remove_cbk(verify_removed)
    self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait+2)
    self.assert_if_not_verified()

    # remove snapshot schedule
    self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)

    # remove all scheduled snapshots
    self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY)

    self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
def get_snap_stats(self, dir_path):
    """Count snapshots on the file system and in the schedule status.

    :param dir_path: schedule path; its first character is stripped
                     before listing ``.snap`` on the mount
    :return: dict with ``'fs_count'`` (entries listed under ``.snap``)
             and ``'db_count'`` (``created_count`` of the first entry of
             ``fs snap-schedule status`` for the ``1M`` schedule)
    """
    snap_path = f"{dir_path}/.snap"[1:]
    snapshots = self.mount_a.ls(path=snap_path)
    fs_count = len(snapshots)
    log.debug(f'snapshots: {snapshots}')

    result = self.fs_snap_schedule_cmd('status', path=dir_path,
                                       snap_schedule='1M', format='json')
    json_res = json.loads(result)[0]
    db_count = int(json_res['created_count'])
    log.debug(f'json_res: {json_res}')

    # NOTE(review): creation and return of `snap_stats` were missing from
    # the block; callers index the result by these two keys.
    snap_stats = {'fs_count': fs_count, 'db_count': db_count}
    return snap_stats
def verify_snap_stats(self, dir_path):
    """Assert that the file system and the schedule DB agree on the
    number of snapshots for ``dir_path``."""
    snap_stats = self.get_snap_stats(dir_path)
    # assertEqual shows both counts when they differ
    self.assertEqual(snap_stats['fs_count'], snap_stats['db_count'])
def test_concurrent_snap_creates(self):
    """Test concurrent snap creates in same file-system without db issues

    Test snap creates at same cadence on same fs to verify correct stats.
    A single SQLite DB Connection handle cannot be used to run concurrent
    transactions and results transaction aborts. This test makes sure that
    proper care has been taken in the code to avoid such situation by
    verifying number of dirs created on the file system with the
    created_count in the schedule_meta table for the specific path.
    """
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

    # NOTE(review): the creation of `testdirs` and every loop header were
    # missing from the block.  The loops are restored from the use of `d`
    # in each statement; the directory count is not recoverable from the
    # visible code and is assumed to be 10 -- confirm against upstream.
    num_dirs = 10
    testdirs = [os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "dir" + str(d))
                for d in range(num_dirs)]

    for d in testdirs:
        self.mount_a.run_shell(['mkdir', '-p', d[1:]])
        self.fs_snap_schedule_cmd('add', path=d, snap_schedule='1M')

    exec_time = time.time()
    timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')

    for d in testdirs:
        self.fs_snap_schedule_cmd('activate', path=d, snap_schedule='1M')

    # we wait for 10 snaps to be taken
    wait_time = timo_1 + 10 * 60 + 15
    time.sleep(wait_time)

    for d in testdirs:
        self.fs_snap_schedule_cmd('deactivate', path=d, snap_schedule='1M')

    for d in testdirs:
        self.verify_snap_stats(d)

    for d in testdirs:
        self.fs_snap_schedule_cmd('remove', path=d, snap_schedule='1M')
        self.remove_snapshots(d[1:])
        self.mount_a.run_shell(['rmdir', d[1:]])
def test_snap_schedule_with_mgr_restart(self):
    """Test that snap schedule is resumed after mgr restart"""
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])
    testdir = os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "test_restart")
    self.mount_a.run_shell(['mkdir', '-p', testdir[1:]])
    self.fs_snap_schedule_cmd('add', path=testdir, snap_schedule='1M')

    exec_time = time.time()
    timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')

    self.fs_snap_schedule_cmd('activate', path=testdir, snap_schedule='1M')

    # we wait for 10 snaps to be taken
    wait_time = timo_1 + 10 * 60 + 15
    time.sleep(wait_time)

    old_stats = self.get_snap_stats(testdir)
    self.assertTrue(old_stats['fs_count'] == old_stats['db_count'])
    self.assertTrue(old_stats['fs_count'] > 9)

    # restart the active mgr, then give the schedule time to resume
    active_mgr = self.mgr_cluster.mon_manager.get_mgr_dump()['active_name']
    log.debug(f'restarting active mgr: {active_mgr}')
    self.mgr_cluster.mon_manager.revive_mgr(active_mgr)
    time.sleep(300)  # sleep for 5 minutes
    self.fs_snap_schedule_cmd('deactivate', path=testdir, snap_schedule='1M')

    # both counters must have advanced and must still agree
    new_stats = self.get_snap_stats(testdir)
    self.assertTrue(new_stats['fs_count'] == new_stats['db_count'])
    self.assertTrue(new_stats['fs_count'] > old_stats['fs_count'])
    self.assertTrue(new_stats['db_count'] > old_stats['db_count'])

    # cleanup
    self.fs_snap_schedule_cmd('remove', path=testdir, snap_schedule='1M')
    self.remove_snapshots(testdir[1:])
    self.mount_a.run_shell(['rmdir', testdir[1:]])
459 class TestSnapSchedulesSnapdir(TestSnapSchedulesHelper
):
def remove_snapshots(self, dir_path, sdn):
    """Remove every snapshot listed under ``<dir_path>/<sdn>``.

    :param dir_path: directory whose snapshots are removed via ``rmdir``
    :param sdn: name of the snapshot directory inside ``dir_path``
    """
    snap_path = f'{dir_path}/{sdn}'

    for snapshot in self.mount_a.ls(path=snap_path):
        snapshot_path = os.path.join(snap_path, snapshot)
        log.debug(f'removing snapshot: {snapshot_path}')
        self.mount_a.run_shell(['rmdir', snapshot_path])
def get_snap_dir_name(self):
    """Return the snapshot directory name configured for ``mount_a``.

    For a kernel mount the ``snapdirname`` client-config entry is read;
    for a FUSE mount the ``client_snapdir`` entry is read, written to the
    ``client snapdir`` ceph.conf option and the mount is remounted.  Both
    lookups fall back to ``'.snap'``.

    :return: the snapshot directory name
    """
    from tasks.cephfs.fuse_mount import FuseMount
    from tasks.cephfs.kernel_mount import KernelMount

    # NOTE(review): the block had no return although the caller uses the
    # result; restored.  `sdn` starts at the same '.snap' fallback both
    # branches use, so an unexpected mount type no longer leaves it
    # unbound.
    sdn = '.snap'
    if isinstance(self.mount_a, KernelMount):
        sdn = self.mount_a.client_config.get('snapdirname', '.snap')
    elif isinstance(self.mount_a, FuseMount):
        sdn = self.mount_a.client_config.get('client_snapdir', '.snap')
        self.fs.set_ceph_conf('client', 'client snapdir', sdn)
        self.mount_a.remount()
    return sdn
def test_snap_dir_name(self):
    """Test the correctness of snap directory name"""
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedulesSnapdir.TEST_DIRECTORY])

    # set a schedule on the dir
    self.fs_snap_schedule_cmd('add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, snap_schedule='1M')
    self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, retention_spec_or_period='1M')
    exec_time = time.time()

    timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
    sdn = self.get_snap_dir_name()
    log.info(f'expecting snap {TestSnapSchedulesSnapdir.TEST_DIRECTORY}/{sdn}/scheduled-{snap_sfx} in ~{timo}s...')

    # verify snapshot schedule
    self.verify_schedule(TestSnapSchedulesSnapdir.TEST_DIRECTORY, ['1M'], retentions=[{'M':1}])

    # remove snapshot schedule
    self.fs_snap_schedule_cmd('remove', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY)

    # remove all scheduled snapshots
    self.remove_snapshots(TestSnapSchedulesSnapdir.TEST_DIRECTORY, sdn)

    self.mount_a.run_shell(['rmdir', TestSnapSchedulesSnapdir.TEST_DIRECTORY])