9 from .metadata_manager
import MetadataManager
10 from .subvolume_attrs
import SubvolumeTypes
, SubvolumeStates
, SubvolumeFeatures
11 from .op_sm
import SubvolumeOpSm
12 from .subvolume_v1
import SubvolumeV1
13 from ..template
import SubvolumeTemplate
14 from ...exception
import OpSmException
, VolumeException
, MetadataMgrException
15 from ...fs_util
import listdir
, create_base_dir
16 from ..template
import SubvolumeOpType
# module-level logger, named after this module per mgr plugin convention
log = logging.getLogger(__name__)
class SubvolumeV2(SubvolumeV1):
    """
    Version 2 subvolumes creates a subvolume with path as follows,
        volumes/<group-name>/<subvolume-name>/<uuid>/

    The distinguishing feature of V2 subvolume as compared to V1 subvolumes is its ability to retain snapshots
    of a subvolume on removal. This is done by creating snapshots under the <subvolume-name> directory,
    rather than under the <uuid> directory, as is the case of V1 subvolumes.

    - The directory under which user data resides is <uuid>
    - Snapshots of the subvolume are taken within the <subvolume-name> directory
    - A meta file is maintained under the <subvolume-name> directory as a metadata store, storing information similar
      to V1 subvolumes
    - On a request to remove subvolume but retain its snapshots, only the <uuid> directory is moved to trash, retaining
      the rest of the subvolume and its meta file.
    - The <uuid> directory, when present, is the current incarnation of the subvolume, which may have snapshots of
      older incarnations of the same subvolume.
    - V1 subvolumes that currently do not have any snapshots are upgraded to V2 subvolumes automatically, to support the
      snapshot retention feature
    """

    # on-disk layout version stamp for this subvolume flavor
    VERSION = 2

    @staticmethod
    def version():
        # layout version implemented by this class
        return SubvolumeV2.VERSION

    @property
    def features(self):
        # features supported by v2 subvolumes: cloning from snapshots,
        # snapshot auto-protection, and snapshot retention on removal
        return [SubvolumeFeatures.FEATURE_SNAPSHOT_CLONE.value,
                SubvolumeFeatures.FEATURE_SNAPSHOT_AUTOPROTECT.value,
                SubvolumeFeatures.FEATURE_SNAPSHOT_RETENTION.value]
    @property
    def retained(self):
        """True if this subvolume was removed with its snapshots retained."""
        try:
            self.metadata_mgr.refresh()
            if self.state == SubvolumeStates.STATE_RETAINED:
                return True
            return False
        except MetadataMgrException as me:
            # a missing metadata file simply means the subvolume is not retained;
            # anything else is an internal error
            if me.errno != -errno.ENOENT:
                raise VolumeException(me.errno, "internal error while processing subvolume '{0}'".format(self.subvolname))
        return False
    @property
    def purgeable(self):
        """True if the subvolume can be garbage collected: retained, with no
        snapshots left and no purge already pending."""
        if not self.retained or self.list_snapshots() or self.has_pending_purges:
            return False
        return True
    @property
    def has_pending_purges(self):
        """True if trashed incarnations are still awaiting asynchronous purge."""
        try:
            return not listdir(self.fs, self.trash_dir) == []
        except VolumeException as ve:
            # no trash directory means nothing is pending
            if ve.errno == -errno.ENOENT:
                return False
            raise
    @property
    def trash_dir(self):
        """Path of the per-subvolume trash directory (holds removed incarnations)."""
        return os.path.join(self.base_path, b".trash")
    def create_trashcan(self):
        """per subvolume trash directory"""
        try:
            self.fs.stat(self.trash_dir)
        except cephfs.Error as e:
            if e.args[0] == errno.ENOENT:
                # trash directory does not exist yet -- create it, owner-only access
                try:
                    self.fs.mkdir(self.trash_dir, 0o700)
                except cephfs.Error as ce:
                    raise VolumeException(-ce.args[0], ce.args[1])
            else:
                raise VolumeException(-e.args[0], e.args[1])
96 def mark_subvolume(self
):
97 # set subvolume attr, on subvolume root, marking it as a CephFS subvolume
98 # subvolume root is where snapshots would be taken, and hence is the base_path for v2 subvolumes
100 # MDS treats this as a noop for already marked subvolume
101 self
.fs
.setxattr(self
.base_path
, 'ceph.dir.subvolume', b
'1', 0)
102 except cephfs
.InvalidValue
as e
:
103 raise VolumeException(-errno
.EINVAL
, "invalid value specified for ceph.dir.subvolume")
104 except cephfs
.Error
as e
:
105 raise VolumeException(-e
.args
[0], e
.args
[1])
    @staticmethod
    def is_valid_uuid(uuid_str):
        """Return True if uuid_str parses as a UUID, else False."""
        # NOTE(review): body reconstructed -- the original body lines are not
        # visible in this view; confirm against the upstream file
        try:
            uuid.UUID(uuid_str)
            return True
        except ValueError:
            return False
115 def snapshot_base_path(self
):
116 return os
.path
.join(self
.base_path
, self
.vol_spec
.snapshot_dir_prefix
.encode('utf-8'))
    def snapshot_data_path(self, snapname):
        """Return the path of the data (<uuid>) directory inside snapshot `snapname`.

        Raises VolumeException(ENOENT) if the snapshot, or a uuid-named data
        directory within it, does not exist.
        """
        snap_base_path = self.snapshot_path(snapname)
        uuid_str = None
        try:
            with self.fs.opendir(snap_base_path) as dir_handle:
                d = self.fs.readdir(dir_handle)
                while d:
                    if d.d_name not in (b".", b".."):
                        d_full_path = os.path.join(snap_base_path, d.d_name)
                        stx = self.fs.statx(d_full_path, cephfs.CEPH_STATX_MODE, cephfs.AT_SYMLINK_NOFOLLOW)
                        if stat.S_ISDIR(stx.get('mode')):
                            # the data directory is the entry whose name is a valid uuid
                            if self.is_valid_uuid(d.d_name.decode('utf-8')):
                                uuid_str = d.d_name
                    d = self.fs.readdir(dir_handle)
        except cephfs.Error as e:
            if e.errno == errno.ENOENT:
                raise VolumeException(-errno.ENOENT, "snapshot '{0}' does not exist".format(snapname))
            raise VolumeException(-e.args[0], e.args[1])

        if not uuid_str:
            # snapshot directory exists but holds no uuid-named data directory
            raise VolumeException(-errno.ENOENT, "snapshot '{0}' does not exist".format(snapname))

        return os.path.join(snap_base_path, uuid_str)
    def _remove_on_failure(self, subvol_path, retained):
        """Best-effort cleanup after a failed create/clone.

        For a retained subvolume only the freshly created incarnation
        directory is removed; otherwise the whole subvolume is removed.
        """
        if retained:
            # the retained subvolume (meta file, snapshots) must survive;
            # remove only the new incarnation directory
            log.info("cleaning up subvolume incarnation with path: {0}".format(subvol_path))
            try:
                self.fs.rmdir(subvol_path)
            except cephfs.Error as e:
                raise VolumeException(-e.args[0], e.args[1])
        else:
            # nothing to retain -- remove the entire subvolume
            log.info("cleaning up subvolume with path: {0}".format(self.subvolname))
            self.remove(internal_cleanup=True)
153 def _set_incarnation_metadata(self
, subvolume_type
, qpath
, initial_state
):
154 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_TYPE
, subvolume_type
.value
)
155 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_PATH
, qpath
)
156 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_STATE
, initial_state
.value
)
    def create(self, size, isolate_nspace, pool, mode, uid, gid):
        """Create a new (incarnation of this) subvolume.

        On any failure the partially created incarnation is cleaned up and the
        error is re-raised as a VolumeException.
        """
        subvolume_type = SubvolumeTypes.TYPE_NORMAL
        try:
            initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
        except OpSmException as oe:
            raise VolumeException(-errno.EINVAL, "subvolume creation failed: internal error")

        retained = self.retained
        if retained and self.has_pending_purges:
            raise VolumeException(-errno.EAGAIN, "asynchronous purge of subvolume in progress")
        # each incarnation lives under a fresh uuid-named directory
        subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
        try:
            # create group directory with default mode(0o755) if it doesn't exist.
            create_base_dir(self.fs, self.group.path, self.vol_spec.DEFAULT_MODE)
            self.fs.mkdirs(subvol_path, mode)
            self.mark_subvolume()
            attrs = {
                'uid': uid,
                'gid': gid,
                'data_pool': pool,
                'pool_namespace': self.namespace if isolate_nspace else None,
                'quota': size
            }
            self.set_attrs(subvol_path, attrs)

            # persist subvolume metadata
            qpath = subvol_path.decode('utf-8')
            if retained:
                # metadata file already exists -- only rewrite the incarnation keys
                self._set_incarnation_metadata(subvolume_type, qpath, initial_state)
                self.metadata_mgr.flush()
            else:
                self.init_config(SubvolumeV2.VERSION, subvolume_type, qpath, initial_state)

            # Create the subvolume metadata file which manages auth-ids if it doesn't exist
            self.auth_mdata_mgr.create_subvolume_metadata_file(self.group.groupname, self.subvolname)
        except (VolumeException, MetadataMgrException, cephfs.Error) as e:
            try:
                self._remove_on_failure(subvol_path, retained)
            except VolumeException as ve:
                # cleanup is best-effort; report the original failure below
                log.info("failed to cleanup subvolume '{0}' ({1})".format(self.subvolname, ve))

            # normalize metadata/cephfs errors into VolumeException before re-raising
            if isinstance(e, MetadataMgrException):
                log.error("metadata manager exception: {0}".format(e))
                e = VolumeException(-errno.EINVAL, f"exception in subvolume metadata: {os.strerror(-e.args[0])}")
            elif isinstance(e, cephfs.Error):
                e = VolumeException(-e.args[0], e.args[1])
            raise e
    def create_clone(self, pool, source_volname, source_subvolume, snapname):
        """Create a clone subvolume seeded from `source_subvolume`'s snapshot `snapname`.

        On any failure the partially created incarnation is cleaned up and the
        error is re-raised as a VolumeException.
        """
        subvolume_type = SubvolumeTypes.TYPE_CLONE
        try:
            initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
        except OpSmException as oe:
            raise VolumeException(-errno.EINVAL, "clone failed: internal error")

        retained = self.retained
        if retained and self.has_pending_purges:
            raise VolumeException(-errno.EAGAIN, "asynchronous purge of subvolume in progress")
        # each incarnation lives under a fresh uuid-named directory
        subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
        try:
            # source snapshot attrs are used to create clone subvolume
            # attributes of subvolume's content though, are synced during the cloning process.
            attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname))

            # The source of the clone may have exceeded its quota limit as
            # CephFS quotas are imprecise. Cloning such a source may fail if
            # the quota on the destination is set before starting the clone
            # copy. So always set the quota on destination after cloning is
            # done.
            attrs["quota"] = None

            # override snapshot pool setting, if one is provided for the clone
            if pool:
                attrs["data_pool"] = pool
                attrs["pool_namespace"] = None

            # create directory and set attributes
            self.fs.mkdirs(subvol_path, attrs.get("mode"))
            self.mark_subvolume()
            self.set_attrs(subvol_path, attrs)

            # persist subvolume metadata and clone source
            qpath = subvol_path.decode('utf-8')
            if retained:
                # metadata file already exists -- only rewrite the incarnation keys
                self._set_incarnation_metadata(subvolume_type, qpath, initial_state)
            else:
                self.metadata_mgr.init(SubvolumeV2.VERSION, subvolume_type.value, qpath, initial_state.value)
            self.add_clone_source(source_volname, source_subvolume, snapname)
            self.metadata_mgr.flush()
        except (VolumeException, MetadataMgrException, cephfs.Error) as e:
            try:
                self._remove_on_failure(subvol_path, retained)
            except VolumeException as ve:
                # cleanup is best-effort; report the original failure below
                log.info("failed to cleanup subvolume '{0}' ({1})".format(self.subvolname, ve))

            # normalize metadata/cephfs errors into VolumeException before re-raising
            if isinstance(e, MetadataMgrException):
                log.error("metadata manager exception: {0}".format(e))
                e = VolumeException(-errno.EINVAL, f"exception in subvolume metadata: {os.strerror(-e.args[0])}")
            elif isinstance(e, cephfs.Error):
                e = VolumeException(-e.args[0], e.args[1])
            raise e
    def allowed_ops_by_type(self, vol_type):
        """Return the set of operations permitted for a subvolume of `vol_type`."""
        # clone subvolumes support every operation
        if vol_type == SubvolumeTypes.TYPE_CLONE:
            return {op_type for op_type in SubvolumeOpType}

        # normal subvolumes support everything except the clone-tracking operations
        if vol_type == SubvolumeTypes.TYPE_NORMAL:
            return {op_type for op_type in SubvolumeOpType} - {SubvolumeOpType.CLONE_STATUS,
                                                              SubvolumeOpType.CLONE_CANCEL,
                                                              SubvolumeOpType.CLONE_INTERNAL}

        # unknown type: nothing allowed
        # NOTE(review): fall-through reconstructed -- original line not visible in this view
        return {}
    def allowed_ops_by_state(self, vol_state):
        """Return the set of operations permitted in subvolume state `vol_state`."""
        # a COMPLETE subvolume allows every operation
        if vol_state == SubvolumeStates.STATE_COMPLETE:
            return {op_type for op_type in SubvolumeOpType}

        # a retained subvolume allows listing/info, removal, snapshot
        # management and serving as a clone source
        if vol_state == SubvolumeStates.STATE_RETAINED:
            return {
                SubvolumeOpType.REMOVE,
                SubvolumeOpType.REMOVE_FORCE,
                SubvolumeOpType.LIST,
                SubvolumeOpType.INFO,
                SubvolumeOpType.SNAP_REMOVE,
                SubvolumeOpType.SNAP_LIST,
                SubvolumeOpType.SNAP_INFO,
                SubvolumeOpType.SNAP_PROTECT,
                SubvolumeOpType.SNAP_UNPROTECT,
                SubvolumeOpType.CLONE_SOURCE
            }

        # any other (in-progress) state: clone management plus forced removal only
        return {SubvolumeOpType.REMOVE_FORCE,
                SubvolumeOpType.CLONE_CREATE,
                SubvolumeOpType.CLONE_STATUS,
                SubvolumeOpType.CLONE_CANCEL,
                SubvolumeOpType.CLONE_INTERNAL,
                SubvolumeOpType.CLONE_SOURCE}
    def open(self, op_type):
        """Open the subvolume for operation `op_type`.

        Validates the operation against the subvolume's type and state, and
        (unless retained) caches uid/gid/mode from the on-disk path.

        Raises VolumeException: ENOTSUP for disallowed op/type combinations,
        ENOENT for missing/retained-only subvolumes, EAGAIN when the
        subvolume is not yet ready for the operation.
        """
        if not isinstance(op_type, SubvolumeOpType):
            raise VolumeException(-errno.ENOTSUP, "operation {0} not supported on subvolume '{1}'".format(
                                  op_type.value, self.subvolname))
        try:
            self.metadata_mgr.refresh()
            # unconditionally mark as subvolume, to handle pre-existing subvolumes without the mark
            self.mark_subvolume()

            etype = self.subvol_type
            if op_type not in self.allowed_ops_by_type(etype):
                raise VolumeException(-errno.ENOTSUP, "operation '{0}' is not allowed on subvolume '{1}' of type {2}".format(
                                      op_type.value, self.subvolname, etype.value))

            # NOTE(review): assignment reconstructed -- presumed estate = self.state; confirm upstream
            estate = self.state
            if op_type not in self.allowed_ops_by_state(estate) and estate == SubvolumeStates.STATE_RETAINED:
                raise VolumeException(-errno.ENOENT, "subvolume '{0}' is removed and has only snapshots retained".format(
                                      self.subvolname))

            if op_type not in self.allowed_ops_by_state(estate) and estate != SubvolumeStates.STATE_RETAINED:
                raise VolumeException(-errno.EAGAIN, "subvolume '{0}' is not ready for operation {1}".format(
                                      self.subvolname, op_type.value))

            if estate != SubvolumeStates.STATE_RETAINED:
                subvol_path = self.path
                log.debug("refreshed metadata, checking subvolume path '{0}'".format(subvol_path))
                st = self.fs.stat(subvol_path)

                self.uid = int(st.st_uid)
                self.gid = int(st.st_gid)
                # strip the file-type bits, keeping only the permission bits
                self.mode = int(st.st_mode & ~stat.S_IFMT(st.st_mode))
        except MetadataMgrException as me:
            if me.errno == -errno.ENOENT:
                raise VolumeException(-errno.ENOENT, "subvolume '{0}' does not exist".format(self.subvolname))
            raise VolumeException(me.args[0], me.args[1])
        except cephfs.ObjectNotFound:
            log.debug("missing subvolume path '{0}' for subvolume '{1}'".format(subvol_path, self.subvolname))
            raise VolumeException(-errno.ENOENT, "mount path missing for subvolume '{0}'".format(self.subvolname))
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])
    def trash_incarnation_dir(self):
        """rename subvolume (uuid component) to trash"""
        self.create_trashcan()
        try:
            bname = os.path.basename(self.path)
            tpath = os.path.join(self.trash_dir, bname)
            log.debug("trash: {0} -> {1}".format(self.path, tpath))
            self.fs.rename(self.path, tpath)
            # register the trashed entry so the purge job picks it up
            self._link_dir(tpath, bname)
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])
    @staticmethod
    def safe_to_remove_subvolume_clone(subvol_state):
        """Return True if a clone in `subvol_state` may be removed without cancelling."""
        # Both the STATE_FAILED and STATE_CANCELED are handled by 'handle_clone_failed' in the state
        # machine which removes the entry from the index. Hence, it's safe to remove a clone with the
        # force option for both.
        acceptable_rm_clone_states = [SubvolumeStates.STATE_COMPLETE, SubvolumeStates.STATE_CANCELED,
                                      SubvolumeStates.STATE_FAILED, SubvolumeStates.STATE_RETAINED]
        if subvol_state not in acceptable_rm_clone_states:
            return False
        return True
    def remove(self, retainsnaps=False, internal_cleanup=False):
        """Remove the subvolume, optionally retaining its snapshots.

        With snapshots present and retainsnaps=False, raises ENOTEMPTY; with
        an in-progress clone (and not internal_cleanup), raises EAGAIN.
        """
        if self.list_snapshots():
            if not retainsnaps:
                raise VolumeException(-errno.ENOTEMPTY, "subvolume '{0}' has snapshots".format(self.subvolname))
        else:
            if not internal_cleanup and not self.safe_to_remove_subvolume_clone(self.state):
                raise VolumeException(-errno.EAGAIN,
                                      "{0} clone in-progress -- please cancel the clone and retry".format(self.subvolname))
            if not self.has_pending_purges:
                # no snapshots and no pending purges: the whole base dir can go
                self.trash_base_dir()
                # Delete the volume meta file, if it's not already deleted
                self.auth_mdata_mgr.delete_subvolume_metadata_file(self.group.groupname, self.subvolname)
                return
        if self.state != SubvolumeStates.STATE_RETAINED:
            # snapshots (or purges) remain: trash only the current incarnation
            # and flip the metadata into the retained state
            self.trash_incarnation_dir()
            self.metadata_mgr.remove_section(MetadataManager.USER_METADATA_SECTION)
            self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_PATH, "")
            self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_STATE, SubvolumeStates.STATE_RETAINED.value)
            self.metadata_mgr.flush()
            # Delete the volume meta file, if it's not already deleted
            self.auth_mdata_mgr.delete_subvolume_metadata_file(self.group.groupname, self.subvolname)
    def info(self):
        """Return subvolume info; a retained subvolume reports only type, features and state."""
        if self.state != SubvolumeStates.STATE_RETAINED:
            return super(SubvolumeV2, self).info()

        return {'type': self.subvol_type.value, 'features': self.features, 'state': SubvolumeStates.STATE_RETAINED.value}
    def remove_snapshot(self, snapname, force=False):
        """Remove a snapshot; purge the subvolume if the last retained snapshot is gone."""
        super(SubvolumeV2, self).remove_snapshot(snapname, force)
        if self.purgeable:
            self.trash_base_dir()
            # tickle the volume purge job to purge this entry, using ESTALE
            raise VolumeException(-errno.ESTALE, "subvolume '{0}' has been removed as the last retained snapshot is removed".format(self.subvolname))
        # if not purgeable, subvol is not retained, or has snapshots, or already has purge jobs that will garbage collect this subvol