9 from .metadata_manager
import MetadataManager
10 from .subvolume_attrs
import SubvolumeTypes
, SubvolumeStates
, SubvolumeFeatures
11 from .op_sm
import SubvolumeOpSm
12 from .subvolume_v1
import SubvolumeV1
13 from ..template
import SubvolumeTemplate
14 from ...exception
import OpSmException
, VolumeException
, MetadataMgrException
15 from ...fs_util
import listdir
, create_base_dir
16 from ..template
import SubvolumeOpType
# module-level logger, named after this module
log = logging.getLogger(__name__)
class SubvolumeV2(SubvolumeV1):
    """
    Version 2 subvolumes creates a subvolume with path as follows,
        volumes/<group-name>/<subvolume-name>/<uuid>/

    The distinguishing feature of V2 subvolume as compared to V1 subvolumes is its ability to retain snapshots
    of a subvolume on removal. This is done by creating snapshots under the <subvolume-name> directory,
    rather than under the <uuid> directory, as is the case of V1 subvolumes.

    - The directory under which user data resides is <uuid>
    - Snapshots of the subvolume are taken within the <subvolume-name> directory
    - A meta file is maintained under the <subvolume-name> directory as a metadata store, storing information similar
      to a V1 subvolume
    - On a request to remove subvolume but retain its snapshots, only the <uuid> directory is moved to trash, retaining
      the rest of the subvolume and its meta file.
    - The <uuid> directory, when present, is the current incarnation of the subvolume, which may have snapshots of
      older incarnations of the same subvolume.
    - V1 subvolumes that currently do not have any snapshots are upgraded to V2 subvolumes automatically, to support the
      snapshot retention feature
    """

    # layout version implemented by this class (used by version()/init_config())
    VERSION = 2
44 return SubvolumeV2
.VERSION
48 return [SubvolumeFeatures
.FEATURE_SNAPSHOT_CLONE
.value
,
49 SubvolumeFeatures
.FEATURE_SNAPSHOT_AUTOPROTECT
.value
,
50 SubvolumeFeatures
.FEATURE_SNAPSHOT_RETENTION
.value
]
55 self
.metadata_mgr
.refresh()
56 if self
.state
== SubvolumeStates
.STATE_RETAINED
:
59 except MetadataMgrException
as me
:
60 if me
.errno
!= -errno
.ENOENT
:
61 raise VolumeException(me
.errno
, "internal error while processing subvolume '{0}'".format(self
.subvolname
))
66 if not self
.retained
or self
.list_snapshots() or self
.has_pending_purges
:
71 def has_pending_purges(self
):
73 return not listdir(self
.fs
, self
.trash_dir
) == []
74 except VolumeException
as ve
:
75 if ve
.errno
== -errno
.ENOENT
:
81 return os
.path
.join(self
.base_path
, b
".trash")
83 def create_trashcan(self
):
84 """per subvolume trash directory"""
86 self
.fs
.stat(self
.trash_dir
)
87 except cephfs
.Error
as e
:
88 if e
.args
[0] == errno
.ENOENT
:
90 self
.fs
.mkdir(self
.trash_dir
, 0o700)
91 except cephfs
.Error
as ce
:
92 raise VolumeException(-ce
.args
[0], ce
.args
[1])
94 raise VolumeException(-e
.args
[0], e
.args
[1])
96 def mark_subvolume(self
):
97 # set subvolume attr, on subvolume root, marking it as a CephFS subvolume
98 # subvolume root is where snapshots would be taken, and hence is the base_path for v2 subvolumes
100 # MDS treats this as a noop for already marked subvolume
101 self
.fs
.setxattr(self
.base_path
, 'ceph.dir.subvolume', b
'1', 0)
102 except cephfs
.InvalidValue
as e
:
103 raise VolumeException(-errno
.EINVAL
, "invalid value specified for ceph.dir.subvolume")
104 except cephfs
.Error
as e
:
105 raise VolumeException(-e
.args
[0], e
.args
[1])
108 def is_valid_uuid(uuid_str
):
115 def snapshot_base_path(self
):
116 return os
.path
.join(self
.base_path
, self
.vol_spec
.snapshot_dir_prefix
.encode('utf-8'))
118 def snapshot_data_path(self
, snapname
):
119 snap_base_path
= self
.snapshot_path(snapname
)
122 with self
.fs
.opendir(snap_base_path
) as dir_handle
:
123 d
= self
.fs
.readdir(dir_handle
)
125 if d
.d_name
not in (b
".", b
".."):
126 d_full_path
= os
.path
.join(snap_base_path
, d
.d_name
)
127 stx
= self
.fs
.statx(d_full_path
, cephfs
.CEPH_STATX_MODE
, cephfs
.AT_SYMLINK_NOFOLLOW
)
128 if stat
.S_ISDIR(stx
.get('mode')):
129 if self
.is_valid_uuid(d
.d_name
.decode('utf-8')):
131 d
= self
.fs
.readdir(dir_handle
)
132 except cephfs
.Error
as e
:
133 if e
.errno
== errno
.ENOENT
:
134 raise VolumeException(-errno
.ENOENT
, "snapshot '{0}' does not exist".format(snapname
))
135 raise VolumeException(-e
.args
[0], e
.args
[1])
138 raise VolumeException(-errno
.ENOENT
, "snapshot '{0}' does not exist".format(snapname
))
140 return os
.path
.join(snap_base_path
, uuid_str
)
142 def _remove_on_failure(self
, subvol_path
, retained
):
144 log
.info("cleaning up subvolume incarnation with path: {0}".format(subvol_path
))
146 self
.fs
.rmdir(subvol_path
)
147 except cephfs
.Error
as e
:
148 raise VolumeException(-e
.args
[0], e
.args
[1])
150 log
.info("cleaning up subvolume with path: {0}".format(self
.subvolname
))
153 def _set_incarnation_metadata(self
, subvolume_type
, qpath
, initial_state
):
154 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_TYPE
, subvolume_type
.value
)
155 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_PATH
, qpath
)
156 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_STATE
, initial_state
.value
)
158 def create(self
, size
, isolate_nspace
, pool
, mode
, uid
, gid
):
159 subvolume_type
= SubvolumeTypes
.TYPE_NORMAL
161 initial_state
= SubvolumeOpSm
.get_init_state(subvolume_type
)
162 except OpSmException
as oe
:
163 raise VolumeException(-errno
.EINVAL
, "subvolume creation failed: internal error")
165 retained
= self
.retained
166 if retained
and self
.has_pending_purges
:
167 raise VolumeException(-errno
.EAGAIN
, "asynchronous purge of subvolume in progress")
168 subvol_path
= os
.path
.join(self
.base_path
, str(uuid
.uuid4()).encode('utf-8'))
170 # create group directory with default mode(0o755) if it doesn't exist.
171 create_base_dir(self
.fs
, self
.group
.path
, self
.vol_spec
.DEFAULT_MODE
)
172 self
.fs
.mkdirs(subvol_path
, mode
)
173 self
.mark_subvolume()
178 'pool_namespace': self
.namespace
if isolate_nspace
else None,
181 self
.set_attrs(subvol_path
, attrs
)
183 # persist subvolume metadata
184 qpath
= subvol_path
.decode('utf-8')
186 self
._set
_incarnation
_metadata
(subvolume_type
, qpath
, initial_state
)
187 self
.metadata_mgr
.flush()
189 self
.init_config(SubvolumeV2
.VERSION
, subvolume_type
, qpath
, initial_state
)
191 # Create the subvolume metadata file which manages auth-ids if it doesn't exist
192 self
.auth_mdata_mgr
.create_subvolume_metadata_file(self
.group
.groupname
, self
.subvolname
)
193 except (VolumeException
, MetadataMgrException
, cephfs
.Error
) as e
:
195 self
._remove
_on
_failure
(subvol_path
, retained
)
196 except VolumeException
as ve
:
197 log
.info("failed to cleanup subvolume '{0}' ({1})".format(self
.subvolname
, ve
))
199 if isinstance(e
, MetadataMgrException
):
200 log
.error("metadata manager exception: {0}".format(e
))
201 e
= VolumeException(-errno
.EINVAL
, "exception in subvolume metadata")
202 elif isinstance(e
, cephfs
.Error
):
203 e
= VolumeException(-e
.args
[0], e
.args
[1])
206 def create_clone(self
, pool
, source_volname
, source_subvolume
, snapname
):
207 subvolume_type
= SubvolumeTypes
.TYPE_CLONE
209 initial_state
= SubvolumeOpSm
.get_init_state(subvolume_type
)
210 except OpSmException
as oe
:
211 raise VolumeException(-errno
.EINVAL
, "clone failed: internal error")
213 retained
= self
.retained
214 if retained
and self
.has_pending_purges
:
215 raise VolumeException(-errno
.EAGAIN
, "asynchronous purge of subvolume in progress")
216 subvol_path
= os
.path
.join(self
.base_path
, str(uuid
.uuid4()).encode('utf-8'))
218 # source snapshot attrs are used to create clone subvolume
219 # attributes of subvolume's content though, are synced during the cloning process.
220 attrs
= source_subvolume
.get_attrs(source_subvolume
.snapshot_data_path(snapname
))
222 # override snapshot pool setting, if one is provided for the clone
224 attrs
["data_pool"] = pool
225 attrs
["pool_namespace"] = None
227 # create directory and set attributes
228 self
.fs
.mkdirs(subvol_path
, attrs
.get("mode"))
229 self
.mark_subvolume()
230 self
.set_attrs(subvol_path
, attrs
)
232 # persist subvolume metadata and clone source
233 qpath
= subvol_path
.decode('utf-8')
235 self
._set
_incarnation
_metadata
(subvolume_type
, qpath
, initial_state
)
237 self
.metadata_mgr
.init(SubvolumeV2
.VERSION
, subvolume_type
.value
, qpath
, initial_state
.value
)
238 self
.add_clone_source(source_volname
, source_subvolume
, snapname
)
239 self
.metadata_mgr
.flush()
240 except (VolumeException
, MetadataMgrException
, cephfs
.Error
) as e
:
242 self
._remove
_on
_failure
(subvol_path
, retained
)
243 except VolumeException
as ve
:
244 log
.info("failed to cleanup subvolume '{0}' ({1})".format(self
.subvolname
, ve
))
246 if isinstance(e
, MetadataMgrException
):
247 log
.error("metadata manager exception: {0}".format(e
))
248 e
= VolumeException(-errno
.EINVAL
, "exception in subvolume metadata")
249 elif isinstance(e
, cephfs
.Error
):
250 e
= VolumeException(-e
.args
[0], e
.args
[1])
253 def allowed_ops_by_type(self
, vol_type
):
254 if vol_type
== SubvolumeTypes
.TYPE_CLONE
:
255 return {op_type
for op_type
in SubvolumeOpType
}
257 if vol_type
== SubvolumeTypes
.TYPE_NORMAL
:
258 return {op_type
for op_type
in SubvolumeOpType
} - {SubvolumeOpType
.CLONE_STATUS
,
259 SubvolumeOpType
.CLONE_CANCEL
,
260 SubvolumeOpType
.CLONE_INTERNAL
}
264 def allowed_ops_by_state(self
, vol_state
):
265 if vol_state
== SubvolumeStates
.STATE_COMPLETE
:
266 return {op_type
for op_type
in SubvolumeOpType
}
268 if vol_state
== SubvolumeStates
.STATE_RETAINED
:
270 SubvolumeOpType
.REMOVE
,
271 SubvolumeOpType
.REMOVE_FORCE
,
272 SubvolumeOpType
.LIST
,
273 SubvolumeOpType
.INFO
,
274 SubvolumeOpType
.SNAP_REMOVE
,
275 SubvolumeOpType
.SNAP_LIST
,
276 SubvolumeOpType
.SNAP_INFO
,
277 SubvolumeOpType
.SNAP_PROTECT
,
278 SubvolumeOpType
.SNAP_UNPROTECT
,
279 SubvolumeOpType
.CLONE_SOURCE
282 return {SubvolumeOpType
.REMOVE_FORCE
,
283 SubvolumeOpType
.CLONE_CREATE
,
284 SubvolumeOpType
.CLONE_STATUS
,
285 SubvolumeOpType
.CLONE_CANCEL
,
286 SubvolumeOpType
.CLONE_INTERNAL
,
287 SubvolumeOpType
.CLONE_SOURCE
}
289 def open(self
, op_type
):
290 if not isinstance(op_type
, SubvolumeOpType
):
291 raise VolumeException(-errno
.ENOTSUP
, "operation {0} not supported on subvolume '{1}'".format(
292 op_type
.value
, self
.subvolname
))
294 self
.metadata_mgr
.refresh()
295 # unconditionally mark as subvolume, to handle pre-existing subvolumes without the mark
296 self
.mark_subvolume()
298 etype
= self
.subvol_type
299 if op_type
not in self
.allowed_ops_by_type(etype
):
300 raise VolumeException(-errno
.ENOTSUP
, "operation '{0}' is not allowed on subvolume '{1}' of type {2}".format(
301 op_type
.value
, self
.subvolname
, etype
.value
))
304 if op_type
not in self
.allowed_ops_by_state(estate
) and estate
== SubvolumeStates
.STATE_RETAINED
:
305 raise VolumeException(-errno
.ENOENT
, "subvolume '{0}' is removed and has only snapshots retained".format(
308 if op_type
not in self
.allowed_ops_by_state(estate
) and estate
!= SubvolumeStates
.STATE_RETAINED
:
309 raise VolumeException(-errno
.EAGAIN
, "subvolume '{0}' is not ready for operation {1}".format(
310 self
.subvolname
, op_type
.value
))
312 if estate
!= SubvolumeStates
.STATE_RETAINED
:
313 subvol_path
= self
.path
314 log
.debug("refreshed metadata, checking subvolume path '{0}'".format(subvol_path
))
315 st
= self
.fs
.stat(subvol_path
)
317 self
.uid
= int(st
.st_uid
)
318 self
.gid
= int(st
.st_gid
)
319 self
.mode
= int(st
.st_mode
& ~stat
.S_IFMT(st
.st_mode
))
320 except MetadataMgrException
as me
:
321 if me
.errno
== -errno
.ENOENT
:
322 raise VolumeException(-errno
.ENOENT
, "subvolume '{0}' does not exist".format(self
.subvolname
))
323 raise VolumeException(me
.args
[0], me
.args
[1])
324 except cephfs
.ObjectNotFound
:
325 log
.debug("missing subvolume path '{0}' for subvolume '{1}'".format(subvol_path
, self
.subvolname
))
326 raise VolumeException(-errno
.ENOENT
, "mount path missing for subvolume '{0}'".format(self
.subvolname
))
327 except cephfs
.Error
as e
:
328 raise VolumeException(-e
.args
[0], e
.args
[1])
330 def trash_incarnation_dir(self
):
331 """rename subvolume (uuid component) to trash"""
332 self
.create_trashcan()
334 bname
= os
.path
.basename(self
.path
)
335 tpath
= os
.path
.join(self
.trash_dir
, bname
)
336 log
.debug("trash: {0} -> {1}".format(self
.path
, tpath
))
337 self
.fs
.rename(self
.path
, tpath
)
338 self
._link
_dir
(tpath
, bname
)
339 except cephfs
.Error
as e
:
340 raise VolumeException(-e
.args
[0], e
.args
[1])
342 def remove(self
, retainsnaps
=False):
343 if self
.list_snapshots():
345 raise VolumeException(-errno
.ENOTEMPTY
, "subvolume '{0}' has snapshots".format(self
.subvolname
))
347 if not self
.has_pending_purges
:
348 self
.trash_base_dir()
349 # Delete the volume meta file, if it's not already deleted
350 self
.auth_mdata_mgr
.delete_subvolume_metadata_file(self
.group
.groupname
, self
.subvolname
)
352 if self
.state
!= SubvolumeStates
.STATE_RETAINED
:
353 self
.trash_incarnation_dir()
354 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_PATH
, "")
355 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_STATE
, SubvolumeStates
.STATE_RETAINED
.value
)
356 self
.metadata_mgr
.flush()
357 # Delete the volume meta file, if it's not already deleted
358 self
.auth_mdata_mgr
.delete_subvolume_metadata_file(self
.group
.groupname
, self
.subvolname
)
361 if self
.state
!= SubvolumeStates
.STATE_RETAINED
:
362 return super(SubvolumeV2
, self
).info()
364 return {'type': self
.subvol_type
.value
, 'features': self
.features
, 'state': SubvolumeStates
.STATE_RETAINED
.value
}
366 def remove_snapshot(self
, snapname
):
367 super(SubvolumeV2
, self
).remove_snapshot(snapname
)
369 self
.trash_base_dir()
370 # tickle the volume purge job to purge this entry, using ESTALE
371 raise VolumeException(-errno
.ESTALE
, "subvolume '{0}' has been removed as the last retained snapshot is removed".format(self
.subvolname
))
372 # if not purgeable, subvol is not retained, or has snapshots, or already has purge jobs that will garbage collect this subvol