9 from .metadata_manager
import MetadataManager
10 from .subvolume_attrs
import SubvolumeTypes
, SubvolumeStates
, SubvolumeFeatures
11 from .op_sm
import SubvolumeOpSm
12 from .subvolume_v1
import SubvolumeV1
13 from ..template
import SubvolumeTemplate
14 from ...exception
import OpSmException
, VolumeException
, MetadataMgrException
15 from ...fs_util
import listdir
16 from ..template
import SubvolumeOpType
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
class SubvolumeV2(SubvolumeV1):
    """
    Version 2 subvolumes creates a subvolume with path as follows,
        volumes/<group-name>/<subvolume-name>/<uuid>/

    The distinguishing feature of V2 subvolume as compared to V1 subvolumes is its ability to retain snapshots
    of a subvolume on removal. This is done by creating snapshots under the <subvolume-name> directory,
    rather than under the <uuid> directory, as is the case of V1 subvolumes.

    - The directory under which user data resides is <uuid>
    - Snapshots of the subvolume are taken within the <subvolume-name> directory
    - A meta file is maintained under the <subvolume-name> directory as a metadata store, storing information similar
      to the one maintained under the <uuid> directory by V1 subvolumes
    - On a request to remove subvolume but retain its snapshots, only the <uuid> directory is moved to trash, retaining
      the rest of the subvolume and its meta file.
    - The <uuid> directory, when present, is the current incarnation of the subvolume, which may have snapshots of
      older incarnations of the same subvolume.
    - V1 subvolumes that currently do not have any snapshots are upgraded to V2 subvolumes automatically, to support the
      snapshot retention feature
    """
44 return SubvolumeV2
.VERSION
48 return [SubvolumeFeatures
.FEATURE_SNAPSHOT_CLONE
.value
,
49 SubvolumeFeatures
.FEATURE_SNAPSHOT_AUTOPROTECT
.value
,
50 SubvolumeFeatures
.FEATURE_SNAPSHOT_RETENTION
.value
]
55 self
.metadata_mgr
.refresh()
56 if self
.state
== SubvolumeStates
.STATE_RETAINED
:
59 except MetadataMgrException
as me
:
60 if me
.errno
!= -errno
.ENOENT
:
61 raise VolumeException(me
.errno
, "internal error while processing subvolume '{0}'".format(self
.subvolname
))
66 if not self
.retained
or self
.list_snapshots() or self
.has_pending_purges
:
71 def has_pending_purges(self
):
73 return not listdir(self
.fs
, self
.trash_dir
) == []
74 except VolumeException
as ve
:
75 if ve
.errno
== -errno
.ENOENT
:
81 return os
.path
.join(self
.base_path
, b
".trash")
83 def create_trashcan(self
):
84 """per subvolume trash directory"""
86 self
.fs
.stat(self
.trash_dir
)
87 except cephfs
.Error
as e
:
88 if e
.args
[0] == errno
.ENOENT
:
90 self
.fs
.mkdir(self
.trash_dir
, 0o700)
91 except cephfs
.Error
as ce
:
92 raise VolumeException(-ce
.args
[0], ce
.args
[1])
94 raise VolumeException(-e
.args
[0], e
.args
[1])
96 def mark_subvolume(self
):
97 # set subvolume attr, on subvolume root, marking it as a CephFS subvolume
98 # subvolume root is where snapshots would be taken, and hence is the base_path for v2 subvolumes
100 # MDS treats this as a noop for already marked subvolume
101 self
.fs
.setxattr(self
.base_path
, 'ceph.dir.subvolume', b
'1', 0)
102 except cephfs
.InvalidValue
as e
:
103 raise VolumeException(-errno
.EINVAL
, "invalid value specified for ceph.dir.subvolume")
104 except cephfs
.Error
as e
:
105 raise VolumeException(-e
.args
[0], e
.args
[1])
108 def is_valid_uuid(uuid_str
):
115 def snapshot_base_path(self
):
116 return os
.path
.join(self
.base_path
, self
.vol_spec
.snapshot_dir_prefix
.encode('utf-8'))
118 def snapshot_data_path(self
, snapname
):
119 snap_base_path
= self
.snapshot_path(snapname
)
122 with self
.fs
.opendir(snap_base_path
) as dir_handle
:
123 d
= self
.fs
.readdir(dir_handle
)
125 if d
.d_name
not in (b
".", b
".."):
126 d_full_path
= os
.path
.join(snap_base_path
, d
.d_name
)
127 stx
= self
.fs
.statx(d_full_path
, cephfs
.CEPH_STATX_MODE
, cephfs
.AT_SYMLINK_NOFOLLOW
)
128 if stat
.S_ISDIR(stx
.get('mode')):
129 if self
.is_valid_uuid(d
.d_name
.decode('utf-8')):
131 d
= self
.fs
.readdir(dir_handle
)
132 except cephfs
.Error
as e
:
133 if e
.errno
== errno
.ENOENT
:
134 raise VolumeException(-errno
.ENOENT
, "snapshot '{0}' does not exist".format(snapname
))
135 raise VolumeException(-e
.args
[0], e
.args
[1])
138 raise VolumeException(-errno
.ENOENT
, "snapshot '{0}' does not exist".format(snapname
))
140 return os
.path
.join(snap_base_path
, uuid_str
)
142 def _remove_on_failure(self
, subvol_path
, retained
):
144 log
.info("cleaning up subvolume incarnation with path: {0}".format(subvol_path
))
146 self
.fs
.rmdir(subvol_path
)
147 except cephfs
.Error
as e
:
148 raise VolumeException(-e
.args
[0], e
.args
[1])
150 log
.info("cleaning up subvolume with path: {0}".format(self
.subvolname
))
153 def _set_incarnation_metadata(self
, subvolume_type
, qpath
, initial_state
):
154 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_TYPE
, subvolume_type
.value
)
155 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_PATH
, qpath
)
156 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_STATE
, initial_state
.value
)
158 def create(self
, size
, isolate_nspace
, pool
, mode
, uid
, gid
):
159 subvolume_type
= SubvolumeTypes
.TYPE_NORMAL
161 initial_state
= SubvolumeOpSm
.get_init_state(subvolume_type
)
162 except OpSmException
as oe
:
163 raise VolumeException(-errno
.EINVAL
, "subvolume creation failed: internal error")
165 retained
= self
.retained
166 if retained
and self
.has_pending_purges
:
167 raise VolumeException(-errno
.EAGAIN
, "asynchronous purge of subvolume in progress")
168 subvol_path
= os
.path
.join(self
.base_path
, str(uuid
.uuid4()).encode('utf-8'))
170 self
.fs
.mkdirs(subvol_path
, mode
)
171 self
.mark_subvolume()
176 'pool_namespace': self
.namespace
if isolate_nspace
else None,
179 self
.set_attrs(subvol_path
, attrs
)
181 # persist subvolume metadata
182 qpath
= subvol_path
.decode('utf-8')
184 self
._set
_incarnation
_metadata
(subvolume_type
, qpath
, initial_state
)
185 self
.metadata_mgr
.flush()
187 self
.init_config(SubvolumeV2
.VERSION
, subvolume_type
, qpath
, initial_state
)
188 except (VolumeException
, MetadataMgrException
, cephfs
.Error
) as e
:
190 self
._remove
_on
_failure
(subvol_path
, retained
)
191 except VolumeException
as ve
:
192 log
.info("failed to cleanup subvolume '{0}' ({1})".format(self
.subvolname
, ve
))
194 if isinstance(e
, MetadataMgrException
):
195 log
.error("metadata manager exception: {0}".format(e
))
196 e
= VolumeException(-errno
.EINVAL
, "exception in subvolume metadata")
197 elif isinstance(e
, cephfs
.Error
):
198 e
= VolumeException(-e
.args
[0], e
.args
[1])
201 def create_clone(self
, pool
, source_volname
, source_subvolume
, snapname
):
202 subvolume_type
= SubvolumeTypes
.TYPE_CLONE
204 initial_state
= SubvolumeOpSm
.get_init_state(subvolume_type
)
205 except OpSmException
as oe
:
206 raise VolumeException(-errno
.EINVAL
, "clone failed: internal error")
208 retained
= self
.retained
209 if retained
and self
.has_pending_purges
:
210 raise VolumeException(-errno
.EAGAIN
, "asynchronous purge of subvolume in progress")
211 subvol_path
= os
.path
.join(self
.base_path
, str(uuid
.uuid4()).encode('utf-8'))
213 # source snapshot attrs are used to create clone subvolume
214 # attributes of subvolume's content though, are synced during the cloning process.
215 attrs
= source_subvolume
.get_attrs(source_subvolume
.snapshot_data_path(snapname
))
217 # override snapshot pool setting, if one is provided for the clone
219 attrs
["data_pool"] = pool
220 attrs
["pool_namespace"] = None
222 # create directory and set attributes
223 self
.fs
.mkdirs(subvol_path
, attrs
.get("mode"))
224 self
.mark_subvolume()
225 self
.set_attrs(subvol_path
, attrs
)
227 # persist subvolume metadata and clone source
228 qpath
= subvol_path
.decode('utf-8')
230 self
._set
_incarnation
_metadata
(subvolume_type
, qpath
, initial_state
)
232 self
.metadata_mgr
.init(SubvolumeV2
.VERSION
, subvolume_type
.value
, qpath
, initial_state
.value
)
233 self
.add_clone_source(source_volname
, source_subvolume
, snapname
)
234 self
.metadata_mgr
.flush()
235 except (VolumeException
, MetadataMgrException
, cephfs
.Error
) as e
:
237 self
._remove
_on
_failure
(subvol_path
, retained
)
238 except VolumeException
as ve
:
239 log
.info("failed to cleanup subvolume '{0}' ({1})".format(self
.subvolname
, ve
))
241 if isinstance(e
, MetadataMgrException
):
242 log
.error("metadata manager exception: {0}".format(e
))
243 e
= VolumeException(-errno
.EINVAL
, "exception in subvolume metadata")
244 elif isinstance(e
, cephfs
.Error
):
245 e
= VolumeException(-e
.args
[0], e
.args
[1])
248 def allowed_ops_by_type(self
, vol_type
):
249 if vol_type
== SubvolumeTypes
.TYPE_CLONE
:
250 return {op_type
for op_type
in SubvolumeOpType
}
252 if vol_type
== SubvolumeTypes
.TYPE_NORMAL
:
253 return {op_type
for op_type
in SubvolumeOpType
} - {SubvolumeOpType
.CLONE_STATUS
,
254 SubvolumeOpType
.CLONE_CANCEL
,
255 SubvolumeOpType
.CLONE_INTERNAL
}
259 def allowed_ops_by_state(self
, vol_state
):
260 if vol_state
== SubvolumeStates
.STATE_COMPLETE
:
261 return {op_type
for op_type
in SubvolumeOpType
}
263 if vol_state
== SubvolumeStates
.STATE_RETAINED
:
265 SubvolumeOpType
.REMOVE
,
266 SubvolumeOpType
.REMOVE_FORCE
,
267 SubvolumeOpType
.LIST
,
268 SubvolumeOpType
.INFO
,
269 SubvolumeOpType
.SNAP_REMOVE
,
270 SubvolumeOpType
.SNAP_LIST
,
271 SubvolumeOpType
.SNAP_INFO
,
272 SubvolumeOpType
.SNAP_PROTECT
,
273 SubvolumeOpType
.SNAP_UNPROTECT
,
274 SubvolumeOpType
.CLONE_SOURCE
277 return {SubvolumeOpType
.REMOVE_FORCE
,
278 SubvolumeOpType
.CLONE_CREATE
,
279 SubvolumeOpType
.CLONE_STATUS
,
280 SubvolumeOpType
.CLONE_CANCEL
,
281 SubvolumeOpType
.CLONE_INTERNAL
,
282 SubvolumeOpType
.CLONE_SOURCE
}
284 def open(self
, op_type
):
285 if not isinstance(op_type
, SubvolumeOpType
):
286 raise VolumeException(-errno
.ENOTSUP
, "operation {0} not supported on subvolume '{1}'".format(
287 op_type
.value
, self
.subvolname
))
289 self
.metadata_mgr
.refresh()
290 # unconditionally mark as subvolume, to handle pre-existing subvolumes without the mark
291 self
.mark_subvolume()
293 etype
= self
.subvol_type
294 if op_type
not in self
.allowed_ops_by_type(etype
):
295 raise VolumeException(-errno
.ENOTSUP
, "operation '{0}' is not allowed on subvolume '{1}' of type {2}".format(
296 op_type
.value
, self
.subvolname
, etype
.value
))
299 if op_type
not in self
.allowed_ops_by_state(estate
) and estate
== SubvolumeStates
.STATE_RETAINED
:
300 raise VolumeException(-errno
.ENOENT
, "subvolume '{0}' is removed and has only snapshots retained".format(
303 if op_type
not in self
.allowed_ops_by_state(estate
) and estate
!= SubvolumeStates
.STATE_RETAINED
:
304 raise VolumeException(-errno
.EAGAIN
, "subvolume '{0}' is not ready for operation {1}".format(
305 self
.subvolname
, op_type
.value
))
307 if estate
!= SubvolumeStates
.STATE_RETAINED
:
308 subvol_path
= self
.path
309 log
.debug("refreshed metadata, checking subvolume path '{0}'".format(subvol_path
))
310 st
= self
.fs
.stat(subvol_path
)
312 self
.uid
= int(st
.st_uid
)
313 self
.gid
= int(st
.st_gid
)
314 self
.mode
= int(st
.st_mode
& ~stat
.S_IFMT(st
.st_mode
))
315 except MetadataMgrException
as me
:
316 if me
.errno
== -errno
.ENOENT
:
317 raise VolumeException(-errno
.ENOENT
, "subvolume '{0}' does not exist".format(self
.subvolname
))
318 raise VolumeException(me
.args
[0], me
.args
[1])
319 except cephfs
.ObjectNotFound
:
320 log
.debug("missing subvolume path '{0}' for subvolume '{1}'".format(subvol_path
, self
.subvolname
))
321 raise VolumeException(-errno
.ENOENT
, "mount path missing for subvolume '{0}'".format(self
.subvolname
))
322 except cephfs
.Error
as e
:
323 raise VolumeException(-e
.args
[0], e
.args
[1])
325 def trash_incarnation_dir(self
):
326 """rename subvolume (uuid component) to trash"""
327 self
.create_trashcan()
329 bname
= os
.path
.basename(self
.path
)
330 tpath
= os
.path
.join(self
.trash_dir
, bname
)
331 log
.debug("trash: {0} -> {1}".format(self
.path
, tpath
))
332 self
.fs
.rename(self
.path
, tpath
)
333 self
._link
_dir
(tpath
, bname
)
334 except cephfs
.Error
as e
:
335 raise VolumeException(-e
.args
[0], e
.args
[1])
337 def remove(self
, retainsnaps
=False):
338 if self
.list_snapshots():
340 raise VolumeException(-errno
.ENOTEMPTY
, "subvolume '{0}' has snapshots".format(self
.subvolname
))
342 if not self
.has_pending_purges
:
343 self
.trash_base_dir()
345 if self
.state
!= SubvolumeStates
.STATE_RETAINED
:
346 self
.trash_incarnation_dir()
347 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_PATH
, "")
348 self
.metadata_mgr
.update_global_section(MetadataManager
.GLOBAL_META_KEY_STATE
, SubvolumeStates
.STATE_RETAINED
.value
)
349 self
.metadata_mgr
.flush()
352 if self
.state
!= SubvolumeStates
.STATE_RETAINED
:
353 return super(SubvolumeV2
, self
).info()
355 return {'type': self
.subvol_type
.value
, 'features': self
.features
, 'state': SubvolumeStates
.STATE_RETAINED
.value
}
357 def remove_snapshot(self
, snapname
):
358 super(SubvolumeV2
, self
).remove_snapshot(snapname
)
360 self
.trash_base_dir()
361 # tickle the volume purge job to purge this entry, using ESTALE
362 raise VolumeException(-errno
.ESTALE
, "subvolume '{0}' has been removed as the last retained snapshot is removed".format(self
.subvolname
))
363 # if not purgeable, subvol is not retained, or has snapshots, or already has purge jobs that will garbage collect this subvol