# ceph/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py (ceph quincy 17.2.1)
import os
import stat
import uuid
import errno
import logging

import cephfs

from .metadata_manager import MetadataManager
from .subvolume_attrs import SubvolumeTypes, SubvolumeStates, SubvolumeFeatures
from .op_sm import SubvolumeOpSm
from .subvolume_v1 import SubvolumeV1
from ..template import SubvolumeTemplate
from ...exception import OpSmException, VolumeException, MetadataMgrException
from ...fs_util import listdir, create_base_dir
from ..template import SubvolumeOpType

log = logging.getLogger(__name__)

class SubvolumeV2(SubvolumeV1):
    """
    Version 2 subvolumes create a subvolume with a path of the form:
        volumes/<group-name>/<subvolume-name>/<uuid>/

    The distinguishing feature of a V2 subvolume, compared to a V1 subvolume, is its ability to retain snapshots
    of a subvolume on removal. This is done by creating snapshots under the <subvolume-name> directory,
    rather than under the <uuid> directory, as is the case with V1 subvolumes.

    - The directory under which user data resides is <uuid>
    - Snapshots of the subvolume are taken within the <subvolume-name> directory
    - A meta file is maintained under the <subvolume-name> directory as a metadata store, storing information similar
      to V1 subvolumes
    - On a request to remove a subvolume but retain its snapshots, only the <uuid> directory is moved to trash, retaining
      the rest of the subvolume and its meta file.
    - The <uuid> directory, when present, is the current incarnation of the subvolume, which may have snapshots of
      older incarnations of the same subvolume.
    - V1 subvolumes that currently do not have any snapshots are upgraded to V2 subvolumes automatically, to support the
      snapshot retention feature

    (An illustrative on-disk layout is sketched in the comment following this docstring.)
    """
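    # Illustrative on-disk layout (a sketch only; the snapshot directory name assumes the
    # usual CephFS snapshot dir prefix, see vol_spec.snapshot_dir_prefix, and the metadata
    # file name is indicative):
    #
    #   volumes/<group-name>/<subvolume-name>/.meta                      -> metadata store
    #   volumes/<group-name>/<subvolume-name>/<uuid>/                    -> current incarnation (user data)
    #   volumes/<group-name>/<subvolume-name>/.snap/<snap-name>/<uuid>/  -> snapshot of an incarnation
    #   volumes/<group-name>/<subvolume-name>/.trash/<uuid>/             -> trashed incarnation, pending purge
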
    VERSION = 2

    @staticmethod
    def version():
        return SubvolumeV2.VERSION

    @property
    def features(self):
        return [SubvolumeFeatures.FEATURE_SNAPSHOT_CLONE.value,
                SubvolumeFeatures.FEATURE_SNAPSHOT_AUTOPROTECT.value,
                SubvolumeFeatures.FEATURE_SNAPSHOT_RETENTION.value]

    @property
    def retained(self):
        try:
            self.metadata_mgr.refresh()
            if self.state == SubvolumeStates.STATE_RETAINED:
                return True
            return False
        except MetadataMgrException as me:
            if me.errno != -errno.ENOENT:
                raise VolumeException(me.errno, "internal error while processing subvolume '{0}'".format(self.subvolname))
            return False

    @property
    def purgeable(self):
        if not self.retained or self.list_snapshots() or self.has_pending_purges:
            return False
        return True

    @property
    def has_pending_purges(self):
        try:
            return not listdir(self.fs, self.trash_dir) == []
        except VolumeException as ve:
            if ve.errno == -errno.ENOENT:
                return False
            raise

    @property
    def trash_dir(self):
        return os.path.join(self.base_path, b".trash")

    def create_trashcan(self):
        """per subvolume trash directory"""
        try:
            self.fs.stat(self.trash_dir)
        except cephfs.Error as e:
            if e.args[0] == errno.ENOENT:
                try:
                    self.fs.mkdir(self.trash_dir, 0o700)
                except cephfs.Error as ce:
                    raise VolumeException(-ce.args[0], ce.args[1])
            else:
                raise VolumeException(-e.args[0], e.args[1])

    def mark_subvolume(self):
        # set the subvolume attr on the subvolume root, marking it as a CephFS subvolume
        # the subvolume root is where snapshots would be taken, and hence is the base_path for v2 subvolumes
        try:
            # MDS treats this as a no-op for an already marked subvolume
            self.fs.setxattr(self.base_path, 'ceph.dir.subvolume', b'1', 0)
        except cephfs.InvalidValue as e:
            raise VolumeException(-errno.EINVAL, "invalid value specified for ceph.dir.subvolume")
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])

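    # A minimal sketch of how the mark could be inspected from a libcephfs handle
    # (illustrative only; 'fs' is assumed to be a connected cephfs.LibCephFS instance
    # and the path is hypothetical):
    #
    #   value = fs.getxattr(b"/volumes/<group-name>/<subvolume-name>", "ceph.dir.subvolume")
    #   assert value == b"1"
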
    @staticmethod
    def is_valid_uuid(uuid_str):
        try:
            uuid.UUID(uuid_str)
            return True
        except ValueError:
            return False

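    # Usage sketch (the values below are hypothetical):
    #
    #   SubvolumeV2.is_valid_uuid("5d33a47e-7f51-4273-8442-5f55a7e4a1b1")  # -> True
    #   SubvolumeV2.is_valid_uuid("not-a-uuid")                            # -> False
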
    def snapshot_base_path(self):
        return os.path.join(self.base_path, self.vol_spec.snapshot_dir_prefix.encode('utf-8'))

    def snapshot_data_path(self, snapname):
        snap_base_path = self.snapshot_path(snapname)
        uuid_str = None
        try:
            with self.fs.opendir(snap_base_path) as dir_handle:
                d = self.fs.readdir(dir_handle)
                while d:
                    if d.d_name not in (b".", b".."):
                        d_full_path = os.path.join(snap_base_path, d.d_name)
                        stx = self.fs.statx(d_full_path, cephfs.CEPH_STATX_MODE, cephfs.AT_SYMLINK_NOFOLLOW)
                        if stat.S_ISDIR(stx.get('mode')):
                            if self.is_valid_uuid(d.d_name.decode('utf-8')):
                                uuid_str = d.d_name
                    d = self.fs.readdir(dir_handle)
        except cephfs.Error as e:
            if e.errno == errno.ENOENT:
                raise VolumeException(-errno.ENOENT, "snapshot '{0}' does not exist".format(snapname))
            raise VolumeException(-e.args[0], e.args[1])

        if not uuid_str:
            raise VolumeException(-errno.ENOENT, "snapshot '{0}' does not exist".format(snapname))

        return os.path.join(snap_base_path, uuid_str)

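    # The returned path is the data directory of the incarnation captured by the snapshot.
    # Illustrative shape (names hypothetical, assuming ".snap" as the snapshot dir prefix):
    #
    #   /volumes/<group-name>/<subvolume-name>/.snap/<snap-name>/<uuid-of-incarnation>
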
    def _remove_on_failure(self, subvol_path, retained):
        if retained:
            log.info("cleaning up subvolume incarnation with path: {0}".format(subvol_path))
            try:
                self.fs.rmdir(subvol_path)
            except cephfs.Error as e:
                raise VolumeException(-e.args[0], e.args[1])
        else:
            log.info("cleaning up subvolume with path: {0}".format(self.subvolname))
            self.remove(internal_cleanup=True)

    def _set_incarnation_metadata(self, subvolume_type, qpath, initial_state):
        self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_TYPE, subvolume_type.value)
        self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_PATH, qpath)
        self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_STATE, initial_state.value)

    def create(self, size, isolate_nspace, pool, mode, uid, gid):
        subvolume_type = SubvolumeTypes.TYPE_NORMAL
        try:
            initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
        except OpSmException as oe:
            raise VolumeException(-errno.EINVAL, "subvolume creation failed: internal error")

        retained = self.retained
        if retained and self.has_pending_purges:
            raise VolumeException(-errno.EAGAIN, "asynchronous purge of subvolume in progress")
        subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
        try:
            # create the group directory with the default mode (0o755) if it doesn't exist
            create_base_dir(self.fs, self.group.path, self.vol_spec.DEFAULT_MODE)
            self.fs.mkdirs(subvol_path, mode)
            self.mark_subvolume()
            attrs = {
                'uid': uid,
                'gid': gid,
                'data_pool': pool,
                'pool_namespace': self.namespace if isolate_nspace else None,
                'quota': size
            }
            self.set_attrs(subvol_path, attrs)

            # persist subvolume metadata
            qpath = subvol_path.decode('utf-8')
            if retained:
                self._set_incarnation_metadata(subvolume_type, qpath, initial_state)
                self.metadata_mgr.flush()
            else:
                self.init_config(SubvolumeV2.VERSION, subvolume_type, qpath, initial_state)

            # create the subvolume metadata file, which manages auth-ids, if it doesn't exist
            self.auth_mdata_mgr.create_subvolume_metadata_file(self.group.groupname, self.subvolname)
        except (VolumeException, MetadataMgrException, cephfs.Error) as e:
            try:
                self._remove_on_failure(subvol_path, retained)
            except VolumeException as ve:
                log.info("failed to cleanup subvolume '{0}' ({1})".format(self.subvolname, ve))

            if isinstance(e, MetadataMgrException):
                log.error("metadata manager exception: {0}".format(e))
                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
            elif isinstance(e, cephfs.Error):
                e = VolumeException(-e.args[0], e.args[1])
            raise e

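    # A hedged sketch of the attrs dict this method applies to a fresh incarnation
    # (values are hypothetical; set_attrs() is inherited from the base subvolume class):
    #
    #   attrs = {
    #       'uid': 1000,
    #       'gid': 1000,
    #       'data_pool': 'cephfs_data_ssd',   # or None to keep the inherited layout
    #       'pool_namespace': None,           # set only when isolate_nspace is requested
    #       'quota': 10 * 1024 ** 3,          # bytes, or None for no quota
    #   }
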
    def create_clone(self, pool, source_volname, source_subvolume, snapname):
        subvolume_type = SubvolumeTypes.TYPE_CLONE
        try:
            initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
        except OpSmException as oe:
            raise VolumeException(-errno.EINVAL, "clone failed: internal error")

        retained = self.retained
        if retained and self.has_pending_purges:
            raise VolumeException(-errno.EAGAIN, "asynchronous purge of subvolume in progress")
        subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
        try:
            # source snapshot attrs are used to create the clone subvolume;
            # attributes of the subvolume's content, though, are synced during the cloning process
            attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname))

            # The source of the clone may have exceeded its quota limit as
            # CephFS quotas are imprecise. Cloning such a source may fail if
            # the quota on the destination is set before starting the clone
            # copy. So always set the quota on the destination after cloning is
            # successful.
            attrs["quota"] = None

            # override snapshot pool setting, if one is provided for the clone
            if pool is not None:
                attrs["data_pool"] = pool
                attrs["pool_namespace"] = None

            # create directory and set attributes
            self.fs.mkdirs(subvol_path, attrs.get("mode"))
            self.mark_subvolume()
            self.set_attrs(subvol_path, attrs)

            # persist subvolume metadata and clone source
            qpath = subvol_path.decode('utf-8')
            if retained:
                self._set_incarnation_metadata(subvolume_type, qpath, initial_state)
            else:
                self.metadata_mgr.init(SubvolumeV2.VERSION, subvolume_type.value, qpath, initial_state.value)
            self.add_clone_source(source_volname, source_subvolume, snapname)
            self.metadata_mgr.flush()
        except (VolumeException, MetadataMgrException, cephfs.Error) as e:
            try:
                self._remove_on_failure(subvol_path, retained)
            except VolumeException as ve:
                log.info("failed to cleanup subvolume '{0}' ({1})".format(self.subvolname, ve))

            if isinstance(e, MetadataMgrException):
                log.error("metadata manager exception: {0}".format(e))
                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
            elif isinstance(e, cephfs.Error):
                e = VolumeException(-e.args[0], e.args[1])
            raise e

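    # Quota handling sketch: the clone's quota is intentionally left unset here
    # (attrs["quota"] = None) and is expected to be applied only once the data copy
    # has completed. A hypothetical call site in the cloner could look like:
    #
    #   attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname))
    #   clone_subvolume.set_attrs(clone_subvolume.path, {'quota': attrs['quota']})
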
    def allowed_ops_by_type(self, vol_type):
        if vol_type == SubvolumeTypes.TYPE_CLONE:
            return {op_type for op_type in SubvolumeOpType}

        if vol_type == SubvolumeTypes.TYPE_NORMAL:
            return {op_type for op_type in SubvolumeOpType} - {SubvolumeOpType.CLONE_STATUS,
                                                               SubvolumeOpType.CLONE_CANCEL,
                                                               SubvolumeOpType.CLONE_INTERNAL}

        return {}

    def allowed_ops_by_state(self, vol_state):
        if vol_state == SubvolumeStates.STATE_COMPLETE:
            return {op_type for op_type in SubvolumeOpType}

        if vol_state == SubvolumeStates.STATE_RETAINED:
            return {
                SubvolumeOpType.REMOVE,
                SubvolumeOpType.REMOVE_FORCE,
                SubvolumeOpType.LIST,
                SubvolumeOpType.INFO,
                SubvolumeOpType.SNAP_REMOVE,
                SubvolumeOpType.SNAP_LIST,
                SubvolumeOpType.SNAP_INFO,
                SubvolumeOpType.SNAP_PROTECT,
                SubvolumeOpType.SNAP_UNPROTECT,
                SubvolumeOpType.CLONE_SOURCE
            }

        return {SubvolumeOpType.REMOVE_FORCE,
                SubvolumeOpType.CLONE_CREATE,
                SubvolumeOpType.CLONE_STATUS,
                SubvolumeOpType.CLONE_CANCEL,
                SubvolumeOpType.CLONE_INTERNAL,
                SubvolumeOpType.CLONE_SOURCE}

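    # How the two gates above combine (hedged sketch mirroring the checks in open() below):
    #
    #   ok = (op_type in self.allowed_ops_by_type(self.subvol_type)
    #         and op_type in self.allowed_ops_by_state(self.state))
    #
    #   # open() maps a failed check to ENOTSUP (wrong subvolume type), ENOENT (retained,
    #   # only snapshots remain) or EAGAIN (transient state such as an in-progress clone).
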
    def open(self, op_type):
        if not isinstance(op_type, SubvolumeOpType):
            raise VolumeException(-errno.ENOTSUP, "operation {0} not supported on subvolume '{1}'".format(
                                  op_type.value, self.subvolname))
        try:
            self.metadata_mgr.refresh()
            # unconditionally mark as subvolume, to handle pre-existing subvolumes without the mark
            self.mark_subvolume()

            etype = self.subvol_type
            if op_type not in self.allowed_ops_by_type(etype):
                raise VolumeException(-errno.ENOTSUP, "operation '{0}' is not allowed on subvolume '{1}' of type {2}".format(
                                      op_type.value, self.subvolname, etype.value))

            estate = self.state
            if op_type not in self.allowed_ops_by_state(estate) and estate == SubvolumeStates.STATE_RETAINED:
                raise VolumeException(-errno.ENOENT, "subvolume '{0}' is removed and has only snapshots retained".format(
                                      self.subvolname))

            if op_type not in self.allowed_ops_by_state(estate) and estate != SubvolumeStates.STATE_RETAINED:
                raise VolumeException(-errno.EAGAIN, "subvolume '{0}' is not ready for operation {1}".format(
                                      self.subvolname, op_type.value))

            if estate != SubvolumeStates.STATE_RETAINED:
                subvol_path = self.path
                log.debug("refreshed metadata, checking subvolume path '{0}'".format(subvol_path))
                st = self.fs.stat(subvol_path)

                self.uid = int(st.st_uid)
                self.gid = int(st.st_gid)
                self.mode = int(st.st_mode & ~stat.S_IFMT(st.st_mode))
        except MetadataMgrException as me:
            if me.errno == -errno.ENOENT:
                raise VolumeException(-errno.ENOENT, "subvolume '{0}' does not exist".format(self.subvolname))
            raise VolumeException(me.args[0], me.args[1])
        except cephfs.ObjectNotFound:
            log.debug("missing subvolume path '{0}' for subvolume '{1}'".format(subvol_path, self.subvolname))
            raise VolumeException(-errno.ENOENT, "mount path missing for subvolume '{0}'".format(self.subvolname))
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])

    def trash_incarnation_dir(self):
        """rename subvolume (uuid component) to trash"""
        self.create_trashcan()
        try:
            bname = os.path.basename(self.path)
            tpath = os.path.join(self.trash_dir, bname)
            log.debug("trash: {0} -> {1}".format(self.path, tpath))
            self.fs.rename(self.path, tpath)
            self._link_dir(tpath, bname)
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])

    @staticmethod
    def safe_to_remove_subvolume_clone(subvol_state):
        # Both STATE_FAILED and STATE_CANCELED are handled by 'handle_clone_failed' in the state
        # machine, which removes the entry from the index. Hence, it's safe to remove a clone with
        # the force option in both cases.
        acceptable_rm_clone_states = [SubvolumeStates.STATE_COMPLETE, SubvolumeStates.STATE_CANCELED,
                                      SubvolumeStates.STATE_FAILED, SubvolumeStates.STATE_RETAINED]
        if subvol_state not in acceptable_rm_clone_states:
            return False
        return True

    def remove(self, retainsnaps=False, internal_cleanup=False):
        if self.list_snapshots():
            if not retainsnaps:
                raise VolumeException(-errno.ENOTEMPTY, "subvolume '{0}' has snapshots".format(self.subvolname))
        else:
            if not internal_cleanup and not self.safe_to_remove_subvolume_clone(self.state):
                raise VolumeException(-errno.EAGAIN,
                                      "{0} clone in-progress -- please cancel the clone and retry".format(self.subvolname))
            if not self.has_pending_purges:
                self.trash_base_dir()
                # Delete the volume meta file, if it's not already deleted
                self.auth_mdata_mgr.delete_subvolume_metadata_file(self.group.groupname, self.subvolname)
                return
        if self.state != SubvolumeStates.STATE_RETAINED:
            self.trash_incarnation_dir()
            self.metadata_mgr.remove_section(MetadataManager.USER_METADATA_SECTION)
            self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_PATH, "")
            self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_STATE, SubvolumeStates.STATE_RETAINED.value)
            self.metadata_mgr.flush()
        # Delete the volume meta file, if it's not already deleted
        self.auth_mdata_mgr.delete_subvolume_metadata_file(self.group.groupname, self.subvolname)

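    # Hedged sketch of the retention flow this method implements (names are from this
    # class; the calling code is hypothetical):
    #
    #   subvolume.remove(retainsnaps=True)       # snapshots exist: incarnation dir is trashed,
    #                                            # state flips to STATE_RETAINED, meta file stays
    #   subvolume.remove_snapshot("<snapname>")  # once the last snapshot goes and nothing is
    #                                            # pending purge, the whole base dir is trashed
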
    def info(self):
        if self.state != SubvolumeStates.STATE_RETAINED:
            return super(SubvolumeV2, self).info()

        return {'type': self.subvol_type.value, 'features': self.features, 'state': SubvolumeStates.STATE_RETAINED.value}

    def remove_snapshot(self, snapname):
        super(SubvolumeV2, self).remove_snapshot(snapname)
        if self.purgeable:
            self.trash_base_dir()
            # tickle the volume purge job to purge this entry, using ESTALE
            raise VolumeException(-errno.ESTALE, "subvolume '{0}' has been removed as the last retained snapshot is removed".format(self.subvolname))
        # if not purgeable, the subvolume is either not in the retained state, still has snapshots,
        # or already has pending purge jobs that will garbage-collect it
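
# Illustrative lifecycle of a V2 subvolume with snapshot retention (a hedged sketch;
# the driver calls below are hypothetical -- in ceph-mgr the volumes module makes them
# after opening the subvolume with the appropriate SubvolumeOpType):
#
#   subvol.create(size=None, isolate_nspace=False, pool=None, mode=0o755, uid=0, gid=0)
#   ... snapshots are taken under the <subvolume-name> directory ...
#   subvol.remove(retainsnaps=True)    # <uuid> incarnation trashed, state -> STATE_RETAINED
#   subvol.info()                      # reports only type/features/state while retained
#   subvol.remove_snapshot("snap1")    # removing the last retained snapshot trashes the
#                                      # base dir and raises ESTALE to tickle the purge job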