# ceph/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py

import os
import stat
import uuid
import errno
import logging

import cephfs

from .metadata_manager import MetadataManager
from .subvolume_attrs import SubvolumeTypes, SubvolumeStates, SubvolumeFeatures
from .op_sm import SubvolumeOpSm
from .subvolume_v1 import SubvolumeV1
from ..template import SubvolumeTemplate, SubvolumeOpType
from ...exception import OpSmException, VolumeException, MetadataMgrException
from ...fs_util import listdir

log = logging.getLogger(__name__)

class SubvolumeV2(SubvolumeV1):
    """
    Version 2 subvolumes are created with a path of the form
        volumes/<group-name>/<subvolume-name>/<uuid>/

    The distinguishing feature of a V2 subvolume, as compared to a V1 subvolume, is its ability to retain snapshots
    of a subvolume on removal. This is done by creating snapshots under the <subvolume-name> directory,
    rather than under the <uuid> directory, as is the case with V1 subvolumes.

    - The directory under which user data resides is <uuid>
    - Snapshots of the subvolume are taken within the <subvolume-name> directory
    - A meta file is maintained under the <subvolume-name> directory as a metadata store, storing information similar
      to V1 subvolumes
    - On a request to remove the subvolume but retain its snapshots, only the <uuid> directory is moved to trash, retaining
      the rest of the subvolume and its meta file
    - The <uuid> directory, when present, is the current incarnation of the subvolume; the subvolume may also hold snapshots
      of older incarnations
    - V1 subvolumes that currently do not have any snapshots are automatically upgraded to V2 subvolumes, to support the
      snapshot retention feature
    """
    VERSION = 2

    @staticmethod
    def version():
        return SubvolumeV2.VERSION

    @property
    def features(self):
        return [SubvolumeFeatures.FEATURE_SNAPSHOT_CLONE.value,
                SubvolumeFeatures.FEATURE_SNAPSHOT_AUTOPROTECT.value,
                SubvolumeFeatures.FEATURE_SNAPSHOT_RETENTION.value]

    @property
    def retained(self):
        try:
            self.metadata_mgr.refresh()
            return self.state == SubvolumeStates.STATE_RETAINED
        except MetadataMgrException as me:
            if me.errno != -errno.ENOENT:
                raise VolumeException(me.errno, "internal error while processing subvolume '{0}'".format(self.subvolname))
            return False

    @property
    def purgeable(self):
        return self.retained and not self.list_snapshots() and not self.has_pending_purges

    @property
    def has_pending_purges(self):
        try:
            return listdir(self.fs, self.trash_dir) != []
        except VolumeException as ve:
            if ve.errno == -errno.ENOENT:
                return False
            raise

    @property
    def trash_dir(self):
        return os.path.join(self.base_path, b".trash")

    def create_trashcan(self):
        """per-subvolume trash directory"""
        try:
            self.fs.stat(self.trash_dir)
        except cephfs.Error as e:
            if e.args[0] == errno.ENOENT:
                try:
                    self.fs.mkdir(self.trash_dir, 0o700)
                except cephfs.Error as ce:
                    raise VolumeException(-ce.args[0], ce.args[1])
            else:
                raise VolumeException(-e.args[0], e.args[1])

    def mark_subvolume(self):
        # set the subvolume attr on the subvolume root, marking it as a CephFS subvolume.
        # the subvolume root is where snapshots are taken, and hence is the base_path for v2 subvolumes
        try:
            # the MDS treats this as a no-op for an already marked subvolume
            self.fs.setxattr(self.base_path, 'ceph.dir.subvolume', b'1', 0)
        except cephfs.InvalidValue:
            raise VolumeException(-errno.EINVAL, "invalid value specified for ceph.dir.subvolume")
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])

    @staticmethod
    def is_valid_uuid(uuid_str):
        try:
            uuid.UUID(uuid_str)
            return True
        except ValueError:
            return False
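
    # For example (illustrative values): is_valid_uuid("0b5f1d3e-0000-4000-8000-000000000000")
    # returns True, while is_valid_uuid("not-a-uuid") returns False. snapshot_data_path()
    # below relies on this to pick out the incarnation directory among a snapshot's entries.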

    def snapshot_base_path(self):
        return os.path.join(self.base_path, self.vol_spec.snapshot_dir_prefix.encode('utf-8'))

    def snapshot_data_path(self, snapname):
        snap_base_path = self.snapshot_path(snapname)
        uuid_str = None
        try:
            with self.fs.opendir(snap_base_path) as dir_handle:
                d = self.fs.readdir(dir_handle)
                # scan the snapshot directory for the uuid-named incarnation directory
                while d:
                    if d.d_name not in (b".", b".."):
                        d_full_path = os.path.join(snap_base_path, d.d_name)
                        stx = self.fs.statx(d_full_path, cephfs.CEPH_STATX_MODE, cephfs.AT_SYMLINK_NOFOLLOW)
                        if stat.S_ISDIR(stx.get('mode')):
                            if self.is_valid_uuid(d.d_name.decode('utf-8')):
                                uuid_str = d.d_name
                    d = self.fs.readdir(dir_handle)
        except cephfs.Error as e:
            if e.errno == errno.ENOENT:
                raise VolumeException(-errno.ENOENT, "snapshot '{0}' does not exist".format(snapname))
            raise VolumeException(-e.args[0], e.args[1])

        if not uuid_str:
            raise VolumeException(-errno.ENOENT, "snapshot '{0}' does not exist".format(snapname))

        return os.path.join(snap_base_path, uuid_str)
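
    # Illustrative resolution (hypothetical names): with a ".snap" snapshot
    # directory prefix, snapshot_data_path("snap1") would return roughly
    #   volumes/<group-name>/<subvolume-name>/.snap/snap1/<uuid>
    # where <uuid> names the incarnation that was current when the snapshot was taken.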

    def _remove_on_failure(self, subvol_path, retained):
        if retained:
            log.info("cleaning up subvolume incarnation with path: {0}".format(subvol_path))
            try:
                self.fs.rmdir(subvol_path)
            except cephfs.Error as e:
                raise VolumeException(-e.args[0], e.args[1])
        else:
            log.info("cleaning up subvolume with path: {0}".format(self.subvolname))
            self.remove()

    def _set_incarnation_metadata(self, subvolume_type, qpath, initial_state):
        self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_TYPE, subvolume_type.value)
        self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_PATH, qpath)
        self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_STATE, initial_state.value)

    def create(self, size, isolate_nspace, pool, mode, uid, gid):
        subvolume_type = SubvolumeTypes.TYPE_NORMAL
        try:
            initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
        except OpSmException:
            raise VolumeException(-errno.EINVAL, "subvolume creation failed: internal error")

        retained = self.retained
        if retained and self.has_pending_purges:
            raise VolumeException(-errno.EAGAIN, "asynchronous purge of subvolume in progress")
        subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
        try:
            self.fs.mkdirs(subvol_path, mode)
            self.mark_subvolume()
            attrs = {
                'uid': uid,
                'gid': gid,
                'data_pool': pool,
                'pool_namespace': self.namespace if isolate_nspace else None,
                'quota': size
            }
            self.set_attrs(subvol_path, attrs)

            # persist subvolume metadata
            qpath = subvol_path.decode('utf-8')
            if retained:
                self._set_incarnation_metadata(subvolume_type, qpath, initial_state)
                self.metadata_mgr.flush()
            else:
                self.init_config(SubvolumeV2.VERSION, subvolume_type, qpath, initial_state)

            # create the subvolume metadata file which manages auth-ids, if it doesn't exist
            self.auth_mdata_mgr.create_subvolume_metadata_file(self.group.groupname, self.subvolname)
        except (VolumeException, MetadataMgrException, cephfs.Error) as e:
            try:
                self._remove_on_failure(subvol_path, retained)
            except VolumeException as ve:
                log.info("failed to cleanup subvolume '{0}' ({1})".format(self.subvolname, ve))

            if isinstance(e, MetadataMgrException):
                log.error("metadata manager exception: {0}".format(e))
                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
            elif isinstance(e, cephfs.Error):
                e = VolumeException(-e.args[0], e.args[1])
            raise e

    def create_clone(self, pool, source_volname, source_subvolume, snapname):
        subvolume_type = SubvolumeTypes.TYPE_CLONE
        try:
            initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
        except OpSmException:
            raise VolumeException(-errno.EINVAL, "clone failed: internal error")

        retained = self.retained
        if retained and self.has_pending_purges:
            raise VolumeException(-errno.EAGAIN, "asynchronous purge of subvolume in progress")
        subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
        try:
            # the source snapshot's attrs are used to create the clone subvolume;
            # attributes of the subvolume's content, though, are synced during the cloning process
            attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname))

            # override the snapshot pool setting, if one is provided for the clone
            if pool is not None:
                attrs["data_pool"] = pool
                attrs["pool_namespace"] = None

            # create the directory and set attributes
            self.fs.mkdirs(subvol_path, attrs.get("mode"))
            self.mark_subvolume()
            self.set_attrs(subvol_path, attrs)

            # persist subvolume metadata and clone source
            qpath = subvol_path.decode('utf-8')
            if retained:
                self._set_incarnation_metadata(subvolume_type, qpath, initial_state)
            else:
                self.metadata_mgr.init(SubvolumeV2.VERSION, subvolume_type.value, qpath, initial_state.value)
            self.add_clone_source(source_volname, source_subvolume, snapname)
            self.metadata_mgr.flush()
        except (VolumeException, MetadataMgrException, cephfs.Error) as e:
            try:
                self._remove_on_failure(subvol_path, retained)
            except VolumeException as ve:
                log.info("failed to cleanup subvolume '{0}' ({1})".format(self.subvolname, ve))

            if isinstance(e, MetadataMgrException):
                log.error("metadata manager exception: {0}".format(e))
                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
            elif isinstance(e, cephfs.Error):
                e = VolumeException(-e.args[0], e.args[1])
            raise e

    def allowed_ops_by_type(self, vol_type):
        if vol_type == SubvolumeTypes.TYPE_CLONE:
            return {op_type for op_type in SubvolumeOpType}

        if vol_type == SubvolumeTypes.TYPE_NORMAL:
            return {op_type for op_type in SubvolumeOpType} - {SubvolumeOpType.CLONE_STATUS,
                                                               SubvolumeOpType.CLONE_CANCEL,
                                                               SubvolumeOpType.CLONE_INTERNAL}

        # note: a bare {} would be an empty dict; return an empty set for consistency
        return set()

    def allowed_ops_by_state(self, vol_state):
        if vol_state == SubvolumeStates.STATE_COMPLETE:
            return {op_type for op_type in SubvolumeOpType}

        if vol_state == SubvolumeStates.STATE_RETAINED:
            return {
                SubvolumeOpType.REMOVE,
                SubvolumeOpType.REMOVE_FORCE,
                SubvolumeOpType.LIST,
                SubvolumeOpType.INFO,
                SubvolumeOpType.SNAP_REMOVE,
                SubvolumeOpType.SNAP_LIST,
                SubvolumeOpType.SNAP_INFO,
                SubvolumeOpType.SNAP_PROTECT,
                SubvolumeOpType.SNAP_UNPROTECT,
                SubvolumeOpType.CLONE_SOURCE
            }

        return {SubvolumeOpType.REMOVE_FORCE,
                SubvolumeOpType.CLONE_CREATE,
                SubvolumeOpType.CLONE_STATUS,
                SubvolumeOpType.CLONE_CANCEL,
                SubvolumeOpType.CLONE_INTERNAL,
                SubvolumeOpType.CLONE_SOURCE}
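
    # Taken together, the two tables above drive open(): for example, a retained
    # subvolume still admits SubvolumeOpType.SNAP_REMOVE, while operations not
    # listed for STATE_RETAINED fail in open() below with ENOENT.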

    def open(self, op_type):
        if not isinstance(op_type, SubvolumeOpType):
            raise VolumeException(-errno.ENOTSUP, "operation {0} not supported on subvolume '{1}'".format(
                op_type.value, self.subvolname))
        try:
            self.metadata_mgr.refresh()
            # unconditionally mark as subvolume, to handle pre-existing subvolumes without the mark
            self.mark_subvolume()

            etype = self.subvol_type
            if op_type not in self.allowed_ops_by_type(etype):
                raise VolumeException(-errno.ENOTSUP, "operation '{0}' is not allowed on subvolume '{1}' of type {2}".format(
                    op_type.value, self.subvolname, etype.value))

            estate = self.state
            if op_type not in self.allowed_ops_by_state(estate) and estate == SubvolumeStates.STATE_RETAINED:
                raise VolumeException(-errno.ENOENT, "subvolume '{0}' is removed and has only snapshots retained".format(
                    self.subvolname))

            if op_type not in self.allowed_ops_by_state(estate) and estate != SubvolumeStates.STATE_RETAINED:
                raise VolumeException(-errno.EAGAIN, "subvolume '{0}' is not ready for operation {1}".format(
                    self.subvolname, op_type.value))

            if estate != SubvolumeStates.STATE_RETAINED:
                subvol_path = self.path
                log.debug("refreshed metadata, checking subvolume path '{0}'".format(subvol_path))
                st = self.fs.stat(subvol_path)

                self.uid = int(st.st_uid)
                self.gid = int(st.st_gid)
                self.mode = int(st.st_mode & ~stat.S_IFMT(st.st_mode))
        except MetadataMgrException as me:
            if me.errno == -errno.ENOENT:
                raise VolumeException(-errno.ENOENT, "subvolume '{0}' does not exist".format(self.subvolname))
            raise VolumeException(me.args[0], me.args[1])
        except cephfs.ObjectNotFound:
            log.debug("missing subvolume path '{0}' for subvolume '{1}'".format(subvol_path, self.subvolname))
            raise VolumeException(-errno.ENOENT, "mount path missing for subvolume '{0}'".format(self.subvolname))
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])

    def trash_incarnation_dir(self):
        """rename subvolume (uuid component) to trash"""
        self.create_trashcan()
        try:
            bname = os.path.basename(self.path)
            tpath = os.path.join(self.trash_dir, bname)
            log.debug("trash: {0} -> {1}".format(self.path, tpath))
            self.fs.rename(self.path, tpath)
            self._link_dir(tpath, bname)
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])

    def remove(self, retainsnaps=False):
        if self.list_snapshots():
            if not retainsnaps:
                raise VolumeException(-errno.ENOTEMPTY, "subvolume '{0}' has snapshots".format(self.subvolname))
        else:
            if not self.has_pending_purges:
                self.trash_base_dir()
                # delete the volume meta file, if it's not already deleted
                self.auth_mdata_mgr.delete_subvolume_metadata_file(self.group.groupname, self.subvolname)
                return
        if self.state != SubvolumeStates.STATE_RETAINED:
            self.trash_incarnation_dir()
            self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_PATH, "")
            self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_STATE, SubvolumeStates.STATE_RETAINED.value)
            self.metadata_mgr.flush()
            # delete the volume meta file, if it's not already deleted
            self.auth_mdata_mgr.delete_subvolume_metadata_file(self.group.groupname, self.subvolname)

    def info(self):
        if self.state != SubvolumeStates.STATE_RETAINED:
            return super(SubvolumeV2, self).info()

        return {'type': self.subvol_type.value, 'features': self.features, 'state': SubvolumeStates.STATE_RETAINED.value}

    def remove_snapshot(self, snapname):
        super(SubvolumeV2, self).remove_snapshot(snapname)
        if self.purgeable:
            self.trash_base_dir()
            # tickle the volume purge job to purge this entry, using ESTALE
            raise VolumeException(-errno.ESTALE, "subvolume '{0}' has been removed as the last retained snapshot is removed".format(self.subvolname))
        # if not purgeable, the subvolume is either not retained, still has snapshots,
        # or already has pending purge jobs that will garbage collect it
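
# A minimal lifecycle sketch of snapshot retention, assuming an already opened
# SubvolumeV2 instance `subvol` (construction and snapshot creation are handled
# elsewhere in the volumes plugin; names here are illustrative only):
#
#   subvol.remove(retainsnaps=True)   # moves the <uuid> dir to trash; state -> STATE_RETAINED
#   subvol.remove_snapshot("snap1")   # removing the last retained snapshot trashes
#                                     # base_path and raises ESTALE to tickle the purge job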