# ceph/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py (ceph 16.2.7)
import os
import stat
import uuid
import errno
import logging

import cephfs

from .metadata_manager import MetadataManager
from .subvolume_attrs import SubvolumeTypes, SubvolumeStates, SubvolumeFeatures
from .op_sm import SubvolumeOpSm
from .subvolume_v1 import SubvolumeV1
from ..template import SubvolumeTemplate
from ...exception import OpSmException, VolumeException, MetadataMgrException
from ...fs_util import listdir, create_base_dir
from ..template import SubvolumeOpType

log = logging.getLogger(__name__)


class SubvolumeV2(SubvolumeV1):
    """
    Version 2 subvolumes are created with a path of the form:
        volumes/<group-name>/<subvolume-name>/<uuid>/

    The distinguishing feature of a V2 subvolume, as compared to a V1 subvolume, is its ability to retain snapshots
    of a subvolume on removal. This is done by creating snapshots under the <subvolume-name> directory,
    rather than under the <uuid> directory, as is the case with V1 subvolumes.

    - The directory under which user data resides is <uuid>
    - Snapshots of the subvolume are taken within the <subvolume-name> directory
    - A meta file is maintained under the <subvolume-name> directory as a metadata store, storing information similar
      to V1 subvolumes
    - On a request to remove the subvolume but retain its snapshots, only the <uuid> directory is moved to trash,
      retaining the rest of the subvolume and its meta file
    - The <uuid> directory, when present, is the current incarnation of the subvolume, which may have snapshots of
      older incarnations of the same subvolume
    - V1 subvolumes that currently do not have any snapshots are automatically upgraded to V2 subvolumes, to support
      the snapshot retention feature
    """
    VERSION = 2

    @staticmethod
    def version():
        return SubvolumeV2.VERSION

    @property
    def features(self):
        return [SubvolumeFeatures.FEATURE_SNAPSHOT_CLONE.value,
                SubvolumeFeatures.FEATURE_SNAPSHOT_AUTOPROTECT.value,
                SubvolumeFeatures.FEATURE_SNAPSHOT_RETENTION.value]

    @property
    def retained(self):
        try:
            self.metadata_mgr.refresh()
            if self.state == SubvolumeStates.STATE_RETAINED:
                return True
            return False
        except MetadataMgrException as me:
            if me.errno != -errno.ENOENT:
                raise VolumeException(me.errno, "internal error while processing subvolume '{0}'".format(self.subvolname))
            return False

    @property
    def purgeable(self):
        if not self.retained or self.list_snapshots() or self.has_pending_purges:
            return False
        return True

    @property
    def has_pending_purges(self):
        try:
            return listdir(self.fs, self.trash_dir) != []
        except VolumeException as ve:
            if ve.errno == -errno.ENOENT:
                return False
            raise

    @property
    def trash_dir(self):
        return os.path.join(self.base_path, b".trash")

    def create_trashcan(self):
        """per-subvolume trash directory"""
        try:
            self.fs.stat(self.trash_dir)
        except cephfs.Error as e:
            if e.args[0] == errno.ENOENT:
                try:
                    self.fs.mkdir(self.trash_dir, 0o700)
                except cephfs.Error as ce:
                    raise VolumeException(-ce.args[0], ce.args[1])
            else:
                raise VolumeException(-e.args[0], e.args[1])

    def mark_subvolume(self):
        # set the subvolume attr on the subvolume root, marking it as a CephFS subvolume.
        # the subvolume root is where snapshots are taken, and hence is the base_path for v2 subvolumes
        try:
            # MDS treats this as a noop for an already marked subvolume
            self.fs.setxattr(self.base_path, 'ceph.dir.subvolume', b'1', 0)
        except cephfs.InvalidValue:
            raise VolumeException(-errno.EINVAL, "invalid value specified for ceph.dir.subvolume")
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])
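
    # For reference, the same mark can be applied manually from a client mount
    # (a sketch; the mount path is hypothetical):
    #
    #   setfattr -n ceph.dir.subvolume -v 1 /mnt/cephfs/volumes/grp1/sv1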

    @staticmethod
    def is_valid_uuid(uuid_str):
        try:
            uuid.UUID(uuid_str)
            return True
        except ValueError:
            return False
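
    # Example: incarnation directories are named with uuid4 strings, e.g.
    #
    #   >>> SubvolumeV2.is_valid_uuid('4c0eff4f-1a6e-4c0f-a3dc-0f3e9d4f2dd1')
    #   True
    #   >>> SubvolumeV2.is_valid_uuid('.trash')
    #   False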

    def snapshot_base_path(self):
        return os.path.join(self.base_path, self.vol_spec.snapshot_dir_prefix.encode('utf-8'))

    def snapshot_data_path(self, snapname):
        snap_base_path = self.snapshot_path(snapname)
        uuid_str = None
        try:
            with self.fs.opendir(snap_base_path) as dir_handle:
                d = self.fs.readdir(dir_handle)
                while d:
                    if d.d_name not in (b".", b".."):
                        d_full_path = os.path.join(snap_base_path, d.d_name)
                        stx = self.fs.statx(d_full_path, cephfs.CEPH_STATX_MODE, cephfs.AT_SYMLINK_NOFOLLOW)
                        if stat.S_ISDIR(stx.get('mode')):
                            if self.is_valid_uuid(d.d_name.decode('utf-8')):
                                uuid_str = d.d_name
                    d = self.fs.readdir(dir_handle)
        except cephfs.Error as e:
            if e.errno == errno.ENOENT:
                raise VolumeException(-errno.ENOENT, "snapshot '{0}' does not exist".format(snapname))
            raise VolumeException(-e.args[0], e.args[1])

        if not uuid_str:
            raise VolumeException(-errno.ENOENT, "snapshot '{0}' does not exist".format(snapname))

        return os.path.join(snap_base_path, uuid_str)
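
    # E.g., with the default '.snap' prefix, snapshot_data_path('snap1') scans
    # <base_path>/.snap/snap1/ for the <uuid> incarnation directory captured by
    # the snapshot and returns <base_path>/.snap/snap1/<uuid>.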

    def _remove_on_failure(self, subvol_path, retained):
        if retained:
            log.info("cleaning up subvolume incarnation with path: {0}".format(subvol_path))
            try:
                self.fs.rmdir(subvol_path)
            except cephfs.Error as e:
                raise VolumeException(-e.args[0], e.args[1])
        else:
            log.info("cleaning up subvolume with path: {0}".format(self.subvolname))
            self.remove()

    def _set_incarnation_metadata(self, subvolume_type, qpath, initial_state):
        self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_TYPE, subvolume_type.value)
        self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_PATH, qpath)
        self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_STATE, initial_state.value)

    def create(self, size, isolate_nspace, pool, mode, uid, gid):
        subvolume_type = SubvolumeTypes.TYPE_NORMAL
        try:
            initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
        except OpSmException:
            raise VolumeException(-errno.EINVAL, "subvolume creation failed: internal error")

        retained = self.retained
        if retained and self.has_pending_purges:
            raise VolumeException(-errno.EAGAIN, "asynchronous purge of subvolume in progress")
        subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
        try:
            # create the group directory with the default mode (0o755) if it doesn't exist
            create_base_dir(self.fs, self.group.path, self.vol_spec.DEFAULT_MODE)
            self.fs.mkdirs(subvol_path, mode)
            self.mark_subvolume()
            attrs = {
                'uid': uid,
                'gid': gid,
                'data_pool': pool,
                'pool_namespace': self.namespace if isolate_nspace else None,
                'quota': size
            }
            self.set_attrs(subvol_path, attrs)

            # persist subvolume metadata
            qpath = subvol_path.decode('utf-8')
            if retained:
                self._set_incarnation_metadata(subvolume_type, qpath, initial_state)
                self.metadata_mgr.flush()
            else:
                self.init_config(SubvolumeV2.VERSION, subvolume_type, qpath, initial_state)

            # create the subvolume metadata file which manages auth-ids, if it doesn't exist
            self.auth_mdata_mgr.create_subvolume_metadata_file(self.group.groupname, self.subvolname)
        except (VolumeException, MetadataMgrException, cephfs.Error) as e:
            try:
                self._remove_on_failure(subvol_path, retained)
            except VolumeException as ve:
                log.info("failed to cleanup subvolume '{0}' ({1})".format(self.subvolname, ve))

            if isinstance(e, MetadataMgrException):
                log.error("metadata manager exception: {0}".format(e))
                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
            elif isinstance(e, cephfs.Error):
                e = VolumeException(-e.args[0], e.args[1])
            raise e
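
    # Usage sketch (hypothetical caller; in-tree callers reach this through the
    # volumes module after resolving the group and subvolume objects):
    #
    #   subvol.create(size=1073741824, isolate_nspace=False, pool=None,
    #                 mode=0o755, uid=0, gid=0)
    #
    # On any failure, _remove_on_failure() rolls back: a fresh subvolume is
    # removed wholesale, while a retained one only drops the new <uuid>
    # incarnation directory.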

    def create_clone(self, pool, source_volname, source_subvolume, snapname):
        subvolume_type = SubvolumeTypes.TYPE_CLONE
        try:
            initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
        except OpSmException:
            raise VolumeException(-errno.EINVAL, "clone failed: internal error")

        retained = self.retained
        if retained and self.has_pending_purges:
            raise VolumeException(-errno.EAGAIN, "asynchronous purge of subvolume in progress")
        subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
        try:
            # source snapshot attrs are used to create the clone subvolume;
            # attributes of the subvolume's content, though, are synced during the cloning process
            attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname))

            # override the snapshot pool setting, if one is provided for the clone
            if pool is not None:
                attrs["data_pool"] = pool
                attrs["pool_namespace"] = None

            # create directory and set attributes
            self.fs.mkdirs(subvol_path, attrs.get("mode"))
            self.mark_subvolume()
            self.set_attrs(subvol_path, attrs)

            # persist subvolume metadata and clone source
            qpath = subvol_path.decode('utf-8')
            if retained:
                self._set_incarnation_metadata(subvolume_type, qpath, initial_state)
            else:
                self.metadata_mgr.init(SubvolumeV2.VERSION, subvolume_type.value, qpath, initial_state.value)
            self.add_clone_source(source_volname, source_subvolume, snapname)
            self.metadata_mgr.flush()
        except (VolumeException, MetadataMgrException, cephfs.Error) as e:
            try:
                self._remove_on_failure(subvol_path, retained)
            except VolumeException as ve:
                log.info("failed to cleanup subvolume '{0}' ({1})".format(self.subvolname, ve))

            if isinstance(e, MetadataMgrException):
                log.error("metadata manager exception: {0}".format(e))
                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
            elif isinstance(e, cephfs.Error):
                e = VolumeException(-e.args[0], e.args[1])
            raise e

    def allowed_ops_by_type(self, vol_type):
        if vol_type == SubvolumeTypes.TYPE_CLONE:
            return {op_type for op_type in SubvolumeOpType}

        if vol_type == SubvolumeTypes.TYPE_NORMAL:
            return {op_type for op_type in SubvolumeOpType} - {SubvolumeOpType.CLONE_STATUS,
                                                               SubvolumeOpType.CLONE_CANCEL,
                                                               SubvolumeOpType.CLONE_INTERNAL}

        return {}

    def allowed_ops_by_state(self, vol_state):
        if vol_state == SubvolumeStates.STATE_COMPLETE:
            return {op_type for op_type in SubvolumeOpType}

        if vol_state == SubvolumeStates.STATE_RETAINED:
            return {
                SubvolumeOpType.REMOVE,
                SubvolumeOpType.REMOVE_FORCE,
                SubvolumeOpType.LIST,
                SubvolumeOpType.INFO,
                SubvolumeOpType.SNAP_REMOVE,
                SubvolumeOpType.SNAP_LIST,
                SubvolumeOpType.SNAP_INFO,
                SubvolumeOpType.SNAP_PROTECT,
                SubvolumeOpType.SNAP_UNPROTECT,
                SubvolumeOpType.CLONE_SOURCE
            }

        return {SubvolumeOpType.REMOVE_FORCE,
                SubvolumeOpType.CLONE_CREATE,
                SubvolumeOpType.CLONE_STATUS,
                SubvolumeOpType.CLONE_CANCEL,
                SubvolumeOpType.CLONE_INTERNAL,
                SubvolumeOpType.CLONE_SOURCE}
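
    # Example (a sketch): a retained subvolume still permits snapshot cleanup
    # but not new clones, so
    #
    #   SubvolumeOpType.SNAP_REMOVE in self.allowed_ops_by_state(SubvolumeStates.STATE_RETAINED)   # True
    #   SubvolumeOpType.CLONE_CREATE in self.allowed_ops_by_state(SubvolumeStates.STATE_RETAINED)  # False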

    def open(self, op_type):
        if not isinstance(op_type, SubvolumeOpType):
            raise VolumeException(-errno.ENOTSUP, "operation {0} not supported on subvolume '{1}'".format(
                                  op_type.value, self.subvolname))
        try:
            self.metadata_mgr.refresh()
            # unconditionally mark as subvolume, to handle pre-existing subvolumes without the mark
            self.mark_subvolume()

            etype = self.subvol_type
            if op_type not in self.allowed_ops_by_type(etype):
                raise VolumeException(-errno.ENOTSUP, "operation '{0}' is not allowed on subvolume '{1}' of type {2}".format(
                                      op_type.value, self.subvolname, etype.value))

            estate = self.state
            if op_type not in self.allowed_ops_by_state(estate) and estate == SubvolumeStates.STATE_RETAINED:
                raise VolumeException(-errno.ENOENT, "subvolume '{0}' is removed and has only snapshots retained".format(
                                      self.subvolname))

            if op_type not in self.allowed_ops_by_state(estate) and estate != SubvolumeStates.STATE_RETAINED:
                raise VolumeException(-errno.EAGAIN, "subvolume '{0}' is not ready for operation {1}".format(
                                      self.subvolname, op_type.value))

            if estate != SubvolumeStates.STATE_RETAINED:
                subvol_path = self.path
                log.debug("refreshed metadata, checking subvolume path '{0}'".format(subvol_path))
                st = self.fs.stat(subvol_path)

                self.uid = int(st.st_uid)
                self.gid = int(st.st_gid)
                self.mode = int(st.st_mode & ~stat.S_IFMT(st.st_mode))
        except MetadataMgrException as me:
            if me.errno == -errno.ENOENT:
                raise VolumeException(-errno.ENOENT, "subvolume '{0}' does not exist".format(self.subvolname))
            raise VolumeException(me.args[0], me.args[1])
        except cephfs.ObjectNotFound:
            log.debug("missing subvolume path '{0}' for subvolume '{1}'".format(subvol_path, self.subvolname))
            raise VolumeException(-errno.ENOENT, "mount path missing for subvolume '{0}'".format(self.subvolname))
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])
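
    # Usage sketch (hypothetical; callers normally obtain the object via the
    # volumes module's loader and then gate the requested operation):
    #
    #   subvol.open(SubvolumeOpType.SNAP_LIST)  # raises VolumeException if the
    #                                           # op is disallowed for this
    #                                           # subvolume's type or state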

    def trash_incarnation_dir(self):
        """rename subvolume (uuid component) to trash"""
        self.create_trashcan()
        try:
            bname = os.path.basename(self.path)
            tpath = os.path.join(self.trash_dir, bname)
            log.debug("trash: {0} -> {1}".format(self.path, tpath))
            self.fs.rename(self.path, tpath)
            self._link_dir(tpath, bname)
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])

    def remove(self, retainsnaps=False):
        if self.list_snapshots():
            if not retainsnaps:
                raise VolumeException(-errno.ENOTEMPTY, "subvolume '{0}' has snapshots".format(self.subvolname))
        else:
            if not self.has_pending_purges:
                self.trash_base_dir()
                # Delete the volume meta file, if it's not already deleted
                self.auth_mdata_mgr.delete_subvolume_metadata_file(self.group.groupname, self.subvolname)
            return
        if self.state != SubvolumeStates.STATE_RETAINED:
            self.trash_incarnation_dir()
            self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_PATH, "")
            self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_STATE, SubvolumeStates.STATE_RETAINED.value)
            self.metadata_mgr.flush()
            # Delete the volume meta file, if it's not already deleted
            self.auth_mdata_mgr.delete_subvolume_metadata_file(self.group.groupname, self.subvolname)
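
    # Example flow (a sketch), for a subvolume that has snapshots:
    #
    #   subvol.remove(retainsnaps=False)  # raises -ENOTEMPTY, snapshots exist
    #   subvol.remove(retainsnaps=True)   # trashes only the <uuid> incarnation;
    #                                     # state moves to STATE_RETAINED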

    def info(self):
        if self.state != SubvolumeStates.STATE_RETAINED:
            return super(SubvolumeV2, self).info()

        return {'type': self.subvol_type.value, 'features': self.features, 'state': SubvolumeStates.STATE_RETAINED.value}

    def remove_snapshot(self, snapname):
        super(SubvolumeV2, self).remove_snapshot(snapname)
        if self.purgeable:
            self.trash_base_dir()
            # tickle the volume purge job to purge this entry, using ESTALE
            raise VolumeException(-errno.ESTALE, "subvolume '{0}' has been removed as the last retained snapshot is removed".format(self.subvolname))
        # if not purgeable, the subvolume is either not retained, still has snapshots,
        # or already has pending purge jobs that will garbage collect it
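
    # E.g. (a sketch): removing the last snapshot of a retained subvolume
    # trashes the whole base directory; the -ESTALE VolumeException is the
    # signal for the caller's purge machinery to reclaim it:
    #
    #   subvol.remove_snapshot('snap1')  # may raise VolumeException(-errno.ESTALE, ...)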