import os
import stat
import uuid
import errno
import logging

import cephfs

from .metadata_manager import MetadataManager
from .subvolume_attrs import SubvolumeTypes, SubvolumeStates, SubvolumeFeatures
from .op_sm import SubvolumeOpSm
from .subvolume_v1 import SubvolumeV1
from ..template import SubvolumeTemplate
from ...exception import OpSmException, VolumeException, MetadataMgrException
from ...fs_util import listdir
from ..template import SubvolumeOpType

log = logging.getLogger(__name__)

class SubvolumeV2(SubvolumeV1):
    """
    Version 2 subvolumes are created with a path of the form:
        volumes/<group-name>/<subvolume-name>/<uuid>/

    The distinguishing feature of a V2 subvolume, compared to V1, is its ability to retain snapshots
    of a subvolume on removal. This is done by creating snapshots under the <subvolume-name> directory,
    rather than under the <uuid> directory, as is the case with V1 subvolumes.

    - The directory under which user data resides is <uuid>
    - Snapshots of the subvolume are taken within the <subvolume-name> directory
    - A meta file is maintained under the <subvolume-name> directory as a metadata store, holding
      information similar to V1 subvolumes
    - On a request to remove the subvolume but retain its snapshots, only the <uuid> directory is moved
      to trash, retaining the rest of the subvolume and its meta file
    - The <uuid> directory, when present, is the current incarnation of the subvolume, which may have
      snapshots of older incarnations of the same subvolume
    - V1 subvolumes that currently do not have any snapshots are upgraded to V2 subvolumes automatically,
      to support the snapshot retention feature
    """
    VERSION = 2

    @staticmethod
    def version():
        return SubvolumeV2.VERSION

    @property
    def features(self):
        return [SubvolumeFeatures.FEATURE_SNAPSHOT_CLONE.value,
                SubvolumeFeatures.FEATURE_SNAPSHOT_AUTOPROTECT.value,
                SubvolumeFeatures.FEATURE_SNAPSHOT_RETENTION.value]

    @property
    def retained(self):
        try:
            self.metadata_mgr.refresh()
            return self.state == SubvolumeStates.STATE_RETAINED
        except MetadataMgrException as me:
            if me.errno != -errno.ENOENT:
                raise VolumeException(me.errno, "internal error while processing subvolume '{0}'".format(self.subvolname))
            return False

    @property
    def purgeable(self):
        return self.retained and not self.list_snapshots() and not self.has_pending_purges

    @property
    def has_pending_purges(self):
        try:
            return listdir(self.fs, self.trash_dir) != []
        except VolumeException as ve:
            if ve.errno == -errno.ENOENT:
                return False
            raise

    @property
    def trash_dir(self):
        return os.path.join(self.base_path, b".trash")

    def create_trashcan(self):
        """per-subvolume trash directory"""
        try:
            self.fs.stat(self.trash_dir)
        except cephfs.Error as e:
            if e.args[0] == errno.ENOENT:
                try:
                    self.fs.mkdir(self.trash_dir, 0o700)
                except cephfs.Error as ce:
                    raise VolumeException(-ce.args[0], ce.args[1])
            else:
                raise VolumeException(-e.args[0], e.args[1])

    def mark_subvolume(self):
        # set the subvolume attr on the subvolume root, marking it as a CephFS subvolume
        # the subvolume root is where snapshots are taken, and hence is the base_path for v2 subvolumes
        try:
            # the MDS treats this as a no-op for an already marked subvolume
            self.fs.setxattr(self.base_path, 'ceph.dir.subvolume', b'1', 0)
        except cephfs.InvalidValue:
            raise VolumeException(-errno.EINVAL, "invalid value specified for ceph.dir.subvolume")
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])
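
        # Illustrative note (not part of the original source): the mark set above can
        # be inspected from a client mount via the vxattr, e.g.:
        #   getfattr -n ceph.dir.subvolume /mnt/cephfs/volumes/<group>/<subvol>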

    @staticmethod
    def is_valid_uuid(uuid_str):
        try:
            uuid.UUID(uuid_str)
            return True
        except ValueError:
            return False

    def snapshot_base_path(self):
        return os.path.join(self.base_path, self.vol_spec.snapshot_dir_prefix.encode('utf-8'))

    def snapshot_data_path(self, snapname):
        snap_base_path = self.snapshot_path(snapname)
        uuid_str = None
        try:
            with self.fs.opendir(snap_base_path) as dir_handle:
                d = self.fs.readdir(dir_handle)
                while d:
                    if d.d_name not in (b".", b".."):
                        d_full_path = os.path.join(snap_base_path, d.d_name)
                        stx = self.fs.statx(d_full_path, cephfs.CEPH_STATX_MODE, cephfs.AT_SYMLINK_NOFOLLOW)
                        if stat.S_ISDIR(stx.get('mode')):
                            if self.is_valid_uuid(d.d_name.decode('utf-8')):
                                uuid_str = d.d_name
                    d = self.fs.readdir(dir_handle)
        except cephfs.Error as e:
            if e.errno == errno.ENOENT:
                raise VolumeException(-errno.ENOENT, "snapshot '{0}' does not exist".format(snapname))
            raise VolumeException(-e.args[0], e.args[1])

        if not uuid_str:
            raise VolumeException(-errno.ENOENT, "snapshot '{0}' does not exist".format(snapname))

        return os.path.join(snap_base_path, uuid_str)
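
        # Illustrative note (not part of the original source): with the default snapshot
        # dir prefix ".snap", for snapname "snap1" the path returned above looks like:
        #   volumes/<group>/<subvol>/.snap/snap1/<uuid>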

    def _remove_on_failure(self, subvol_path, retained):
        if retained:
            log.info("cleaning up subvolume incarnation with path: {0}".format(subvol_path))
            try:
                self.fs.rmdir(subvol_path)
            except cephfs.Error as e:
                raise VolumeException(-e.args[0], e.args[1])
        else:
            log.info("cleaning up subvolume with path: {0}".format(self.subvolname))
            self.remove()

    def _set_incarnation_metadata(self, subvolume_type, qpath, initial_state):
        self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_TYPE, subvolume_type.value)
        self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_PATH, qpath)
        self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_STATE, initial_state.value)

    def create(self, size, isolate_nspace, pool, mode, uid, gid):
        subvolume_type = SubvolumeTypes.TYPE_NORMAL
        try:
            initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
        except OpSmException:
            raise VolumeException(-errno.EINVAL, "subvolume creation failed: internal error")

        retained = self.retained
        if retained and self.has_pending_purges:
            raise VolumeException(-errno.EAGAIN, "asynchronous purge of subvolume in progress")
        subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
        try:
            self.fs.mkdirs(subvol_path, mode)
            self.mark_subvolume()
            attrs = {
                'uid': uid,
                'gid': gid,
                'data_pool': pool,
                'pool_namespace': self.namespace if isolate_nspace else None,
                'quota': size
            }
            self.set_attrs(subvol_path, attrs)

            # persist subvolume metadata
            qpath = subvol_path.decode('utf-8')
            if retained:
                self._set_incarnation_metadata(subvolume_type, qpath, initial_state)
                self.metadata_mgr.flush()
            else:
                self.init_config(SubvolumeV2.VERSION, subvolume_type, qpath, initial_state)
        except (VolumeException, MetadataMgrException, cephfs.Error) as e:
            try:
                self._remove_on_failure(subvol_path, retained)
            except VolumeException as ve:
                log.info("failed to cleanup subvolume '{0}' ({1})".format(self.subvolname, ve))

            if isinstance(e, MetadataMgrException):
                log.error("metadata manager exception: {0}".format(e))
                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
            elif isinstance(e, cephfs.Error):
                e = VolumeException(-e.args[0], e.args[1])
            raise e

    def create_clone(self, pool, source_volname, source_subvolume, snapname):
        subvolume_type = SubvolumeTypes.TYPE_CLONE
        try:
            initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
        except OpSmException:
            raise VolumeException(-errno.EINVAL, "clone failed: internal error")

        retained = self.retained
        if retained and self.has_pending_purges:
            raise VolumeException(-errno.EAGAIN, "asynchronous purge of subvolume in progress")
        subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
        try:
            # source snapshot attrs are used to create the clone subvolume;
            # attributes of the subvolume's contents, though, are synced during the cloning process
            attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname))

            # override the snapshot pool setting, if one is provided for the clone
            if pool is not None:
                attrs["data_pool"] = pool
                attrs["pool_namespace"] = None

            # create the directory and set attributes
            self.fs.mkdirs(subvol_path, attrs.get("mode"))
            self.mark_subvolume()
            self.set_attrs(subvol_path, attrs)

            # persist subvolume metadata and clone source
            qpath = subvol_path.decode('utf-8')
            if retained:
                self._set_incarnation_metadata(subvolume_type, qpath, initial_state)
            else:
                self.metadata_mgr.init(SubvolumeV2.VERSION, subvolume_type.value, qpath, initial_state.value)
            self.add_clone_source(source_volname, source_subvolume, snapname)
            self.metadata_mgr.flush()
        except (VolumeException, MetadataMgrException, cephfs.Error) as e:
            try:
                self._remove_on_failure(subvol_path, retained)
            except VolumeException as ve:
                log.info("failed to cleanup subvolume '{0}' ({1})".format(self.subvolname, ve))

            if isinstance(e, MetadataMgrException):
                log.error("metadata manager exception: {0}".format(e))
                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
            elif isinstance(e, cephfs.Error):
                e = VolumeException(-e.args[0], e.args[1])
            raise e

    def allowed_ops_by_type(self, vol_type):
        if vol_type == SubvolumeTypes.TYPE_CLONE:
            return {op_type for op_type in SubvolumeOpType}

        if vol_type == SubvolumeTypes.TYPE_NORMAL:
            return {op_type for op_type in SubvolumeOpType} - {SubvolumeOpType.CLONE_STATUS,
                                                               SubvolumeOpType.CLONE_CANCEL,
                                                               SubvolumeOpType.CLONE_INTERNAL}

        return set()  # an empty set -- "{}" would be an empty dict

    def allowed_ops_by_state(self, vol_state):
        if vol_state == SubvolumeStates.STATE_COMPLETE:
            return {op_type for op_type in SubvolumeOpType}

        if vol_state == SubvolumeStates.STATE_RETAINED:
            return {
                SubvolumeOpType.REMOVE,
                SubvolumeOpType.REMOVE_FORCE,
                SubvolumeOpType.LIST,
                SubvolumeOpType.INFO,
                SubvolumeOpType.SNAP_REMOVE,
                SubvolumeOpType.SNAP_LIST,
                SubvolumeOpType.SNAP_INFO,
                SubvolumeOpType.SNAP_PROTECT,
                SubvolumeOpType.SNAP_UNPROTECT,
                SubvolumeOpType.CLONE_SOURCE
            }

        return {SubvolumeOpType.REMOVE_FORCE,
                SubvolumeOpType.CLONE_CREATE,
                SubvolumeOpType.CLONE_STATUS,
                SubvolumeOpType.CLONE_CANCEL,
                SubvolumeOpType.CLONE_INTERNAL,
                SubvolumeOpType.CLONE_SOURCE}
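
    # Illustrative consequence (not part of the original source): in STATE_RETAINED,
    # an op such as SubvolumeOpType.GETPATH is absent from the allowed set, so open()
    # below rejects it with ENOENT ("removed and has only snapshots retained").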

    def open(self, op_type):
        if not isinstance(op_type, SubvolumeOpType):
            raise VolumeException(-errno.ENOTSUP, "operation {0} not supported on subvolume '{1}'".format(
                                  op_type.value, self.subvolname))
        try:
            self.metadata_mgr.refresh()
            # unconditionally mark as subvolume, to handle pre-existing subvolumes without the mark
            self.mark_subvolume()

            etype = self.subvol_type
            if op_type not in self.allowed_ops_by_type(etype):
                raise VolumeException(-errno.ENOTSUP, "operation '{0}' is not allowed on subvolume '{1}' of type {2}".format(
                                      op_type.value, self.subvolname, etype.value))

            estate = self.state
            if op_type not in self.allowed_ops_by_state(estate) and estate == SubvolumeStates.STATE_RETAINED:
                raise VolumeException(-errno.ENOENT, "subvolume '{0}' is removed and has only snapshots retained".format(
                                      self.subvolname))

            if op_type not in self.allowed_ops_by_state(estate) and estate != SubvolumeStates.STATE_RETAINED:
                raise VolumeException(-errno.EAGAIN, "subvolume '{0}' is not ready for operation {1}".format(
                                      self.subvolname, op_type.value))

            if estate != SubvolumeStates.STATE_RETAINED:
                subvol_path = self.path
                log.debug("refreshed metadata, checking subvolume path '{0}'".format(subvol_path))
                st = self.fs.stat(subvol_path)

                self.uid = int(st.st_uid)
                self.gid = int(st.st_gid)
                self.mode = int(st.st_mode & ~stat.S_IFMT(st.st_mode))
        except MetadataMgrException as me:
            if me.errno == -errno.ENOENT:
                raise VolumeException(-errno.ENOENT, "subvolume '{0}' does not exist".format(self.subvolname))
            raise VolumeException(me.args[0], me.args[1])
        except cephfs.ObjectNotFound:
            log.debug("missing subvolume path '{0}' for subvolume '{1}'".format(subvol_path, self.subvolname))
            raise VolumeException(-errno.ENOENT, "mount path missing for subvolume '{0}'".format(self.subvolname))
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])

    def trash_incarnation_dir(self):
        """rename subvolume (uuid component) to trash"""
        self.create_trashcan()
        try:
            bname = os.path.basename(self.path)
            tpath = os.path.join(self.trash_dir, bname)
            log.debug("trash: {0} -> {1}".format(self.path, tpath))
            self.fs.rename(self.path, tpath)
            self._link_dir(tpath, bname)
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])

    def remove(self, retainsnaps=False):
        if self.list_snapshots():
            if not retainsnaps:
                raise VolumeException(-errno.ENOTEMPTY, "subvolume '{0}' has snapshots".format(self.subvolname))
        else:
            if not self.has_pending_purges:
                self.trash_base_dir()
                return
        if self.state != SubvolumeStates.STATE_RETAINED:
            self.trash_incarnation_dir()
            self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_PATH, "")
            self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_STATE, SubvolumeStates.STATE_RETAINED.value)
            self.metadata_mgr.flush()

    def info(self):
        if self.state != SubvolumeStates.STATE_RETAINED:
            return super(SubvolumeV2, self).info()

        return {'type': self.subvol_type.value, 'features': self.features, 'state': SubvolumeStates.STATE_RETAINED.value}

    def remove_snapshot(self, snapname):
        super(SubvolumeV2, self).remove_snapshot(snapname)
        if self.purgeable:
            self.trash_base_dir()
            # tickle the volume purge job to purge this entry, using ESTALE
            raise VolumeException(-errno.ESTALE, "subvolume '{0}' has been removed as the last retained snapshot is removed".format(self.subvolname))
        # if not purgeable, the subvolume is either not retained, still has snapshots, or already has purge jobs that will garbage collect it
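
# Illustrative lifecycle sketch (not part of the original source; the mgr/volumes CLI
# commands shown exist upstream, but volume/subvolume names are placeholders):
#
#   ceph fs subvolume snapshot create <vol> sv1 snap1
#   ceph fs subvolume rm <vol> sv1 --retain-snapshots
#       -> remove() trashes only the <uuid> incarnation dir and flips the metadata
#          state to snapshot-retained
#   ceph fs subvolume snapshot rm <vol> sv1 snap1
#       -> remove_snapshot() finds the subvolume purgeable, trashes the base dir,
#          and raises ESTALE to tickle the purge job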