]>
Commit | Line | Data |
---|---|---|
adb31ebb TL |
1 | import os |
2 | import stat | |
3 | import uuid | |
4 | import errno | |
5 | import logging | |
6 | ||
7 | import cephfs | |
8 | ||
9 | from .metadata_manager import MetadataManager | |
10 | from .subvolume_attrs import SubvolumeTypes, SubvolumeStates, SubvolumeFeatures | |
11 | from .op_sm import SubvolumeOpSm | |
12 | from .subvolume_v1 import SubvolumeV1 | |
13 | from ..template import SubvolumeTemplate | |
14 | from ...exception import OpSmException, VolumeException, MetadataMgrException | |
15 | from ...fs_util import listdir | |
16 | from ..template import SubvolumeOpType | |
17 | ||
18 | log = logging.getLogger(__name__) | |
19 | ||
class SubvolumeV2(SubvolumeV1):
    """
    Version 2 subvolumes creates a subvolume with path as follows,
        volumes/<group-name>/<subvolume-name>/<uuid>/

    The distinguishing feature of V2 subvolume as compared to V1 subvolumes is its ability to retain snapshots
    of a subvolume on removal. This is done by creating snapshots under the <subvolume-name> directory,
    rather than under the <uuid> directory, as is the case of V1 subvolumes.

    - The directory under which user data resides is <uuid>
    - Snapshots of the subvolume are taken within the <subvolume-name> directory
    - A meta file is maintained under the <subvolume-name> directory as a metadata store, storing information similar
    to V1 subvolumes
    - On a request to remove subvolume but retain its snapshots, only the <uuid> directory is moved to trash, retaining
    the rest of the subvolume and its meta file.
    - The <uuid> directory, when present, is the current incarnation of the subvolume, which may have snapshots of
    older incarnations of the same subvolume.
    - V1 subvolumes that currently do not have any snapshots are upgraded to V2 subvolumes automatically, to support the
    snapshot retention feature
    """
    # on-disk layout version persisted in the subvolume meta file
    VERSION = 2

    @staticmethod
    def version():
        """Return the subvolume layout version implemented by this class."""
        return SubvolumeV2.VERSION
45 | ||
46 | @property | |
47 | def features(self): | |
48 | return [SubvolumeFeatures.FEATURE_SNAPSHOT_CLONE.value, | |
49 | SubvolumeFeatures.FEATURE_SNAPSHOT_AUTOPROTECT.value, | |
50 | SubvolumeFeatures.FEATURE_SNAPSHOT_RETENTION.value] | |
51 | ||
52 | @property | |
53 | def retained(self): | |
54 | try: | |
55 | self.metadata_mgr.refresh() | |
56 | if self.state == SubvolumeStates.STATE_RETAINED: | |
57 | return True | |
58 | return False | |
59 | except MetadataMgrException as me: | |
60 | if me.errno != -errno.ENOENT: | |
61 | raise VolumeException(me.errno, "internal error while processing subvolume '{0}'".format(self.subvolname)) | |
62 | return False | |
63 | ||
64 | @property | |
65 | def purgeable(self): | |
66 | if not self.retained or self.list_snapshots() or self.has_pending_purges: | |
67 | return False | |
68 | return True | |
69 | ||
70 | @property | |
71 | def has_pending_purges(self): | |
72 | try: | |
73 | return not listdir(self.fs, self.trash_dir) == [] | |
74 | except VolumeException as ve: | |
75 | if ve.errno == -errno.ENOENT: | |
76 | return False | |
77 | raise | |
78 | ||
79 | @property | |
80 | def trash_dir(self): | |
81 | return os.path.join(self.base_path, b".trash") | |
82 | ||
83 | def create_trashcan(self): | |
84 | """per subvolume trash directory""" | |
85 | try: | |
86 | self.fs.stat(self.trash_dir) | |
87 | except cephfs.Error as e: | |
88 | if e.args[0] == errno.ENOENT: | |
89 | try: | |
90 | self.fs.mkdir(self.trash_dir, 0o700) | |
91 | except cephfs.Error as ce: | |
92 | raise VolumeException(-ce.args[0], ce.args[1]) | |
93 | else: | |
94 | raise VolumeException(-e.args[0], e.args[1]) | |
95 | ||
96 | def mark_subvolume(self): | |
97 | # set subvolume attr, on subvolume root, marking it as a CephFS subvolume | |
98 | # subvolume root is where snapshots would be taken, and hence is the base_path for v2 subvolumes | |
99 | try: | |
100 | # MDS treats this as a noop for already marked subvolume | |
101 | self.fs.setxattr(self.base_path, 'ceph.dir.subvolume', b'1', 0) | |
102 | except cephfs.InvalidValue as e: | |
103 | raise VolumeException(-errno.EINVAL, "invalid value specified for ceph.dir.subvolume") | |
104 | except cephfs.Error as e: | |
105 | raise VolumeException(-e.args[0], e.args[1]) | |
106 | ||
107 | @staticmethod | |
108 | def is_valid_uuid(uuid_str): | |
109 | try: | |
110 | uuid.UUID(uuid_str) | |
111 | return True | |
112 | except ValueError: | |
113 | return False | |
114 | ||
115 | def snapshot_base_path(self): | |
116 | return os.path.join(self.base_path, self.vol_spec.snapshot_dir_prefix.encode('utf-8')) | |
117 | ||
    def snapshot_data_path(self, snapname):
        """Return the path to the data (<uuid>) directory inside a snapshot.

        A v2 snapshot is taken at the subvolume root, so the snapshot
        directory contains the <uuid> incarnation directory; scan the
        snapshot's entries for a directory whose name is a valid uuid.

        :param snapname: name of the snapshot
        :return: full path (bytes) to the snapshot's uuid directory
        :raises: VolumeException(-ENOENT) if the snapshot or its uuid
                 directory does not exist
        """
        snap_base_path = self.snapshot_path(snapname)
        uuid_str = None
        try:
            with self.fs.opendir(snap_base_path) as dir_handle:
                d = self.fs.readdir(dir_handle)
                while d:
                    if d.d_name not in (b".", b".."):
                        d_full_path = os.path.join(snap_base_path, d.d_name)
                        # statx with AT_SYMLINK_NOFOLLOW: don't follow links
                        stx = self.fs.statx(d_full_path, cephfs.CEPH_STATX_MODE, cephfs.AT_SYMLINK_NOFOLLOW)
                        if stat.S_ISDIR(stx.get('mode')):
                            if self.is_valid_uuid(d.d_name.decode('utf-8')):
                                uuid_str = d.d_name
                    d = self.fs.readdir(dir_handle)
        except cephfs.Error as e:
            # NOTE(review): `e.errno` here vs `e.args[0]` elsewhere in this
            # file -- presumably equivalent on cephfs.Error; confirm
            if e.errno == errno.ENOENT:
                raise VolumeException(-errno.ENOENT, "snapshot '{0}' does not exist".format(snapname))
            raise VolumeException(-e.args[0], e.args[1])

        if not uuid_str:
            # snapshot dir existed but held no uuid directory
            raise VolumeException(-errno.ENOENT, "snapshot '{0}' does not exist".format(snapname))

        return os.path.join(snap_base_path, uuid_str)
141 | ||
142 | def _remove_on_failure(self, subvol_path, retained): | |
143 | if retained: | |
144 | log.info("cleaning up subvolume incarnation with path: {0}".format(subvol_path)) | |
145 | try: | |
146 | self.fs.rmdir(subvol_path) | |
147 | except cephfs.Error as e: | |
148 | raise VolumeException(-e.args[0], e.args[1]) | |
149 | else: | |
150 | log.info("cleaning up subvolume with path: {0}".format(self.subvolname)) | |
151 | self.remove() | |
152 | ||
153 | def _set_incarnation_metadata(self, subvolume_type, qpath, initial_state): | |
154 | self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_TYPE, subvolume_type.value) | |
155 | self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_PATH, qpath) | |
156 | self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_STATE, initial_state.value) | |
157 | ||
    def create(self, size, isolate_nspace, pool, mode, uid, gid):
        """Create a new v2 subvolume (or a new incarnation of a retained one).

        Makes a fresh <uuid> directory under base_path, applies attributes,
        persists metadata, and cleans up on any failure.

        :param size: quota in bytes (or None)
        :param isolate_nspace: if True, give the subvolume its own rados namespace
        :param pool: data pool for the subvolume (or None for default)
        :param mode: octal mode for the subvolume directory
        :param uid: owner uid
        :param gid: owner gid
        :raises: VolumeException on failure (partial state is cleaned up)
        """
        subvolume_type = SubvolumeTypes.TYPE_NORMAL
        try:
            initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
        except OpSmException as oe:
            raise VolumeException(-errno.EINVAL, "subvolume creation failed: internal error")

        retained = self.retained
        # a retained subvolume with pending purges cannot be re-created yet
        if retained and self.has_pending_purges:
            raise VolumeException(-errno.EAGAIN, "asynchronous purge of subvolume in progress")
        # each incarnation lives in a fresh uuid-named directory
        subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
        try:
            self.fs.mkdirs(subvol_path, mode)
            self.mark_subvolume()
            attrs = {
                'uid': uid,
                'gid': gid,
                'data_pool': pool,
                'pool_namespace': self.namespace if isolate_nspace else None,
                'quota': size
            }
            self.set_attrs(subvol_path, attrs)

            # persist subvolume metadata
            qpath = subvol_path.decode('utf-8')
            if retained:
                # meta file already exists: just point it at the new incarnation
                self._set_incarnation_metadata(subvolume_type, qpath, initial_state)
                self.metadata_mgr.flush()
            else:
                self.init_config(SubvolumeV2.VERSION, subvolume_type, qpath, initial_state)

            # Create the subvolume metadata file which manages auth-ids if it doesn't exist
            self.auth_mdata_mgr.create_subvolume_metadata_file(self.group.groupname, self.subvolname)
        except (VolumeException, MetadataMgrException, cephfs.Error) as e:
            # best-effort cleanup of the partially created incarnation
            try:
                self._remove_on_failure(subvol_path, retained)
            except VolumeException as ve:
                log.info("failed to cleanup subvolume '{0}' ({1})".format(self.subvolname, ve))

            # normalize everything to VolumeException for callers
            if isinstance(e, MetadataMgrException):
                log.error("metadata manager exception: {0}".format(e))
                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
            elif isinstance(e, cephfs.Error):
                e = VolumeException(-e.args[0], e.args[1])
            raise e
203 | ||
    def create_clone(self, pool, source_volname, source_subvolume, snapname):
        """Create a clone subvolume seeded from a source snapshot.

        Directory attributes are copied from the source snapshot here; file
        data is copied later by the asynchronous cloner.

        :param pool: optional data pool override for the clone
        :param source_volname: name of the source volume
        :param source_subvolume: source subvolume object
        :param snapname: name of the source snapshot
        :raises: VolumeException on failure (partial state is cleaned up)
        """
        subvolume_type = SubvolumeTypes.TYPE_CLONE
        try:
            initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
        except OpSmException as oe:
            raise VolumeException(-errno.EINVAL, "clone failed: internal error")

        retained = self.retained
        if retained and self.has_pending_purges:
            raise VolumeException(-errno.EAGAIN, "asynchronous purge of subvolume in progress")
        # new incarnation gets a fresh uuid-named directory
        subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
        try:
            # source snapshot attrs are used to create clone subvolume
            # attributes of subvolume's content though, are synced during the cloning process.
            attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname))

            # override snapshot pool setting, if one is provided for the clone
            if pool is not None:
                attrs["data_pool"] = pool
                attrs["pool_namespace"] = None

            # create directory and set attributes
            self.fs.mkdirs(subvol_path, attrs.get("mode"))
            self.mark_subvolume()
            self.set_attrs(subvol_path, attrs)

            # persist subvolume metadata and clone source
            qpath = subvol_path.decode('utf-8')
            if retained:
                self._set_incarnation_metadata(subvolume_type, qpath, initial_state)
            else:
                self.metadata_mgr.init(SubvolumeV2.VERSION, subvolume_type.value, qpath, initial_state.value)
            self.add_clone_source(source_volname, source_subvolume, snapname)
            self.metadata_mgr.flush()
        except (VolumeException, MetadataMgrException, cephfs.Error) as e:
            # best-effort cleanup of the partially created incarnation
            try:
                self._remove_on_failure(subvol_path, retained)
            except VolumeException as ve:
                log.info("failed to cleanup subvolume '{0}' ({1})".format(self.subvolname, ve))

            # normalize everything to VolumeException for callers
            if isinstance(e, MetadataMgrException):
                log.error("metadata manager exception: {0}".format(e))
                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
            elif isinstance(e, cephfs.Error):
                e = VolumeException(-e.args[0], e.args[1])
            raise e
250 | ||
251 | def allowed_ops_by_type(self, vol_type): | |
252 | if vol_type == SubvolumeTypes.TYPE_CLONE: | |
253 | return {op_type for op_type in SubvolumeOpType} | |
254 | ||
255 | if vol_type == SubvolumeTypes.TYPE_NORMAL: | |
256 | return {op_type for op_type in SubvolumeOpType} - {SubvolumeOpType.CLONE_STATUS, | |
257 | SubvolumeOpType.CLONE_CANCEL, | |
258 | SubvolumeOpType.CLONE_INTERNAL} | |
259 | ||
260 | return {} | |
261 | ||
262 | def allowed_ops_by_state(self, vol_state): | |
263 | if vol_state == SubvolumeStates.STATE_COMPLETE: | |
264 | return {op_type for op_type in SubvolumeOpType} | |
265 | ||
266 | if vol_state == SubvolumeStates.STATE_RETAINED: | |
267 | return { | |
268 | SubvolumeOpType.REMOVE, | |
269 | SubvolumeOpType.REMOVE_FORCE, | |
270 | SubvolumeOpType.LIST, | |
271 | SubvolumeOpType.INFO, | |
272 | SubvolumeOpType.SNAP_REMOVE, | |
273 | SubvolumeOpType.SNAP_LIST, | |
274 | SubvolumeOpType.SNAP_INFO, | |
275 | SubvolumeOpType.SNAP_PROTECT, | |
276 | SubvolumeOpType.SNAP_UNPROTECT, | |
277 | SubvolumeOpType.CLONE_SOURCE | |
278 | } | |
279 | ||
280 | return {SubvolumeOpType.REMOVE_FORCE, | |
281 | SubvolumeOpType.CLONE_CREATE, | |
282 | SubvolumeOpType.CLONE_STATUS, | |
283 | SubvolumeOpType.CLONE_CANCEL, | |
284 | SubvolumeOpType.CLONE_INTERNAL, | |
285 | SubvolumeOpType.CLONE_SOURCE} | |
286 | ||
    def open(self, op_type):
        """Open the subvolume for an operation, validating op type and state.

        Refreshes metadata, (re)marks the subvolume, checks that *op_type*
        is allowed for this subvolume's type and current state, and caches
        uid/gid/mode from the on-disk incarnation when one exists.

        :param op_type: SubvolumeOpType the caller intends to perform
        :raises: VolumeException with ENOTSUP/ENOENT/EAGAIN as appropriate
        """
        if not isinstance(op_type, SubvolumeOpType):
            raise VolumeException(-errno.ENOTSUP, "operation {0} not supported on subvolume '{1}'".format(
                                  op_type.value, self.subvolname))
        try:
            self.metadata_mgr.refresh()
            # unconditionally mark as subvolume, to handle pre-existing subvolumes without the mark
            self.mark_subvolume()

            etype = self.subvol_type
            if op_type not in self.allowed_ops_by_type(etype):
                raise VolumeException(-errno.ENOTSUP, "operation '{0}' is not allowed on subvolume '{1}' of type {2}".format(
                                      op_type.value, self.subvolname, etype.value))

            estate = self.state
            # retained subvolume: a disallowed op reads as "subvolume removed"
            if op_type not in self.allowed_ops_by_state(estate) and estate == SubvolumeStates.STATE_RETAINED:
                raise VolumeException(-errno.ENOENT, "subvolume '{0}' is removed and has only snapshots retained".format(
                                      self.subvolname))

            # transient state (creating/cloning/...): caller should retry
            if op_type not in self.allowed_ops_by_state(estate) and estate != SubvolumeStates.STATE_RETAINED:
                raise VolumeException(-errno.EAGAIN, "subvolume '{0}' is not ready for operation {1}".format(
                                      self.subvolname, op_type.value))

            if estate != SubvolumeStates.STATE_RETAINED:
                subvol_path = self.path
                log.debug("refreshed metadata, checking subvolume path '{0}'".format(subvol_path))
                st = self.fs.stat(subvol_path)

                # cache ownership and permission bits (mode with file-type bits stripped)
                self.uid = int(st.st_uid)
                self.gid = int(st.st_gid)
                self.mode = int(st.st_mode & ~stat.S_IFMT(st.st_mode))
        except MetadataMgrException as me:
            if me.errno == -errno.ENOENT:
                raise VolumeException(-errno.ENOENT, "subvolume '{0}' does not exist".format(self.subvolname))
            raise VolumeException(me.args[0], me.args[1])
        except cephfs.ObjectNotFound:
            # meta file exists but the incarnation directory is gone
            log.debug("missing subvolume path '{0}' for subvolume '{1}'".format(subvol_path, self.subvolname))
            raise VolumeException(-errno.ENOENT, "mount path missing for subvolume '{0}'".format(self.subvolname))
        except cephfs.Error as e:
            raise VolumeException(-e.args[0], e.args[1])
327 | ||
328 | def trash_incarnation_dir(self): | |
329 | """rename subvolume (uuid component) to trash""" | |
330 | self.create_trashcan() | |
331 | try: | |
332 | bname = os.path.basename(self.path) | |
333 | tpath = os.path.join(self.trash_dir, bname) | |
334 | log.debug("trash: {0} -> {1}".format(self.path, tpath)) | |
335 | self.fs.rename(self.path, tpath) | |
336 | self._link_dir(tpath, bname) | |
337 | except cephfs.Error as e: | |
338 | raise VolumeException(-e.args[0], e.args[1]) | |
339 | ||
    def remove(self, retainsnaps=False):
        """Remove the subvolume, optionally retaining its snapshots.

        With snapshots present and retainsnaps=False, removal is refused.
        With snapshots present and retainsnaps=True, only the current
        incarnation is trashed and the subvolume flips to STATE_RETAINED.
        Without snapshots, the whole base directory is trashed (unless a
        purge is already pending, in which case the purge job finishes it).

        :param retainsnaps: keep snapshots while removing user data
        :raises: VolumeException(-ENOTEMPTY) if snapshots exist and
                 retainsnaps is False
        """
        if self.list_snapshots():
            if not retainsnaps:
                raise VolumeException(-errno.ENOTEMPTY, "subvolume '{0}' has snapshots".format(self.subvolname))
        else:
            if not self.has_pending_purges:
                self.trash_base_dir()
                # Delete the volume meta file, if it's not already deleted
                self.auth_mdata_mgr.delete_subvolume_metadata_file(self.group.groupname, self.subvolname)
                return
        if self.state != SubvolumeStates.STATE_RETAINED:
            # trash only the incarnation; meta file stays, state -> retained
            self.trash_incarnation_dir()
            self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_PATH, "")
            self.metadata_mgr.update_global_section(MetadataManager.GLOBAL_META_KEY_STATE, SubvolumeStates.STATE_RETAINED.value)
            self.metadata_mgr.flush()
            # Delete the volume meta file, if it's not already deleted
            self.auth_mdata_mgr.delete_subvolume_metadata_file(self.group.groupname, self.subvolname)
357 | |
358 | def info(self): | |
359 | if self.state != SubvolumeStates.STATE_RETAINED: | |
360 | return super(SubvolumeV2, self).info() | |
361 | ||
362 | return {'type': self.subvol_type.value, 'features': self.features, 'state': SubvolumeStates.STATE_RETAINED.value} | |
363 | ||
364 | def remove_snapshot(self, snapname): | |
365 | super(SubvolumeV2, self).remove_snapshot(snapname) | |
366 | if self.purgeable: | |
367 | self.trash_base_dir() | |
368 | # tickle the volume purge job to purge this entry, using ESTALE | |
369 | raise VolumeException(-errno.ESTALE, "subvolume '{0}' has been removed as the last retained snapshot is removed".format(self.subvolname)) | |
370 | # if not purgeable, subvol is not retained, or has snapshots, or already has purge jobs that will garbage collect this subvol |