]>
Commit | Line | Data |
---|---|---|
ebe7e575 BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * | |
23 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Copyright (C) 2011 Lawrence Livermore National Security, LLC. | |
25 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | |
26 | * LLNL-CODE-403049. | |
27 | * Rewritten for Linux by: | |
28 | * Rohan Puri <rohan.puri15@gmail.com> | |
29 | * Brian Behlendorf <behlendorf1@llnl.gov> | |
a08ee875 | 30 | * Copyright (c) 2013 by Delphix. All rights reserved. |
cae5b340 | 31 | * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. |
ebe7e575 BB |
32 | */ |
33 | ||
34 | /* | |
35 | * ZFS control directory (a.k.a. ".zfs") | |
36 | * | |
37 | * This directory provides a common location for all ZFS meta-objects. | |
38 | * Currently, this is only the 'snapshot' and 'shares' directory, but this may | |
39 | * expand in the future. The elements are built dynamically, as the hierarchy | |
40 | * does not actually exist on disk. | |
41 | * | |
42 | * For 'snapshot', we don't want to have all snapshots always mounted, because | |
43 | * this would take up a huge amount of space in /etc/mnttab. We have three | |
44 | * types of objects: | |
45 | * | |
46 | * ctldir ------> snapshotdir -------> snapshot | |
47 | * | | |
48 | * | | |
49 | * V | |
50 | * mounted fs | |
51 | * | |
52 | * The 'snapshot' node contains just enough information to lookup '..' and act | |
53 | * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we | |
54 | * perform an automount of the underlying filesystem and return the | |
55 | * corresponding inode. | |
56 | * | |
57 | * All mounts are handled automatically by a user mode helper which invokes | |
58 | * the mount procedure. Unmounts are handled by allowing the mount | |
59 | * point to expire so the kernel may automatically unmount it. | |
60 | * | |
61 | * The '.zfs', '.zfs/snapshot', and all directories created under | |
62 | * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same | |
cae5b340 | 63 | * zfsvfs_t as the head filesystem (what '.zfs' lives under). |
ebe7e575 BB |
64 | * |
65 | * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths | |
66 | * (ie: snapshots) are complete ZFS filesystems and have their own unique | |
cae5b340 AX |
67 | * zfsvfs_t. However, the fsid reported by these mounts will be the same |
68 | * as that used by the parent zfsvfs_t to make NFS happy. | |
ebe7e575 BB |
69 | */ |
70 | ||
71 | #include <sys/types.h> | |
72 | #include <sys/param.h> | |
73 | #include <sys/time.h> | |
74 | #include <sys/systm.h> | |
75 | #include <sys/sysmacros.h> | |
76 | #include <sys/pathname.h> | |
77 | #include <sys/vfs.h> | |
78 | #include <sys/vfs_opreg.h> | |
79 | #include <sys/zfs_ctldir.h> | |
80 | #include <sys/zfs_ioctl.h> | |
81 | #include <sys/zfs_vfsops.h> | |
82 | #include <sys/zfs_vnops.h> | |
83 | #include <sys/stat.h> | |
84 | #include <sys/dmu.h> | |
94a40997 | 85 | #include <sys/dmu_objset.h> |
a08ee875 | 86 | #include <sys/dsl_destroy.h> |
ebe7e575 BB |
87 | #include <sys/dsl_deleg.h> |
88 | #include <sys/mount.h> | |
89 | #include <sys/zpl.h> | |
90 | #include "zfs_namecheck.h" | |
91 | ||
e10b0808 AX |
92 | /* |
93 | * Two AVL trees are maintained which contain all currently automounted | |
94 | * snapshots. Every automounted snapshot maps to a single zfs_snapentry_t | |
95 | * entry which MUST: | |
96 | * | |
97 | * - be attached to both trees, and | |
98 | * - be unique, no duplicate entries are allowed. | |
99 | * | |
100 | * The zfs_snapshots_by_name tree is indexed by the full dataset name | |
101 | * while the zfs_snapshots_by_objsetid tree is indexed by the unique | |
102 | * objsetid. This allows for fast lookups either by name or objsetid. | |
103 | */ | |
104 | static avl_tree_t zfs_snapshots_by_name; | |
105 | static avl_tree_t zfs_snapshots_by_objsetid; | |
94a40997 | 106 | static krwlock_t zfs_snapshot_lock; |
e10b0808 | 107 | |
ebe7e575 BB |
108 | /* |
109 | * Control Directory Tunables (.zfs) | |
110 | */ | |
111 | int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT; | |
cae5b340 | 112 | int zfs_admin_snapshot = 1; |
2ae10319 | 113 | |
e10b0808 AX |
114 | typedef struct { |
115 | char *se_name; /* full snapshot name */ | |
116 | char *se_path; /* full mount path */ | |
94a40997 | 117 | spa_t *se_spa; /* pool spa */ |
e10b0808 AX |
118 | uint64_t se_objsetid; /* snapshot objset id */ |
119 | struct dentry *se_root_dentry; /* snapshot root dentry */ | |
120 | taskqid_t se_taskqid; /* scheduled unmount taskqid */ | |
121 | avl_node_t se_node_name; /* zfs_snapshots_by_name link */ | |
122 | avl_node_t se_node_objsetid; /* zfs_snapshots_by_objsetid link */ | |
123 | refcount_t se_refcount; /* reference count */ | |
124 | } zfs_snapentry_t; | |
125 | ||
126 | static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay); | |
127 | ||
128 | /* | |
129 | * Allocate a new zfs_snapentry_t being careful to make a copy of the | |
130 | * snapshot name and provided mount point. No reference is taken. | |
131 | */ | |
ebe7e575 | 132 | static zfs_snapentry_t * |
94a40997 AX |
133 | zfsctl_snapshot_alloc(char *full_name, char *full_path, spa_t *spa, |
134 | uint64_t objsetid, struct dentry *root_dentry) | |
ebe7e575 | 135 | { |
e10b0808 AX |
136 | zfs_snapentry_t *se; |
137 | ||
138 | se = kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP); | |
139 | ||
140 | se->se_name = strdup(full_name); | |
141 | se->se_path = strdup(full_path); | |
94a40997 | 142 | se->se_spa = spa; |
e10b0808 AX |
143 | se->se_objsetid = objsetid; |
144 | se->se_root_dentry = root_dentry; | |
cae5b340 | 145 | se->se_taskqid = TASKQID_INVALID; |
e10b0808 AX |
146 | |
147 | refcount_create(&se->se_refcount); | |
148 | ||
149 | return (se); | |
ebe7e575 BB |
150 | } |
151 | ||
e10b0808 AX |
152 | /* |
153 | * Free a zfs_snapentry_t; the caller must ensure there are no active | |
154 | * references. | |
155 | */ | |
156 | static void | |
157 | zfsctl_snapshot_free(zfs_snapentry_t *se) | |
ebe7e575 | 158 | { |
e10b0808 AX |
159 | refcount_destroy(&se->se_refcount); |
160 | strfree(se->se_name); | |
161 | strfree(se->se_path); | |
162 | ||
163 | kmem_free(se, sizeof (zfs_snapentry_t)); | |
ebe7e575 BB |
164 | } |
165 | ||
166 | /* | |
e10b0808 | 167 | * Hold a reference on the zfs_snapentry_t. |
ebe7e575 BB |
168 | */ |
169 | static void | |
e10b0808 | 170 | zfsctl_snapshot_hold(zfs_snapentry_t *se) |
ebe7e575 | 171 | { |
e10b0808 AX |
172 | refcount_add(&se->se_refcount, NULL); |
173 | } | |
ebe7e575 | 174 | |
e10b0808 AX |
175 | /* |
176 | * Release a reference on the zfs_snapentry_t. When the number of | |
177 | * references drops to zero the structure will be freed. | |
178 | */ | |
179 | static void | |
180 | zfsctl_snapshot_rele(zfs_snapentry_t *se) | |
181 | { | |
182 | if (refcount_remove(&se->se_refcount, NULL) == 0) | |
183 | zfsctl_snapshot_free(se); | |
ebe7e575 BB |
184 | } |
185 | ||
e10b0808 AX |
186 | /* |
187 | * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and | |
188 | * zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part | |
189 | * of the trees a reference is held. | |
190 | */ | |
191 | static void | |
192 | zfsctl_snapshot_add(zfs_snapentry_t *se) | |
193 | { | |
94a40997 | 194 | ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); |
e10b0808 AX |
195 | refcount_add(&se->se_refcount, NULL); |
196 | avl_add(&zfs_snapshots_by_name, se); | |
197 | avl_add(&zfs_snapshots_by_objsetid, se); | |
198 | } | |
199 | ||
200 | /* | |
201 | * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and | |
202 | * zfs_snapshots_by_objsetid trees. Upon removal a reference is dropped, | |
203 | * this can result in the structure being freed if that was the last | |
204 | * remaining reference. | |
205 | */ | |
206 | static void | |
207 | zfsctl_snapshot_remove(zfs_snapentry_t *se) | |
208 | { | |
94a40997 | 209 | ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); |
e10b0808 AX |
210 | avl_remove(&zfs_snapshots_by_name, se); |
211 | avl_remove(&zfs_snapshots_by_objsetid, se); | |
212 | zfsctl_snapshot_rele(se); | |
213 | } | |
214 | ||
215 | /* | |
216 | * Snapshot name comparison function for the zfs_snapshots_by_name. | |
217 | */ | |
218 | static int | |
219 | snapentry_compare_by_name(const void *a, const void *b) | |
ebe7e575 | 220 | { |
e10b0808 AX |
221 | const zfs_snapentry_t *se_a = a; |
222 | const zfs_snapentry_t *se_b = b; | |
223 | int ret; | |
224 | ||
225 | ret = strcmp(se_a->se_name, se_b->se_name); | |
ebe7e575 BB |
226 | |
227 | if (ret < 0) | |
228 | return (-1); | |
229 | else if (ret > 0) | |
230 | return (1); | |
231 | else | |
232 | return (0); | |
233 | } | |
234 | ||
e10b0808 AX |
235 | /* |
236 | * Comparison function for zfs_snapshots_by_objsetid: spa first, then objsetid. | |
237 | */ | |
238 | static int | |
239 | snapentry_compare_by_objsetid(const void *a, const void *b) | |
240 | { | |
241 | const zfs_snapentry_t *se_a = a; | |
242 | const zfs_snapentry_t *se_b = b; | |
243 | ||
94a40997 AX |
244 | if (se_a->se_spa != se_b->se_spa) |
245 | return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1); | |
246 | ||
e10b0808 AX |
247 | if (se_a->se_objsetid < se_b->se_objsetid) |
248 | return (-1); | |
249 | else if (se_a->se_objsetid > se_b->se_objsetid) | |
250 | return (1); | |
251 | else | |
252 | return (0); | |
253 | } | |
254 | ||
255 | /* | |
256 | * Find a zfs_snapentry_t in zfs_snapshots_by_name. If the snapname | |
257 | * is found a pointer to the zfs_snapentry_t is returned and a reference | |
258 | * taken on the structure. The caller is responsible for dropping the | |
259 | * reference with zfsctl_snapshot_rele(). If the snapname is not found | |
260 | * NULL will be returned. | |
261 | */ | |
262 | static zfs_snapentry_t * | |
263 | zfsctl_snapshot_find_by_name(char *snapname) | |
264 | { | |
265 | zfs_snapentry_t *se, search; | |
266 | ||
94a40997 | 267 | ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); |
e10b0808 AX |
268 | |
269 | search.se_name = snapname; | |
270 | se = avl_find(&zfs_snapshots_by_name, &search, NULL); | |
271 | if (se) | |
272 | refcount_add(&se->se_refcount, NULL); | |
273 | ||
274 | return (se); | |
275 | } | |
276 | ||
277 | /* | |
278 | * Find a zfs_snapentry_t in zfs_snapshots_by_objsetid given the objset id | |
279 | * rather than the snapname. In all other respects it behaves the same | |
280 | * as zfsctl_snapshot_find_by_name(). | |
281 | */ | |
282 | static zfs_snapentry_t * | |
94a40997 | 283 | zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid) |
e10b0808 AX |
284 | { |
285 | zfs_snapentry_t *se, search; | |
286 | ||
94a40997 | 287 | ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); |
e10b0808 | 288 | |
94a40997 | 289 | search.se_spa = spa; |
e10b0808 AX |
290 | search.se_objsetid = objsetid; |
291 | se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL); | |
292 | if (se) | |
293 | refcount_add(&se->se_refcount, NULL); | |
294 | ||
295 | return (se); | |
296 | } | |
297 | ||
298 | /* | |
299 | * Rename a zfs_snapentry_t in the zfs_snapshots_by_name. The structure is | |
300 | * removed, renamed, and added back to the new correct location in the tree. | |
301 | */ | |
302 | static int | |
303 | zfsctl_snapshot_rename(char *old_snapname, char *new_snapname) | |
304 | { | |
305 | zfs_snapentry_t *se; | |
306 | ||
94a40997 | 307 | ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); |
e10b0808 AX |
308 | |
309 | se = zfsctl_snapshot_find_by_name(old_snapname); | |
310 | if (se == NULL) | |
311 | return (ENOENT); | |
312 | ||
313 | zfsctl_snapshot_remove(se); | |
314 | strfree(se->se_name); | |
315 | se->se_name = strdup(new_snapname); | |
316 | zfsctl_snapshot_add(se); | |
317 | zfsctl_snapshot_rele(se); | |
318 | ||
319 | return (0); | |
320 | } | |
321 | ||
322 | /* | |
323 | * Delayed task responsible for unmounting an expired automounted snapshot. | |
324 | */ | |
325 | static void | |
326 | snapentry_expire(void *data) | |
327 | { | |
328 | zfs_snapentry_t *se = (zfs_snapentry_t *)data; | |
94a40997 | 329 | spa_t *spa = se->se_spa; |
e10b0808 AX |
330 | uint64_t objsetid = se->se_objsetid; |
331 | ||
94a40997 AX |
332 | if (zfs_expire_snapshot <= 0) { |
333 | zfsctl_snapshot_rele(se); | |
334 | return; | |
335 | } | |
336 | ||
cae5b340 | 337 | se->se_taskqid = TASKQID_INVALID; |
e10b0808 AX |
338 | (void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE); |
339 | zfsctl_snapshot_rele(se); | |
340 | ||
341 | /* | |
342 | * Reschedule the unmount if the zfs_snapentry_t wasn't removed. | |
343 | * This can occur when the snapshot is busy. | |
344 | */ | |
94a40997 AX |
345 | rw_enter(&zfs_snapshot_lock, RW_READER); |
346 | if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { | |
e10b0808 AX |
347 | zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); |
348 | zfsctl_snapshot_rele(se); | |
349 | } | |
94a40997 | 350 | rw_exit(&zfs_snapshot_lock); |
e10b0808 AX |
351 | } |
352 | ||
353 | /* | |
354 | * Cancel an automatic unmount of a snapname. This callback is responsible | |
355 | * for dropping the reference on the zfs_snapentry_t which was taken | |
356 | * during dispatch. | |
357 | */ | |
358 | static void | |
359 | zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se) | |
360 | { | |
94a40997 | 361 | ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); |
e10b0808 | 362 | |
cae5b340 AX |
363 | if (taskq_cancel_id(system_delay_taskq, se->se_taskqid) == 0) { |
364 | se->se_taskqid = TASKQID_INVALID; | |
e10b0808 AX |
365 | zfsctl_snapshot_rele(se); |
366 | } | |
367 | } | |
368 | ||
369 | /* | |
370 | * Dispatch the unmount task for delayed handling with a hold protecting it. | |
371 | */ | |
372 | static void | |
373 | zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay) | |
374 | { | |
cae5b340 | 375 | ASSERT3S(se->se_taskqid, ==, TASKQID_INVALID); |
e10b0808 | 376 | |
94a40997 AX |
377 | if (delay <= 0) |
378 | return; | |
379 | ||
380 | zfsctl_snapshot_hold(se); | |
cae5b340 | 381 | se->se_taskqid = taskq_dispatch_delay(system_delay_taskq, |
e10b0808 | 382 | snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ); |
e10b0808 AX |
383 | } |
384 | ||
385 | /* | |
386 | * Schedule an automatic unmount of objset id to occur in delay seconds from | |
387 | * now. Any previous delayed unmount will be cancelled in favor of the | |
388 | * updated deadline. A hold is taken by zfsctl_snapshot_unmount_delay_impl() | |
389 | * and kept until the outstanding task is handled or cancelled. | |
390 | */ | |
391 | int | |
94a40997 | 392 | zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay) |
e10b0808 AX |
393 | { |
394 | zfs_snapentry_t *se; | |
395 | int error = ENOENT; | |
396 | ||
94a40997 AX |
397 | rw_enter(&zfs_snapshot_lock, RW_READER); |
398 | if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { | |
e10b0808 AX |
399 | zfsctl_snapshot_unmount_cancel(se); |
400 | zfsctl_snapshot_unmount_delay_impl(se, delay); | |
401 | zfsctl_snapshot_rele(se); | |
402 | error = 0; | |
403 | } | |
94a40997 | 404 | rw_exit(&zfs_snapshot_lock); |
e10b0808 AX |
405 | |
406 | return (error); | |
407 | } | |
408 | ||
409 | /* | |
410 | * Check if snapname is currently mounted. Returns B_TRUE when mounted | |
411 | * and B_FALSE when unmounted. | |
412 | */ | |
413 | static boolean_t | |
414 | zfsctl_snapshot_ismounted(char *snapname) | |
415 | { | |
416 | zfs_snapentry_t *se; | |
417 | boolean_t ismounted = B_FALSE; | |
418 | ||
94a40997 | 419 | rw_enter(&zfs_snapshot_lock, RW_READER); |
e10b0808 AX |
420 | if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) { |
421 | zfsctl_snapshot_rele(se); | |
422 | ismounted = B_TRUE; | |
423 | } | |
94a40997 | 424 | rw_exit(&zfs_snapshot_lock); |
e10b0808 AX |
425 | |
426 | return (ismounted); | |
427 | } | |
428 | ||
429 | /* | |
430 | * Check if the given inode is a part of the virtual .zfs directory. | |
431 | */ | |
ebe7e575 BB |
432 | boolean_t |
433 | zfsctl_is_node(struct inode *ip) | |
434 | { | |
435 | return (ITOZ(ip)->z_is_ctldir); | |
436 | } | |
437 | ||
e10b0808 AX |
438 | /* |
439 | * Check if the given inode is a .zfs/snapshot/snapname directory. | |
440 | */ | |
ebe7e575 BB |
441 | boolean_t |
442 | zfsctl_is_snapdir(struct inode *ip) | |
443 | { | |
444 | return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS)); | |
445 | } | |
446 | ||
447 | /* | |
448 | * Allocate a new inode with the passed id and ops. | |
449 | */ | |
450 | static struct inode * | |
cae5b340 | 451 | zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, |
ebe7e575 BB |
452 | const struct file_operations *fops, const struct inode_operations *ops) |
453 | { | |
a07c8b41 | 454 | inode_timespec_t now; |
ebe7e575 BB |
455 | struct inode *ip; |
456 | znode_t *zp; | |
457 | ||
cae5b340 | 458 | ip = new_inode(zfsvfs->z_sb); |
ebe7e575 BB |
459 | if (ip == NULL) |
460 | return (NULL); | |
461 | ||
22929307 | 462 | now = current_time(ip); |
ebe7e575 BB |
463 | zp = ITOZ(ip); |
464 | ASSERT3P(zp->z_dirlocks, ==, NULL); | |
465 | ASSERT3P(zp->z_acl_cached, ==, NULL); | |
466 | ASSERT3P(zp->z_xattr_cached, ==, NULL); | |
467 | zp->z_id = id; | |
468 | zp->z_unlinked = 0; | |
469 | zp->z_atime_dirty = 0; | |
470 | zp->z_zn_prefetch = 0; | |
471 | zp->z_moved = 0; | |
472 | zp->z_sa_hdl = NULL; | |
473 | zp->z_blksz = 0; | |
474 | zp->z_seq = 0; | |
475 | zp->z_mapcnt = 0; | |
ebe7e575 | 476 | zp->z_size = 0; |
ebe7e575 | 477 | zp->z_pflags = 0; |
ebe7e575 BB |
478 | zp->z_mode = 0; |
479 | zp->z_sync_cnt = 0; | |
ebe7e575 BB |
480 | zp->z_is_mapped = B_FALSE; |
481 | zp->z_is_ctldir = B_TRUE; | |
482 | zp->z_is_sa = B_FALSE; | |
7b3e34ba | 483 | zp->z_is_stale = B_FALSE; |
cae5b340 | 484 | ip->i_generation = 0; |
ebe7e575 | 485 | ip->i_ino = id; |
cae5b340 | 486 | ip->i_mode = (S_IFDIR | S_IRWXUGO); |
c06d4368 AX |
487 | ip->i_uid = SUID_TO_KUID(0); |
488 | ip->i_gid = SGID_TO_KGID(0); | |
ebe7e575 BB |
489 | ip->i_blkbits = SPA_MINBLOCKSHIFT; |
490 | ip->i_atime = now; | |
491 | ip->i_mtime = now; | |
492 | ip->i_ctime = now; | |
493 | ip->i_fop = fops; | |
494 | ip->i_op = ops; | |
22929307 AX |
495 | #if defined(IOP_XATTR) |
496 | ip->i_opflags &= ~IOP_XATTR; | |
497 | #endif | |
ebe7e575 BB |
498 | |
499 | if (insert_inode_locked(ip)) { | |
500 | unlock_new_inode(ip); | |
501 | iput(ip); | |
502 | return (NULL); | |
503 | } | |
504 | ||
cae5b340 AX |
505 | mutex_enter(&zfsvfs->z_znodes_lock); |
506 | list_insert_tail(&zfsvfs->z_all_znodes, zp); | |
507 | zfsvfs->z_nr_znodes++; | |
ebe7e575 | 508 | membar_producer(); |
cae5b340 | 509 | mutex_exit(&zfsvfs->z_znodes_lock); |
ebe7e575 BB |
510 | |
511 | unlock_new_inode(ip); | |
512 | ||
513 | return (ip); | |
514 | } | |
515 | ||
516 | /* | |
517 | * Lookup the inode with given id, it will be allocated if needed. | |
518 | */ | |
519 | static struct inode * | |
cae5b340 | 520 | zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id, |
ebe7e575 BB |
521 | const struct file_operations *fops, const struct inode_operations *ops) |
522 | { | |
523 | struct inode *ip = NULL; | |
524 | ||
525 | while (ip == NULL) { | |
cae5b340 | 526 | ip = ilookup(zfsvfs->z_sb, (unsigned long)id); |
ebe7e575 BB |
527 | if (ip) |
528 | break; | |
529 | ||
530 | /* May fail due to concurrent zfsctl_inode_alloc() */ | |
cae5b340 | 531 | ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops); |
ebe7e575 BB |
532 | } |
533 | ||
534 | return (ip); | |
535 | } | |
536 | ||
ebe7e575 BB |
537 | /* |
538 | * Create the '.zfs' directory. This directory is cached as part of the VFS | |
cae5b340 | 539 | * structure. This results in a hold on the zfsvfs_t. The code in zfs_umount() |
ebe7e575 BB |
540 | * therefore checks against a vfs_count of 2 instead of 1. This reference |
541 | * is removed when the ctldir is destroyed in the unmount. All other entities | |
542 | * under the '.zfs' directory are created dynamically as needed. | |
fc173c85 BB |
543 | * |
544 | * Because the dynamically created '.zfs' directory entries assume the use | |
545 | * of 64-bit inode numbers this support must be disabled on 32-bit systems. | |
ebe7e575 BB |
546 | */ |
547 | int | |
cae5b340 | 548 | zfsctl_create(zfsvfs_t *zfsvfs) |
ebe7e575 | 549 | { |
cae5b340 | 550 | ASSERT(zfsvfs->z_ctldir == NULL); |
ebe7e575 | 551 | |
cae5b340 | 552 | zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT, |
ebe7e575 | 553 | &zpl_fops_root, &zpl_ops_root); |
cae5b340 | 554 | if (zfsvfs->z_ctldir == NULL) |
a08ee875 | 555 | return (SET_ERROR(ENOENT)); |
ebe7e575 BB |
556 | |
557 | return (0); | |
558 | } | |
559 | ||
560 | /* | |
e10b0808 AX |
561 | * Destroy the '.zfs' directory or remove a snapshot from zfs_snapshots_by_name. |
562 | * Only called when the filesystem is unmounted. | |
ebe7e575 BB |
563 | */ |
564 | void | |
cae5b340 | 565 | zfsctl_destroy(zfsvfs_t *zfsvfs) |
ebe7e575 | 566 | { |
cae5b340 | 567 | if (zfsvfs->z_issnap) { |
e10b0808 | 568 | zfs_snapentry_t *se; |
cae5b340 AX |
569 | spa_t *spa = zfsvfs->z_os->os_spa; |
570 | uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); | |
e10b0808 | 571 | |
94a40997 AX |
572 | rw_enter(&zfs_snapshot_lock, RW_WRITER); |
573 | if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) | |
574 | != NULL) { | |
e10b0808 AX |
575 | zfsctl_snapshot_unmount_cancel(se); |
576 | zfsctl_snapshot_remove(se); | |
577 | zfsctl_snapshot_rele(se); | |
578 | } | |
94a40997 | 579 | rw_exit(&zfs_snapshot_lock); |
cae5b340 AX |
580 | } else if (zfsvfs->z_ctldir) { |
581 | iput(zfsvfs->z_ctldir); | |
582 | zfsvfs->z_ctldir = NULL; | |
e10b0808 | 583 | } |
ebe7e575 BB |
584 | } |
585 | ||
586 | /* | |
587 | * Given a root znode, retrieve the associated .zfs directory. | |
588 | * Add a hold to the inode and return it. | |
589 | */ | |
590 | struct inode * | |
591 | zfsctl_root(znode_t *zp) | |
592 | { | |
593 | ASSERT(zfs_has_ctldir(zp)); | |
594 | igrab(ZTOZSB(zp)->z_ctldir); | |
595 | return (ZTOZSB(zp)->z_ctldir); | |
596 | } | |
cae5b340 | 597 | |
e10b0808 | 598 | /* |
cae5b340 AX |
599 | * Generate a long fid to indicate a snapdir. We encode whether snapdir is |
600 | * already monunted in gen field. We do this because nfsd lookup will not | |
601 | * trigger automount. Next time the nfsd does fh_to_dentry, we will notice | |
602 | * this and do automount and return ESTALE to force nfsd revalidate and follow | |
603 | * mount. | |
e10b0808 AX |
604 | */ |
605 | static int | |
606 | zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp) | |
607 | { | |
e10b0808 AX |
608 | zfid_short_t *zfid = (zfid_short_t *)fidp; |
609 | zfid_long_t *zlfid = (zfid_long_t *)fidp; | |
610 | uint32_t gen = 0; | |
611 | uint64_t object; | |
612 | uint64_t objsetid; | |
613 | int i; | |
cae5b340 AX |
614 | struct dentry *dentry; |
615 | ||
616 | if (fidp->fid_len < LONG_FID_LEN) { | |
617 | fidp->fid_len = LONG_FID_LEN; | |
618 | return (SET_ERROR(ENOSPC)); | |
619 | } | |
e10b0808 | 620 | |
cae5b340 | 621 | object = ip->i_ino; |
e10b0808 AX |
622 | objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino; |
623 | zfid->zf_len = LONG_FID_LEN; | |
624 | ||
cae5b340 AX |
625 | dentry = d_obtain_alias(igrab(ip)); |
626 | if (!IS_ERR(dentry)) { | |
627 | gen = !!d_mountpoint(dentry); | |
628 | dput(dentry); | |
629 | } | |
630 | ||
e10b0808 AX |
631 | for (i = 0; i < sizeof (zfid->zf_object); i++) |
632 | zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); | |
633 | ||
634 | for (i = 0; i < sizeof (zfid->zf_gen); i++) | |
635 | zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); | |
636 | ||
637 | for (i = 0; i < sizeof (zlfid->zf_setid); i++) | |
638 | zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); | |
639 | ||
640 | for (i = 0; i < sizeof (zlfid->zf_setgen); i++) | |
641 | zlfid->zf_setgen[i] = 0; | |
ebe7e575 | 642 | |
e10b0808 AX |
643 | return (0); |
644 | } | |
645 | ||
646 | /* | |
647 | * Generate an appropriate fid for an entry in the .zfs directory. | |
648 | */ | |
ebe7e575 BB |
649 | int |
650 | zfsctl_fid(struct inode *ip, fid_t *fidp) | |
651 | { | |
652 | znode_t *zp = ITOZ(ip); | |
cae5b340 | 653 | zfsvfs_t *zfsvfs = ITOZSB(ip); |
ebe7e575 BB |
654 | uint64_t object = zp->z_id; |
655 | zfid_short_t *zfid; | |
656 | int i; | |
657 | ||
cae5b340 AX |
658 | ZFS_ENTER(zfsvfs); |
659 | ||
660 | if (zfsctl_is_snapdir(ip)) { | |
661 | ZFS_EXIT(zfsvfs); | |
662 | return (zfsctl_snapdir_fid(ip, fidp)); | |
663 | } | |
ebe7e575 BB |
664 | |
665 | if (fidp->fid_len < SHORT_FID_LEN) { | |
666 | fidp->fid_len = SHORT_FID_LEN; | |
cae5b340 | 667 | ZFS_EXIT(zfsvfs); |
a08ee875 | 668 | return (SET_ERROR(ENOSPC)); |
ebe7e575 BB |
669 | } |
670 | ||
671 | zfid = (zfid_short_t *)fidp; | |
672 | ||
673 | zfid->zf_len = SHORT_FID_LEN; | |
674 | ||
675 | for (i = 0; i < sizeof (zfid->zf_object); i++) | |
676 | zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); | |
677 | ||
678 | /* .zfs znodes always have a generation number of 0 */ | |
679 | for (i = 0; i < sizeof (zfid->zf_gen); i++) | |
680 | zfid->zf_gen[i] = 0; | |
681 | ||
cae5b340 | 682 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
683 | return (0); |
684 | } | |
685 | ||
e10b0808 AX |
686 | /* |
687 | * Construct a full dataset name in full_name: "pool/dataset@snap_name" | |
688 | */ | |
ebe7e575 | 689 | static int |
cae5b340 | 690 | zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len, |
e10b0808 | 691 | char *full_name) |
ebe7e575 | 692 | { |
cae5b340 | 693 | objset_t *os = zfsvfs->z_os; |
ebe7e575 | 694 | |
e10b0808 | 695 | if (zfs_component_namecheck(snap_name, NULL, NULL) != 0) |
a08ee875 | 696 | return (SET_ERROR(EILSEQ)); |
ebe7e575 | 697 | |
e10b0808 AX |
698 | dmu_objset_name(os, full_name); |
699 | if ((strlen(full_name) + 1 + strlen(snap_name)) >= len) | |
a08ee875 | 700 | return (SET_ERROR(ENAMETOOLONG)); |
ebe7e575 | 701 | |
e10b0808 AX |
702 | (void) strcat(full_name, "@"); |
703 | (void) strcat(full_name, snap_name); | |
ebe7e575 BB |
704 | |
705 | return (0); | |
706 | } | |
707 | ||
a08ee875 | 708 | /* |
e10b0808 | 709 | * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" |
a08ee875 | 710 | */ |
ebe7e575 | 711 | static int |
e10b0808 | 712 | zfsctl_snapshot_path(struct path *path, int len, char *full_path) |
ebe7e575 BB |
713 | { |
714 | char *path_buffer, *path_ptr; | |
715 | int path_len, error = 0; | |
716 | ||
717 | path_buffer = kmem_alloc(len, KM_SLEEP); | |
718 | ||
719 | path_ptr = d_path(path, path_buffer, len); | |
720 | if (IS_ERR(path_ptr)) { | |
721 | error = -PTR_ERR(path_ptr); | |
722 | goto out; | |
723 | } | |
724 | ||
725 | path_len = path_buffer + len - 1 - path_ptr; | |
726 | if (path_len > len) { | |
a08ee875 | 727 | error = SET_ERROR(EFAULT); |
ebe7e575 BB |
728 | goto out; |
729 | } | |
730 | ||
e10b0808 AX |
731 | memcpy(full_path, path_ptr, path_len); |
732 | full_path[path_len] = '\0'; | |
ebe7e575 BB |
733 | out: |
734 | kmem_free(path_buffer, len); | |
735 | ||
736 | return (error); | |
737 | } | |
738 | ||
e10b0808 AX |
739 | /* |
740 | * Returns full path in full_path: "<mountpoint>/.zfs/snapshot/snap_name" | |
741 | */ | |
742 | static int | |
cae5b340 | 743 | zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid, |
e10b0808 AX |
744 | int path_len, char *full_path) |
745 | { | |
cae5b340 | 746 | objset_t *os = zfsvfs->z_os; |
e10b0808 AX |
747 | fstrans_cookie_t cookie; |
748 | char *snapname; | |
749 | boolean_t case_conflict; | |
750 | uint64_t id, pos = 0; | |
751 | int error = 0; | |
752 | ||
cae5b340 | 753 | if (zfsvfs->z_vfs->vfs_mntpoint == NULL) |
e10b0808 AX |
754 | return (ENOENT); |
755 | ||
756 | cookie = spl_fstrans_mark(); | |
cae5b340 | 757 | snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); |
e10b0808 AX |
758 | |
759 | while (error == 0) { | |
760 | dsl_pool_config_enter(dmu_objset_pool(os), FTAG); | |
cae5b340 AX |
761 | error = dmu_snapshot_list_next(zfsvfs->z_os, |
762 | ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos, | |
763 | &case_conflict); | |
e10b0808 AX |
764 | dsl_pool_config_exit(dmu_objset_pool(os), FTAG); |
765 | if (error) | |
766 | goto out; | |
767 | ||
768 | if (id == objsetid) | |
769 | break; | |
770 | } | |
771 | ||
772 | memset(full_path, 0, path_len); | |
773 | snprintf(full_path, path_len - 1, "%s/.zfs/snapshot/%s", | |
cae5b340 | 774 | zfsvfs->z_vfs->vfs_mntpoint, snapname); |
e10b0808 | 775 | out: |
cae5b340 | 776 | kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); |
e10b0808 AX |
777 | spl_fstrans_unmark(cookie); |
778 | ||
779 | return (error); | |
780 | } | |
781 | ||
ebe7e575 BB |
782 | /* |
783 | * Special case the handling of "..". | |
784 | */ | |
ebe7e575 BB |
785 | int |
786 | zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp, | |
787 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
788 | { | |
cae5b340 | 789 | zfsvfs_t *zfsvfs = ITOZSB(dip); |
ebe7e575 BB |
790 | int error = 0; |
791 | ||
cae5b340 | 792 | ZFS_ENTER(zfsvfs); |
ebe7e575 BB |
793 | |
794 | if (strcmp(name, "..") == 0) { | |
795 | *ipp = dip->i_sb->s_root->d_inode; | |
796 | } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { | |
cae5b340 | 797 | *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR, |
ebe7e575 BB |
798 | &zpl_fops_snapdir, &zpl_ops_snapdir); |
799 | } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) { | |
cae5b340 | 800 | *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SHARES, |
ebe7e575 BB |
801 | &zpl_fops_shares, &zpl_ops_shares); |
802 | } else { | |
803 | *ipp = NULL; | |
804 | } | |
805 | ||
806 | if (*ipp == NULL) | |
a08ee875 | 807 | error = SET_ERROR(ENOENT); |
ebe7e575 | 808 | |
cae5b340 | 809 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
810 | |
811 | return (error); | |
812 | } | |
813 | ||
814 | /* | |
815 | * Lookup entry point for the 'snapshot' directory. Try to open the | |
816 | * snapshot if it exists, creating the pseudo filesystem inode as necessary. | |
817 | * Perform a mount of the associated dataset on top of the inode. | |
818 | */ | |
ebe7e575 BB |
819 | int |
820 | zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp, | |
821 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
822 | { | |
cae5b340 | 823 | zfsvfs_t *zfsvfs = ITOZSB(dip); |
ebe7e575 BB |
824 | uint64_t id; |
825 | int error; | |
826 | ||
cae5b340 | 827 | ZFS_ENTER(zfsvfs); |
ebe7e575 | 828 | |
cae5b340 | 829 | error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id); |
ebe7e575 | 830 | if (error) { |
cae5b340 | 831 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
832 | return (error); |
833 | } | |
834 | ||
cae5b340 | 835 | *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIRS - id, |
ebe7e575 | 836 | &simple_dir_operations, &simple_dir_inode_operations); |
e10b0808 | 837 | if (*ipp == NULL) |
a08ee875 | 838 | error = SET_ERROR(ENOENT); |
ebe7e575 | 839 | |
cae5b340 | 840 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
841 | |
842 | return (error); | |
843 | } | |
844 | ||
ebe7e575 BB |
845 | /* |
846 | * Renaming a directory under '.zfs/snapshot' will automatically trigger | |
847 | * a rename of the snapshot to the new given name. The rename is confined | |
848 | * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere. | |
849 | */ | |
ebe7e575 | 850 | int |
a08ee875 LG |
851 | zfsctl_snapdir_rename(struct inode *sdip, char *snm, |
852 | struct inode *tdip, char *tnm, cred_t *cr, int flags) | |
ebe7e575 | 853 | { |
cae5b340 | 854 | zfsvfs_t *zfsvfs = ITOZSB(sdip); |
a08ee875 | 855 | char *to, *from, *real, *fsname; |
ebe7e575 BB |
856 | int error; |
857 | ||
e10b0808 AX |
858 | if (!zfs_admin_snapshot) |
859 | return (EACCES); | |
860 | ||
cae5b340 | 861 | ZFS_ENTER(zfsvfs); |
ebe7e575 | 862 | |
cae5b340 AX |
863 | to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); |
864 | from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); | |
865 | real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); | |
866 | fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); | |
ebe7e575 | 867 | |
cae5b340 AX |
868 | if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { |
869 | error = dmu_snapshot_realname(zfsvfs->z_os, snm, real, | |
870 | ZFS_MAX_DATASET_NAME_LEN, NULL); | |
ebe7e575 | 871 | if (error == 0) { |
a08ee875 | 872 | snm = real; |
ebe7e575 BB |
873 | } else if (error != ENOTSUP) { |
874 | goto out; | |
875 | } | |
876 | } | |
877 | ||
cae5b340 | 878 | dmu_objset_name(zfsvfs->z_os, fsname); |
a08ee875 | 879 | |
cae5b340 AX |
880 | error = zfsctl_snapshot_name(ITOZSB(sdip), snm, |
881 | ZFS_MAX_DATASET_NAME_LEN, from); | |
a08ee875 | 882 | if (error == 0) |
cae5b340 AX |
883 | error = zfsctl_snapshot_name(ITOZSB(tdip), tnm, |
884 | ZFS_MAX_DATASET_NAME_LEN, to); | |
a08ee875 | 885 | if (error == 0) |
ebe7e575 | 886 | error = zfs_secpolicy_rename_perms(from, to, cr); |
a08ee875 | 887 | if (error != 0) |
ebe7e575 BB |
888 | goto out; |
889 | ||
890 | /* | |
891 | * Cannot move snapshots out of the snapdir. | |
892 | */ | |
893 | if (sdip != tdip) { | |
a08ee875 | 894 | error = SET_ERROR(EINVAL); |
ebe7e575 BB |
895 | goto out; |
896 | } | |
897 | ||
898 | /* | |
899 | * No-op when names are identical. | |
900 | */ | |
a08ee875 | 901 | if (strcmp(snm, tnm) == 0) { |
ebe7e575 BB |
902 | error = 0; |
903 | goto out; | |
904 | } | |
905 | ||
94a40997 | 906 | rw_enter(&zfs_snapshot_lock, RW_WRITER); |
ebe7e575 | 907 | |
a08ee875 | 908 | error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE); |
e10b0808 AX |
909 | if (error == 0) |
910 | (void) zfsctl_snapshot_rename(snm, tnm); | |
ebe7e575 | 911 | |
94a40997 | 912 | rw_exit(&zfs_snapshot_lock); |
ebe7e575 | 913 | out: |
cae5b340 AX |
914 | kmem_free(from, ZFS_MAX_DATASET_NAME_LEN); |
915 | kmem_free(to, ZFS_MAX_DATASET_NAME_LEN); | |
916 | kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); | |
917 | kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); | |
ebe7e575 | 918 | |
cae5b340 | 919 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
920 | |
921 | return (error); | |
922 | } | |
923 | ||
924 | /* | |
925 | * Removing a directory under '.zfs/snapshot' will automatically trigger | |
926 | * the removal of the snapshot with the given name. | |
927 | */ | |
ebe7e575 BB |
928 | int |
929 | zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) | |
930 | { | |
cae5b340 | 931 | zfsvfs_t *zfsvfs = ITOZSB(dip); |
ebe7e575 BB |
932 | char *snapname, *real; |
933 | int error; | |
934 | ||
e10b0808 AX |
935 | if (!zfs_admin_snapshot) |
936 | return (EACCES); | |
937 | ||
cae5b340 | 938 | ZFS_ENTER(zfsvfs); |
ebe7e575 | 939 | |
cae5b340 AX |
940 | snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); |
941 | real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); | |
ebe7e575 | 942 | |
cae5b340 AX |
943 | if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { |
944 | error = dmu_snapshot_realname(zfsvfs->z_os, name, real, | |
945 | ZFS_MAX_DATASET_NAME_LEN, NULL); | |
ebe7e575 BB |
946 | if (error == 0) { |
947 | name = real; | |
948 | } else if (error != ENOTSUP) { | |
949 | goto out; | |
950 | } | |
951 | } | |
952 | ||
cae5b340 AX |
953 | error = zfsctl_snapshot_name(ITOZSB(dip), name, |
954 | ZFS_MAX_DATASET_NAME_LEN, snapname); | |
a08ee875 | 955 | if (error == 0) |
ebe7e575 | 956 | error = zfs_secpolicy_destroy_perms(snapname, cr); |
a08ee875 | 957 | if (error != 0) |
ebe7e575 BB |
958 | goto out; |
959 | ||
e10b0808 | 960 | error = zfsctl_snapshot_unmount(snapname, MNT_FORCE); |
ebe7e575 | 961 | if ((error == 0) || (error == ENOENT)) |
a08ee875 | 962 | error = dsl_destroy_snapshot(snapname, B_FALSE); |
ebe7e575 | 963 | out: |
cae5b340 AX |
964 | kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); |
965 | kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); | |
ebe7e575 | 966 | |
cae5b340 | 967 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
968 | |
969 | return (error); | |
970 | } | |
971 | ||
972 | /* | |
973 | * Creating a directory under '.zfs/snapshot' will automatically trigger | |
974 | * the creation of a new snapshot with the given name. | |
975 | */ | |
ebe7e575 BB |
976 | int |
977 | zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, | |
cae5b340 | 978 | struct inode **ipp, cred_t *cr, int flags) |
ebe7e575 | 979 | { |
cae5b340 | 980 | zfsvfs_t *zfsvfs = ITOZSB(dip); |
ebe7e575 BB |
981 | char *dsname; |
982 | int error; | |
983 | ||
e10b0808 AX |
984 | if (!zfs_admin_snapshot) |
985 | return (EACCES); | |
986 | ||
cae5b340 | 987 | dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); |
ebe7e575 | 988 | |
ea04106b | 989 | if (zfs_component_namecheck(dirname, NULL, NULL) != 0) { |
a08ee875 | 990 | error = SET_ERROR(EILSEQ); |
ebe7e575 BB |
991 | goto out; |
992 | } | |
993 | ||
cae5b340 | 994 | dmu_objset_name(zfsvfs->z_os, dsname); |
ebe7e575 BB |
995 | |
996 | error = zfs_secpolicy_snapshot_perms(dsname, cr); | |
a08ee875 | 997 | if (error != 0) |
ebe7e575 BB |
998 | goto out; |
999 | ||
1000 | if (error == 0) { | |
a08ee875 LG |
1001 | error = dmu_objset_snapshot_one(dsname, dirname); |
1002 | if (error != 0) | |
ebe7e575 BB |
1003 | goto out; |
1004 | ||
1005 | error = zfsctl_snapdir_lookup(dip, dirname, ipp, | |
1006 | 0, cr, NULL, NULL); | |
1007 | } | |
1008 | out: | |
cae5b340 | 1009 | kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); |
ebe7e575 BB |
1010 | |
1011 | return (error); | |
1012 | } | |
1013 | ||
ebe7e575 BB |
1014 | /* |
1015 | * Attempt to unmount a snapshot by making a call to user space. | |
1016 | * There is no assurance that this can or will succeed, is just a | |
1017 | * best effort. In the case where it does fail, perhaps because | |
1018 | * it's in use, the unmount will fail harmlessly. | |
1019 | */ | |
e10b0808 AX |
1020 | int |
1021 | zfsctl_snapshot_unmount(char *snapname, int flags) | |
ebe7e575 | 1022 | { |
68d83c55 AX |
1023 | char *argv[] = { "/usr/bin/env", "umount", "-t", "zfs", "-n", NULL, |
1024 | NULL }; | |
ebe7e575 | 1025 | char *envp[] = { NULL }; |
e10b0808 | 1026 | zfs_snapentry_t *se; |
ebe7e575 BB |
1027 | int error; |
1028 | ||
94a40997 | 1029 | rw_enter(&zfs_snapshot_lock, RW_READER); |
e10b0808 | 1030 | if ((se = zfsctl_snapshot_find_by_name(snapname)) == NULL) { |
94a40997 | 1031 | rw_exit(&zfs_snapshot_lock); |
e10b0808 AX |
1032 | return (ENOENT); |
1033 | } | |
94a40997 | 1034 | rw_exit(&zfs_snapshot_lock); |
e10b0808 | 1035 | |
68d83c55 AX |
1036 | if (flags & MNT_FORCE) |
1037 | argv[4] = "-fn"; | |
1038 | argv[5] = se->se_path; | |
e10b0808 | 1039 | dprintf("unmount; path=%s\n", se->se_path); |
761394b3 | 1040 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); |
68d83c55 | 1041 | zfsctl_snapshot_rele(se); |
ebe7e575 | 1042 | |
e10b0808 | 1043 | |
ebe7e575 BB |
1044 | /* |
1045 | * The umount system utility will return 256 on error. We must | |
1046 | * assume this error is because the file system is busy so it is | |
1047 | * converted to the more sensible EBUSY. | |
1048 | */ | |
1049 | if (error) | |
a08ee875 | 1050 | error = SET_ERROR(EBUSY); |
ebe7e575 | 1051 | |
ebe7e575 BB |
1052 | return (error); |
1053 | } | |
1054 | ||
a08ee875 | 1055 | #define MOUNT_BUSY 0x80 /* Mount failed due to EBUSY (from mntent.h) */ |
ebe7e575 BB |
1056 | |
1057 | int | |
e10b0808 | 1058 | zfsctl_snapshot_mount(struct path *path, int flags) |
ebe7e575 BB |
1059 | { |
1060 | struct dentry *dentry = path->dentry; | |
1061 | struct inode *ip = dentry->d_inode; | |
cae5b340 AX |
1062 | zfsvfs_t *zfsvfs; |
1063 | zfsvfs_t *snap_zfsvfs; | |
e10b0808 | 1064 | zfs_snapentry_t *se; |
ebe7e575 | 1065 | char *full_name, *full_path; |
68d83c55 AX |
1066 | char *argv[] = { "/usr/bin/env", "mount", "-t", "zfs", "-n", NULL, NULL, |
1067 | NULL }; | |
ebe7e575 BB |
1068 | char *envp[] = { NULL }; |
1069 | int error; | |
94a40997 | 1070 | struct path spath; |
ebe7e575 | 1071 | |
e10b0808 AX |
1072 | if (ip == NULL) |
1073 | return (EISDIR); | |
1074 | ||
cae5b340 AX |
1075 | zfsvfs = ITOZSB(ip); |
1076 | ZFS_ENTER(zfsvfs); | |
ebe7e575 | 1077 | |
cae5b340 | 1078 | full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); |
e10b0808 | 1079 | full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); |
ebe7e575 | 1080 | |
cae5b340 AX |
1081 | error = zfsctl_snapshot_name(zfsvfs, dname(dentry), |
1082 | ZFS_MAX_DATASET_NAME_LEN, full_name); | |
ebe7e575 BB |
1083 | if (error) |
1084 | goto error; | |
1085 | ||
e10b0808 | 1086 | error = zfsctl_snapshot_path(path, MAXPATHLEN, full_path); |
ebe7e575 BB |
1087 | if (error) |
1088 | goto error; | |
1089 | ||
e10b0808 AX |
1090 | /* |
1091 | * Multiple concurrent automounts of a snapshot are never allowed. | |
1092 | * The snapshot may be manually mounted as many times as desired. | |
1093 | */ | |
1094 | if (zfsctl_snapshot_ismounted(full_name)) { | |
94a40997 | 1095 | error = 0; |
e10b0808 AX |
1096 | goto error; |
1097 | } | |
1098 | ||
ebe7e575 BB |
1099 | /* |
1100 | * Attempt to mount the snapshot from user space. Normally this | |
1101 | * would be done using the vfs_kern_mount() function, however that | |
1102 | * function is marked GPL-only and cannot be used. On error we | |
1103 | * careful to log the real error to the console and return EISDIR | |
1104 | * to safely abort the automount. This should be very rare. | |
a08ee875 LG |
1105 | * |
1106 | * If the user mode helper happens to return EBUSY, a concurrent | |
1107 | * mount is already in progress in which case the error is ignored. | |
1108 | * Take note that if the program was executed successfully the return | |
1109 | * value from call_usermodehelper() will be (exitcode << 8 + signal). | |
ebe7e575 | 1110 | */ |
e10b0808 | 1111 | dprintf("mount; name=%s path=%s\n", full_name, full_path); |
68d83c55 AX |
1112 | argv[5] = full_name; |
1113 | argv[6] = full_path; | |
761394b3 | 1114 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); |
94a40997 AX |
1115 | if (error) { |
1116 | if (!(error & MOUNT_BUSY << 8)) { | |
1117 | cmn_err(CE_WARN, "Unable to automount %s/%s: %d", | |
1118 | full_path, full_name, error); | |
1119 | error = SET_ERROR(EISDIR); | |
1120 | } else { | |
1121 | /* | |
1122 | * EBUSY, this could mean a concurrent mount, or the | |
1123 | * snapshot has already been mounted at completely | |
1124 | * different place. We return 0 so VFS will retry. For | |
1125 | * the latter case the VFS will retry several times | |
1126 | * and return ELOOP, which is probably not a very good | |
1127 | * behavior. | |
1128 | */ | |
1129 | error = 0; | |
1130 | } | |
ebe7e575 BB |
1131 | goto error; |
1132 | } | |
1133 | ||
ebe7e575 | 1134 | /* |
e10b0808 AX |
1135 | * Follow down in to the mounted snapshot and set MNT_SHRINKABLE |
1136 | * to identify this as an automounted filesystem. | |
ebe7e575 | 1137 | */ |
94a40997 AX |
1138 | spath = *path; |
1139 | path_get(&spath); | |
1140 | if (zpl_follow_down_one(&spath)) { | |
cae5b340 AX |
1141 | snap_zfsvfs = ITOZSB(spath.dentry->d_inode); |
1142 | snap_zfsvfs->z_parent = zfsvfs; | |
94a40997 AX |
1143 | dentry = spath.dentry; |
1144 | spath.mnt->mnt_flags |= MNT_SHRINKABLE; | |
1145 | ||
1146 | rw_enter(&zfs_snapshot_lock, RW_WRITER); | |
1147 | se = zfsctl_snapshot_alloc(full_name, full_path, | |
cae5b340 | 1148 | snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os), |
94a40997 AX |
1149 | dentry); |
1150 | zfsctl_snapshot_add(se); | |
1151 | zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); | |
1152 | rw_exit(&zfs_snapshot_lock); | |
1153 | } | |
1154 | path_put(&spath); | |
ebe7e575 | 1155 | error: |
cae5b340 | 1156 | kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN); |
e10b0808 | 1157 | kmem_free(full_path, MAXPATHLEN); |
ebe7e575 | 1158 | |
cae5b340 | 1159 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
1160 | |
1161 | return (error); | |
1162 | } | |
1163 | ||
1164 | /* | |
cae5b340 | 1165 | * Get the snapdir inode from fid |
ebe7e575 | 1166 | */ |
ebe7e575 | 1167 | int |
cae5b340 AX |
1168 | zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen, |
1169 | struct inode **ipp) | |
ebe7e575 | 1170 | { |
ebe7e575 | 1171 | int error; |
cae5b340 AX |
1172 | struct path path; |
1173 | char *mnt; | |
1174 | struct dentry *dentry; | |
ebe7e575 | 1175 | |
cae5b340 | 1176 | mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP); |
e10b0808 | 1177 | |
cae5b340 AX |
1178 | error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid, |
1179 | MAXPATHLEN, mnt); | |
1180 | if (error) | |
1181 | goto out; | |
ebe7e575 | 1182 | |
cae5b340 | 1183 | /* Trigger automount */ |
a07c8b41 | 1184 | error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path); |
cae5b340 AX |
1185 | if (error) |
1186 | goto out; | |
ebe7e575 | 1187 | |
cae5b340 | 1188 | path_put(&path); |
e10b0808 | 1189 | /* |
cae5b340 AX |
1190 | * Get the snapdir inode. Note, we don't want to use the above |
1191 | * path because it contains the root of the snapshot rather | |
1192 | * than the snapdir. | |
e10b0808 | 1193 | */ |
cae5b340 AX |
1194 | *ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid); |
1195 | if (*ipp == NULL) { | |
1196 | error = SET_ERROR(ENOENT); | |
1197 | goto out; | |
ebe7e575 | 1198 | } |
ebe7e575 | 1199 | |
cae5b340 AX |
1200 | /* check gen, see zfsctl_snapdir_fid */ |
1201 | dentry = d_obtain_alias(igrab(*ipp)); | |
1202 | if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) { | |
1203 | iput(*ipp); | |
1204 | *ipp = NULL; | |
1205 | error = SET_ERROR(ENOENT); | |
1206 | } | |
1207 | if (!IS_ERR(dentry)) | |
1208 | dput(dentry); | |
1209 | out: | |
1210 | kmem_free(mnt, MAXPATHLEN); | |
ebe7e575 BB |
1211 | return (error); |
1212 | } | |
1213 | ||
ebe7e575 BB |
1214 | int |
1215 | zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, | |
1216 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
1217 | { | |
cae5b340 | 1218 | zfsvfs_t *zfsvfs = ITOZSB(dip); |
ebe7e575 BB |
1219 | struct inode *ip; |
1220 | znode_t *dzp; | |
1221 | int error; | |
1222 | ||
cae5b340 | 1223 | ZFS_ENTER(zfsvfs); |
ebe7e575 | 1224 | |
cae5b340 AX |
1225 | if (zfsvfs->z_shares_dir == 0) { |
1226 | ZFS_EXIT(zfsvfs); | |
a08ee875 | 1227 | return (SET_ERROR(ENOTSUP)); |
ebe7e575 BB |
1228 | } |
1229 | ||
cae5b340 AX |
1230 | if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { |
1231 | error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL); | |
1232 | iput(ZTOI(dzp)); | |
ebe7e575 BB |
1233 | } |
1234 | ||
cae5b340 | 1235 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
1236 | |
1237 | return (error); | |
1238 | } | |
1239 | ||
ebe7e575 BB |
1240 | /* |
1241 | * Initialize the various pieces we'll need to create and manipulate .zfs | |
1242 | * directories. Currently this is unused but available. | |
1243 | */ | |
1244 | void | |
1245 | zfsctl_init(void) | |
1246 | { | |
e10b0808 AX |
1247 | avl_create(&zfs_snapshots_by_name, snapentry_compare_by_name, |
1248 | sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, | |
1249 | se_node_name)); | |
1250 | avl_create(&zfs_snapshots_by_objsetid, snapentry_compare_by_objsetid, | |
1251 | sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, | |
1252 | se_node_objsetid)); | |
94a40997 | 1253 | rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL); |
ebe7e575 BB |
1254 | } |
1255 | ||
1256 | /* | |
1257 | * Cleanup the various pieces we needed for .zfs directories. In particular | |
1258 | * ensure the expiry timer is canceled safely. | |
1259 | */ | |
1260 | void | |
1261 | zfsctl_fini(void) | |
1262 | { | |
e10b0808 AX |
1263 | avl_destroy(&zfs_snapshots_by_name); |
1264 | avl_destroy(&zfs_snapshots_by_objsetid); | |
94a40997 | 1265 | rw_destroy(&zfs_snapshot_lock); |
ebe7e575 BB |
1266 | } |
1267 | ||
e10b0808 AX |
/*
 * Runtime tunable: when zero, the mkdir/rmdir/rename entry points for
 * .zfs/snapshot in this file return EACCES without doing any work.
 */
module_param(zfs_admin_snapshot, int, 0644);
MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot");

/*
 * Runtime tunable: handed to zfsctl_snapshot_unmount_delay_impl() when a
 * snapshot is automounted; described as the number of seconds before an
 * automounted .zfs/snapshot entry expires.
 */
module_param(zfs_expire_snapshot, int, 0644);
MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot");