4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
25 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
27 * Rewritten for Linux by:
28 * Rohan Puri <rohan.puri15@gmail.com>
29 * Brian Behlendorf <behlendorf1@llnl.gov>
30 * Copyright (c) 2013 by Delphix. All rights reserved.
34 * ZFS control directory (a.k.a. ".zfs")
36 * This directory provides a common location for all ZFS meta-objects.
37 * Currently, this is only the 'snapshot' and 'shares' directory, but this may
38 * expand in the future. The elements are built dynamically, as the hierarchy
39 * does not actually exist on disk.
41 * For 'snapshot', we don't want to have all snapshots always mounted, because
42 * this would take up a huge amount of space in /etc/mnttab. We have three
45 * ctldir ------> snapshotdir -------> snapshot
51 * The 'snapshot' node contains just enough information to lookup '..' and act
52 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we
53 * perform an automount of the underlying filesystem and return the
54 * corresponding inode.
56 * All mounts are handled automatically by a user mode helper which invokes
57 * the mount procedure. Unmounts are handled by allowing the mount
58 * point to expire so the kernel may automatically unmount it.
60 * The '.zfs', '.zfs/snapshot', and all directories created under
61 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same
62 * zfs_sb_t as the head filesystem (what '.zfs' lives under).
64 * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths
65 * (ie: snapshots) are complete ZFS filesystems and have their own unique
66 * zfs_sb_t. However, the fsid reported by these mounts will be the same
67 * as that used by the parent zfs_sb_t to make NFS happy.
70 #include <sys/types.h>
71 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/sysmacros.h>
75 #include <sys/pathname.h>
77 #include <sys/vfs_opreg.h>
78 #include <sys/zfs_ctldir.h>
79 #include <sys/zfs_ioctl.h>
80 #include <sys/zfs_vfsops.h>
81 #include <sys/zfs_vnops.h>
84 #include <sys/dsl_destroy.h>
85 #include <sys/dsl_deleg.h>
86 #include <sys/mount.h>
88 #include "zfs_namecheck.h"
91 * Two AVL trees are maintained which contain all currently automounted
92 * snapshots. Every automounted snapshot maps to a single zfs_snapentry_t
95 * - be attached to both trees, and
96 * - be unique, no duplicate entries are allowed.
98 * The zfs_snapshots_by_name tree is indexed by the full dataset name
99 * while the zfs_snapshots_by_objsetid tree is indexed by the unique
100 * objsetid. This allows for fast lookups either by name or objsetid.
102 static avl_tree_t zfs_snapshots_by_name
;
103 static avl_tree_t zfs_snapshots_by_objsetid
;
104 static kmutex_t zfs_snapshot_lock
;
107 * Control Directory Tunables (.zfs)
109 int zfs_expire_snapshot
= ZFSCTL_EXPIRE_SNAPSHOT
;
110 int zfs_admin_snapshot
= 0;
113 * Dedicated task queue for unmounting snapshots.
115 static taskq_t
*zfs_expire_taskq
;
118 char *se_name
; /* full snapshot name */
119 char *se_path
; /* full mount path */
120 uint64_t se_objsetid
; /* snapshot objset id */
121 struct dentry
*se_root_dentry
; /* snapshot root dentry */
122 taskqid_t se_taskqid
; /* scheduled unmount taskqid */
123 avl_node_t se_node_name
; /* zfs_snapshots_by_name link */
124 avl_node_t se_node_objsetid
; /* zfs_snapshots_by_objsetid link */
125 refcount_t se_refcount
; /* reference count */
128 static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t
*se
, int delay
);
131 * Allocate a new zfs_snapentry_t being careful to make a copy of the
132 * snapshot name and provided mount point. No reference is taken.
134 static zfs_snapentry_t
*
135 zfsctl_snapshot_alloc(char *full_name
, char *full_path
, uint64_t objsetid
,
136 struct dentry
*root_dentry
)
140 se
= kmem_zalloc(sizeof (zfs_snapentry_t
), KM_SLEEP
);
142 se
->se_name
= strdup(full_name
);
143 se
->se_path
= strdup(full_path
);
144 se
->se_objsetid
= objsetid
;
145 se
->se_root_dentry
= root_dentry
;
148 refcount_create(&se
->se_refcount
);
154 * Free a zfs_snapentry_t; the caller must ensure there are no active
158 zfsctl_snapshot_free(zfs_snapentry_t
*se
)
160 refcount_destroy(&se
->se_refcount
);
161 strfree(se
->se_name
);
162 strfree(se
->se_path
);
164 kmem_free(se
, sizeof (zfs_snapentry_t
));
168 * Hold a reference on the zfs_snapentry_t.
171 zfsctl_snapshot_hold(zfs_snapentry_t
*se
)
173 refcount_add(&se
->se_refcount
, NULL
);
177 * Release a reference on the zfs_snapentry_t. When the number of
178 * references drops to zero the structure will be freed.
181 zfsctl_snapshot_rele(zfs_snapentry_t
*se
)
183 if (refcount_remove(&se
->se_refcount
, NULL
) == 0)
184 zfsctl_snapshot_free(se
);
188 * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and
189 * zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part
190 * of the trees a reference is held.
193 zfsctl_snapshot_add(zfs_snapentry_t
*se
)
195 ASSERT(MUTEX_HELD(&zfs_snapshot_lock
));
196 refcount_add(&se
->se_refcount
, NULL
);
197 avl_add(&zfs_snapshots_by_name
, se
);
198 avl_add(&zfs_snapshots_by_objsetid
, se
);
202 * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and
203 * zfs_snapshots_by_objsetid trees. Upon removal a reference is dropped,
204 * this can result in the structure being freed if that was the last
205 * remaining reference.
208 zfsctl_snapshot_remove(zfs_snapentry_t
*se
)
210 ASSERT(MUTEX_HELD(&zfs_snapshot_lock
));
211 avl_remove(&zfs_snapshots_by_name
, se
);
212 avl_remove(&zfs_snapshots_by_objsetid
, se
);
213 zfsctl_snapshot_rele(se
);
217 * Snapshot name comparison function for the zfs_snapshots_by_name.
220 snapentry_compare_by_name(const void *a
, const void *b
)
222 const zfs_snapentry_t
*se_a
= a
;
223 const zfs_snapentry_t
*se_b
= b
;
226 ret
= strcmp(se_a
->se_name
, se_b
->se_name
);
237 * Objset id comparison function for the zfs_snapshots_by_objsetid tree.
240 snapentry_compare_by_objsetid(const void *a
, const void *b
)
242 const zfs_snapentry_t
*se_a
= a
;
243 const zfs_snapentry_t
*se_b
= b
;
245 if (se_a
->se_objsetid
< se_b
->se_objsetid
)
247 else if (se_a
->se_objsetid
> se_b
->se_objsetid
)
254 * Find a zfs_snapentry_t in zfs_snapshots_by_name. If the snapname
255 * is found a pointer to the zfs_snapentry_t is returned and a reference
256 * taken on the structure. The caller is responsible for dropping the
257 * reference with zfsctl_snapshot_rele(). If the snapname is not found
258 * NULL will be returned.
260 static zfs_snapentry_t
*
261 zfsctl_snapshot_find_by_name(char *snapname
)
263 zfs_snapentry_t
*se
, search
;
265 ASSERT(MUTEX_HELD(&zfs_snapshot_lock
));
267 search
.se_name
= snapname
;
268 se
= avl_find(&zfs_snapshots_by_name
, &search
, NULL
);
270 refcount_add(&se
->se_refcount
, NULL
);
276 * Find a zfs_snapentry_t in zfs_snapshots_by_objsetid given the objset id
277 * rather than the snapname. In all other respects it behaves the same
278 * as zfsctl_snapshot_find_by_name().
280 static zfs_snapentry_t
*
281 zfsctl_snapshot_find_by_objsetid(uint64_t objsetid
)
283 zfs_snapentry_t
*se
, search
;
285 ASSERT(MUTEX_HELD(&zfs_snapshot_lock
));
287 search
.se_objsetid
= objsetid
;
288 se
= avl_find(&zfs_snapshots_by_objsetid
, &search
, NULL
);
290 refcount_add(&se
->se_refcount
, NULL
);
296 * Rename a zfs_snapentry_t in the zfs_snapshots_by_name. The structure is
297 * removed, renamed, and added back to the new correct location in the tree.
300 zfsctl_snapshot_rename(char *old_snapname
, char *new_snapname
)
304 ASSERT(MUTEX_HELD(&zfs_snapshot_lock
));
306 se
= zfsctl_snapshot_find_by_name(old_snapname
);
310 zfsctl_snapshot_remove(se
);
311 strfree(se
->se_name
);
312 se
->se_name
= strdup(new_snapname
);
313 zfsctl_snapshot_add(se
);
314 zfsctl_snapshot_rele(se
);
320 * Delayed task responsible for unmounting an expired automounted snapshot.
323 snapentry_expire(void *data
)
325 zfs_snapentry_t
*se
= (zfs_snapentry_t
*)data
;
326 uint64_t objsetid
= se
->se_objsetid
;
329 (void) zfsctl_snapshot_unmount(se
->se_name
, MNT_EXPIRE
);
330 zfsctl_snapshot_rele(se
);
333 * Reschedule the unmount if the zfs_snapentry_t wasn't removed.
334 * This can occur when the snapshot is busy.
336 mutex_enter(&zfs_snapshot_lock
);
337 if ((se
= zfsctl_snapshot_find_by_objsetid(objsetid
)) != NULL
) {
338 zfsctl_snapshot_unmount_delay_impl(se
, zfs_expire_snapshot
);
339 zfsctl_snapshot_rele(se
);
341 mutex_exit(&zfs_snapshot_lock
);
345 * Cancel an automatic unmount of a snapname. This callback is responsible
346 * for dropping the reference on the zfs_snapentry_t which was taken when
350 zfsctl_snapshot_unmount_cancel(zfs_snapentry_t
*se
)
352 ASSERT(MUTEX_HELD(&zfs_snapshot_lock
));
354 if (taskq_cancel_id(zfs_expire_taskq
, se
->se_taskqid
) == 0) {
356 zfsctl_snapshot_rele(se
);
361 * Dispatch the unmount task for delayed handling with a hold protecting it.
364 zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t
*se
, int delay
)
366 ASSERT3S(se
->se_taskqid
, ==, -1);
368 se
->se_taskqid
= taskq_dispatch_delay(zfs_expire_taskq
,
369 snapentry_expire
, se
, TQ_SLEEP
, ddi_get_lbolt() + delay
* HZ
);
370 zfsctl_snapshot_hold(se
);
374 * Schedule an automatic unmount of objset id to occur in delay seconds from
375 * now. Any previous delayed unmount will be cancelled in favor of the
376 * updated deadline. A hold is taken by zfsctl_snapshot_unmount_delay_impl()
377 * and kept until the outstanding task is handled or cancelled.
380 zfsctl_snapshot_unmount_delay(uint64_t objsetid
, int delay
)
385 mutex_enter(&zfs_snapshot_lock
);
386 if ((se
= zfsctl_snapshot_find_by_objsetid(objsetid
)) != NULL
) {
387 zfsctl_snapshot_unmount_cancel(se
);
388 zfsctl_snapshot_unmount_delay_impl(se
, delay
);
389 zfsctl_snapshot_rele(se
);
392 mutex_exit(&zfs_snapshot_lock
);
398 * Check if snapname is currently mounted. Returned non-zero when mounted
399 * and zero when unmounted.
402 zfsctl_snapshot_ismounted(char *snapname
)
405 boolean_t ismounted
= B_FALSE
;
407 mutex_enter(&zfs_snapshot_lock
);
408 if ((se
= zfsctl_snapshot_find_by_name(snapname
)) != NULL
) {
409 zfsctl_snapshot_rele(se
);
412 mutex_exit(&zfs_snapshot_lock
);
418 * Check if the given inode is a part of the virtual .zfs directory.
421 zfsctl_is_node(struct inode
*ip
)
423 return (ITOZ(ip
)->z_is_ctldir
);
427 * Check if the given inode is a .zfs/snapshot/snapname directory.
430 zfsctl_is_snapdir(struct inode
*ip
)
432 return (zfsctl_is_node(ip
) && (ip
->i_ino
<= ZFSCTL_INO_SNAPDIRS
));
436 * Allocate a new inode with the passed id and ops.
438 static struct inode
*
439 zfsctl_inode_alloc(zfs_sb_t
*zsb
, uint64_t id
,
440 const struct file_operations
*fops
, const struct inode_operations
*ops
)
442 struct timespec now
= current_fs_time(zsb
->z_sb
);
446 ip
= new_inode(zsb
->z_sb
);
451 ASSERT3P(zp
->z_dirlocks
, ==, NULL
);
452 ASSERT3P(zp
->z_acl_cached
, ==, NULL
);
453 ASSERT3P(zp
->z_xattr_cached
, ==, NULL
);
456 zp
->z_atime_dirty
= 0;
457 zp
->z_zn_prefetch
= 0;
473 zp
->z_is_zvol
= B_FALSE
;
474 zp
->z_is_mapped
= B_FALSE
;
475 zp
->z_is_ctldir
= B_TRUE
;
476 zp
->z_is_sa
= B_FALSE
;
477 zp
->z_is_stale
= B_FALSE
;
479 ip
->i_mode
= (S_IFDIR
| S_IRUGO
| S_IXUGO
);
480 ip
->i_uid
= SUID_TO_KUID(0);
481 ip
->i_gid
= SGID_TO_KGID(0);
482 ip
->i_blkbits
= SPA_MINBLOCKSHIFT
;
489 if (insert_inode_locked(ip
)) {
490 unlock_new_inode(ip
);
495 mutex_enter(&zsb
->z_znodes_lock
);
496 list_insert_tail(&zsb
->z_all_znodes
, zp
);
499 mutex_exit(&zsb
->z_znodes_lock
);
501 unlock_new_inode(ip
);
507 * Lookup the inode with given id, it will be allocated if needed.
509 static struct inode
*
510 zfsctl_inode_lookup(zfs_sb_t
*zsb
, uint64_t id
,
511 const struct file_operations
*fops
, const struct inode_operations
*ops
)
513 struct inode
*ip
= NULL
;
516 ip
= ilookup(zsb
->z_sb
, (unsigned long)id
);
520 /* May fail due to concurrent zfsctl_inode_alloc() */
521 ip
= zfsctl_inode_alloc(zsb
, id
, fops
, ops
);
528 * Create the '.zfs' directory. This directory is cached as part of the VFS
529 * structure. This results in a hold on the zfs_sb_t. The code in zfs_umount()
530 * therefore checks against a vfs_count of 2 instead of 1. This reference
531 * is removed when the ctldir is destroyed in the unmount. All other entities
532 * under the '.zfs' directory are created dynamically as needed.
534 * Because the dynamically created '.zfs' directory entries assume the use
535 * of 64-bit inode numbers this support must be disabled on 32-bit systems.
538 zfsctl_create(zfs_sb_t
*zsb
)
540 #if defined(CONFIG_64BIT)
541 ASSERT(zsb
->z_ctldir
== NULL
);
543 zsb
->z_ctldir
= zfsctl_inode_alloc(zsb
, ZFSCTL_INO_ROOT
,
544 &zpl_fops_root
, &zpl_ops_root
);
545 if (zsb
->z_ctldir
== NULL
)
546 return (SET_ERROR(ENOENT
));
550 return (SET_ERROR(EOPNOTSUPP
));
551 #endif /* CONFIG_64BIT */
555 * Destroy the '.zfs' directory or remove a snapshot from zfs_snapshots_by_name.
556 * Only called when the filesystem is unmounted.
559 zfsctl_destroy(zfs_sb_t
*zsb
)
563 uint64_t objsetid
= dmu_objset_id(zsb
->z_os
);
565 mutex_enter(&zfs_snapshot_lock
);
566 if ((se
= zfsctl_snapshot_find_by_objsetid(objsetid
)) != NULL
) {
567 zfsctl_snapshot_unmount_cancel(se
);
568 zfsctl_snapshot_remove(se
);
569 zfsctl_snapshot_rele(se
);
571 mutex_exit(&zfs_snapshot_lock
);
572 } else if (zsb
->z_ctldir
) {
574 zsb
->z_ctldir
= NULL
;
579 * Given a root znode, retrieve the associated .zfs directory.
580 * Add a hold to the vnode and return it.
583 zfsctl_root(znode_t
*zp
)
585 ASSERT(zfs_has_ctldir(zp
));
586 igrab(ZTOZSB(zp
)->z_ctldir
);
587 return (ZTOZSB(zp
)->z_ctldir
);
590 * Generate a long fid which includes the root object and objset of a
591 * snapshot but not the generation number. For the root object the
592 * generation number is ignored when zero to avoid needing to open
593 * the dataset when generating fids for the snapshot names.
596 zfsctl_snapdir_fid(struct inode
*ip
, fid_t
*fidp
)
598 zfs_sb_t
*zsb
= ITOZSB(ip
);
599 zfid_short_t
*zfid
= (zfid_short_t
*)fidp
;
600 zfid_long_t
*zlfid
= (zfid_long_t
*)fidp
;
606 object
= zsb
->z_root
;
607 objsetid
= ZFSCTL_INO_SNAPDIRS
- ip
->i_ino
;
608 zfid
->zf_len
= LONG_FID_LEN
;
610 for (i
= 0; i
< sizeof (zfid
->zf_object
); i
++)
611 zfid
->zf_object
[i
] = (uint8_t)(object
>> (8 * i
));
613 for (i
= 0; i
< sizeof (zfid
->zf_gen
); i
++)
614 zfid
->zf_gen
[i
] = (uint8_t)(gen
>> (8 * i
));
616 for (i
= 0; i
< sizeof (zlfid
->zf_setid
); i
++)
617 zlfid
->zf_setid
[i
] = (uint8_t)(objsetid
>> (8 * i
));
619 for (i
= 0; i
< sizeof (zlfid
->zf_setgen
); i
++)
620 zlfid
->zf_setgen
[i
] = 0;
626 * Generate an appropriate fid for an entry in the .zfs directory.
629 zfsctl_fid(struct inode
*ip
, fid_t
*fidp
)
631 znode_t
*zp
= ITOZ(ip
);
632 zfs_sb_t
*zsb
= ITOZSB(ip
);
633 uint64_t object
= zp
->z_id
;
639 if (fidp
->fid_len
< SHORT_FID_LEN
) {
640 fidp
->fid_len
= SHORT_FID_LEN
;
642 return (SET_ERROR(ENOSPC
));
645 if (zfsctl_is_snapdir(ip
)) {
647 return (zfsctl_snapdir_fid(ip
, fidp
));
650 zfid
= (zfid_short_t
*)fidp
;
652 zfid
->zf_len
= SHORT_FID_LEN
;
654 for (i
= 0; i
< sizeof (zfid
->zf_object
); i
++)
655 zfid
->zf_object
[i
] = (uint8_t)(object
>> (8 * i
));
657 /* .zfs znodes always have a generation number of 0 */
658 for (i
= 0; i
< sizeof (zfid
->zf_gen
); i
++)
666 * Construct a full dataset name in full_name: "pool/dataset@snap_name"
669 zfsctl_snapshot_name(zfs_sb_t
*zsb
, const char *snap_name
, int len
,
672 objset_t
*os
= zsb
->z_os
;
674 if (zfs_component_namecheck(snap_name
, NULL
, NULL
) != 0)
675 return (SET_ERROR(EILSEQ
));
677 dmu_objset_name(os
, full_name
);
678 if ((strlen(full_name
) + 1 + strlen(snap_name
)) >= len
)
679 return (SET_ERROR(ENAMETOOLONG
));
681 (void) strcat(full_name
, "@");
682 (void) strcat(full_name
, snap_name
);
688 * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/"
691 zfsctl_snapshot_path(struct path
*path
, int len
, char *full_path
)
693 char *path_buffer
, *path_ptr
;
694 int path_len
, error
= 0;
696 path_buffer
= kmem_alloc(len
, KM_SLEEP
);
698 path_ptr
= d_path(path
, path_buffer
, len
);
699 if (IS_ERR(path_ptr
)) {
700 error
= -PTR_ERR(path_ptr
);
704 path_len
= path_buffer
+ len
- 1 - path_ptr
;
705 if (path_len
> len
) {
706 error
= SET_ERROR(EFAULT
);
710 memcpy(full_path
, path_ptr
, path_len
);
711 full_path
[path_len
] = '\0';
713 kmem_free(path_buffer
, len
);
719 * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/"
722 zfsctl_snapshot_path_objset(zfs_sb_t
*zsb
, uint64_t objsetid
,
723 int path_len
, char *full_path
)
725 objset_t
*os
= zsb
->z_os
;
726 fstrans_cookie_t cookie
;
728 boolean_t case_conflict
;
729 uint64_t id
, pos
= 0;
732 if (zsb
->z_mntopts
->z_mntpoint
== NULL
)
735 cookie
= spl_fstrans_mark();
736 snapname
= kmem_alloc(MAXNAMELEN
, KM_SLEEP
);
739 dsl_pool_config_enter(dmu_objset_pool(os
), FTAG
);
740 error
= dmu_snapshot_list_next(zsb
->z_os
, MAXNAMELEN
,
741 snapname
, &id
, &pos
, &case_conflict
);
742 dsl_pool_config_exit(dmu_objset_pool(os
), FTAG
);
750 memset(full_path
, 0, path_len
);
751 snprintf(full_path
, path_len
- 1, "%s/.zfs/snapshot/%s",
752 zsb
->z_mntopts
->z_mntpoint
, snapname
);
754 kmem_free(snapname
, MAXNAMELEN
);
755 spl_fstrans_unmark(cookie
);
761 * Special case the handling of "..".
764 zfsctl_root_lookup(struct inode
*dip
, char *name
, struct inode
**ipp
,
765 int flags
, cred_t
*cr
, int *direntflags
, pathname_t
*realpnp
)
767 zfs_sb_t
*zsb
= ITOZSB(dip
);
772 if (strcmp(name
, "..") == 0) {
773 *ipp
= dip
->i_sb
->s_root
->d_inode
;
774 } else if (strcmp(name
, ZFS_SNAPDIR_NAME
) == 0) {
775 *ipp
= zfsctl_inode_lookup(zsb
, ZFSCTL_INO_SNAPDIR
,
776 &zpl_fops_snapdir
, &zpl_ops_snapdir
);
777 } else if (strcmp(name
, ZFS_SHAREDIR_NAME
) == 0) {
778 *ipp
= zfsctl_inode_lookup(zsb
, ZFSCTL_INO_SHARES
,
779 &zpl_fops_shares
, &zpl_ops_shares
);
785 error
= SET_ERROR(ENOENT
);
793 * Lookup entry point for the 'snapshot' directory. Try to open the
794 * snapshot if it exists, creating the pseudo filesystem inode as necessary.
795 * Perform a mount of the associated dataset on top of the inode.
798 zfsctl_snapdir_lookup(struct inode
*dip
, char *name
, struct inode
**ipp
,
799 int flags
, cred_t
*cr
, int *direntflags
, pathname_t
*realpnp
)
801 zfs_sb_t
*zsb
= ITOZSB(dip
);
807 error
= dmu_snapshot_lookup(zsb
->z_os
, name
, &id
);
813 *ipp
= zfsctl_inode_lookup(zsb
, ZFSCTL_INO_SNAPDIRS
- id
,
814 &simple_dir_operations
, &simple_dir_inode_operations
);
816 error
= SET_ERROR(ENOENT
);
824 * Renaming a directory under '.zfs/snapshot' will automatically trigger
825 * a rename of the snapshot to the new given name. The rename is confined
826 * to the '.zfs/snapshot' directory; snapshots cannot be moved elsewhere.
829 zfsctl_snapdir_rename(struct inode
*sdip
, char *snm
,
830 struct inode
*tdip
, char *tnm
, cred_t
*cr
, int flags
)
832 zfs_sb_t
*zsb
= ITOZSB(sdip
);
833 char *to
, *from
, *real
, *fsname
;
836 if (!zfs_admin_snapshot
)
841 to
= kmem_alloc(MAXNAMELEN
, KM_SLEEP
);
842 from
= kmem_alloc(MAXNAMELEN
, KM_SLEEP
);
843 real
= kmem_alloc(MAXNAMELEN
, KM_SLEEP
);
844 fsname
= kmem_alloc(MAXNAMELEN
, KM_SLEEP
);
846 if (zsb
->z_case
== ZFS_CASE_INSENSITIVE
) {
847 error
= dmu_snapshot_realname(zsb
->z_os
, snm
, real
,
851 } else if (error
!= ENOTSUP
) {
856 dmu_objset_name(zsb
->z_os
, fsname
);
858 error
= zfsctl_snapshot_name(ITOZSB(sdip
), snm
, MAXNAMELEN
, from
);
860 error
= zfsctl_snapshot_name(ITOZSB(tdip
), tnm
, MAXNAMELEN
, to
);
862 error
= zfs_secpolicy_rename_perms(from
, to
, cr
);
867 * Cannot move snapshots out of the snapdir.
870 error
= SET_ERROR(EINVAL
);
875 * No-op when names are identical.
877 if (strcmp(snm
, tnm
) == 0) {
882 mutex_enter(&zfs_snapshot_lock
);
884 error
= dsl_dataset_rename_snapshot(fsname
, snm
, tnm
, B_FALSE
);
886 (void) zfsctl_snapshot_rename(snm
, tnm
);
888 mutex_exit(&zfs_snapshot_lock
);
890 kmem_free(from
, MAXNAMELEN
);
891 kmem_free(to
, MAXNAMELEN
);
892 kmem_free(real
, MAXNAMELEN
);
893 kmem_free(fsname
, MAXNAMELEN
);
901 * Removing a directory under '.zfs/snapshot' will automatically trigger
902 * the removal of the snapshot with the given name.
905 zfsctl_snapdir_remove(struct inode
*dip
, char *name
, cred_t
*cr
, int flags
)
907 zfs_sb_t
*zsb
= ITOZSB(dip
);
908 char *snapname
, *real
;
911 if (!zfs_admin_snapshot
)
916 snapname
= kmem_alloc(MAXNAMELEN
, KM_SLEEP
);
917 real
= kmem_alloc(MAXNAMELEN
, KM_SLEEP
);
919 if (zsb
->z_case
== ZFS_CASE_INSENSITIVE
) {
920 error
= dmu_snapshot_realname(zsb
->z_os
, name
, real
,
924 } else if (error
!= ENOTSUP
) {
929 error
= zfsctl_snapshot_name(ITOZSB(dip
), name
, MAXNAMELEN
, snapname
);
931 error
= zfs_secpolicy_destroy_perms(snapname
, cr
);
935 error
= zfsctl_snapshot_unmount(snapname
, MNT_FORCE
);
936 if ((error
== 0) || (error
== ENOENT
))
937 error
= dsl_destroy_snapshot(snapname
, B_FALSE
);
939 kmem_free(snapname
, MAXNAMELEN
);
940 kmem_free(real
, MAXNAMELEN
);
948 * Creating a directory under '.zfs/snapshot' will automatically trigger
949 * the creation of a new snapshot with the given name.
952 zfsctl_snapdir_mkdir(struct inode
*dip
, char *dirname
, vattr_t
*vap
,
953 struct inode
**ipp
, cred_t
*cr
, int flags
)
955 zfs_sb_t
*zsb
= ITOZSB(dip
);
959 if (!zfs_admin_snapshot
)
962 dsname
= kmem_alloc(MAXNAMELEN
, KM_SLEEP
);
964 if (zfs_component_namecheck(dirname
, NULL
, NULL
) != 0) {
965 error
= SET_ERROR(EILSEQ
);
969 dmu_objset_name(zsb
->z_os
, dsname
);
971 error
= zfs_secpolicy_snapshot_perms(dsname
, cr
);
976 error
= dmu_objset_snapshot_one(dsname
, dirname
);
980 error
= zfsctl_snapdir_lookup(dip
, dirname
, ipp
,
984 kmem_free(dsname
, MAXNAMELEN
);
990 * Attempt to unmount a snapshot by making a call to user space.
991 * There is no assurance that this can or will succeed, it is just a
992 * best effort. In the case where it does fail, perhaps because
993 * it's in use, the unmount will fail harmlessly.
995 #define SET_UNMOUNT_CMD \
996 "exec 0</dev/null " \
999 "umount -t zfs -n %s'%s'"
1002 zfsctl_snapshot_unmount(char *snapname
, int flags
)
1004 char *argv
[] = { "/bin/sh", "-c", NULL
, NULL
};
1005 char *envp
[] = { NULL
};
1006 zfs_snapentry_t
*se
;
1009 mutex_enter(&zfs_snapshot_lock
);
1010 if ((se
= zfsctl_snapshot_find_by_name(snapname
)) == NULL
) {
1011 mutex_exit(&zfs_snapshot_lock
);
1014 mutex_exit(&zfs_snapshot_lock
);
1016 argv
[2] = kmem_asprintf(SET_UNMOUNT_CMD
,
1017 flags
& MNT_FORCE
? "-f " : "", se
->se_path
);
1018 zfsctl_snapshot_rele(se
);
1019 dprintf("unmount; path=%s\n", se
->se_path
);
1020 error
= call_usermodehelper(argv
[0], argv
, envp
, UMH_WAIT_PROC
);
1025 * The umount system utility will return 256 on error. We must
1026 * assume this error is because the file system is busy so it is
1027 * converted to the more sensible EBUSY.
1030 error
= SET_ERROR(EBUSY
);
1035 #define MOUNT_BUSY 0x80 /* Mount failed due to EBUSY (from mntent.h) */
1036 #define SET_MOUNT_CMD \
1037 "exec 0</dev/null " \
1040 "mount -t zfs -n '%s' '%s'"
1043 zfsctl_snapshot_mount(struct path
*path
, int flags
)
1045 struct dentry
*dentry
= path
->dentry
;
1046 struct inode
*ip
= dentry
->d_inode
;
1049 zfs_snapentry_t
*se
;
1050 char *full_name
, *full_path
;
1051 char *argv
[] = { "/bin/sh", "-c", NULL
, NULL
};
1052 char *envp
[] = { NULL
};
1062 full_name
= kmem_zalloc(MAXNAMELEN
, KM_SLEEP
);
1063 full_path
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
1065 error
= zfsctl_snapshot_name(zsb
, dname(dentry
),
1066 MAXNAMELEN
, full_name
);
1070 error
= zfsctl_snapshot_path(path
, MAXPATHLEN
, full_path
);
1075 * Multiple concurrent automounts of a snapshot are never allowed.
1076 * The snapshot may be manually mounted as many times as desired.
1078 if (zfsctl_snapshot_ismounted(full_name
)) {
1079 error
= SET_ERROR(EISDIR
);
1084 * Attempt to mount the snapshot from user space. Normally this
1085 * would be done using the vfs_kern_mount() function, however that
1086 * function is marked GPL-only and cannot be used. On error we
1087 * are careful to log the real error to the console and return EISDIR
1088 * to safely abort the automount. This should be very rare.
1090 * If the user mode helper happens to return EBUSY, a concurrent
1091 * mount is already in progress in which case the error is ignored.
1092 * Take note that if the program was executed successfully the return
1093 * value from call_usermodehelper() will be (exitcode << 8 + signal).
1095 dprintf("mount; name=%s path=%s\n", full_name
, full_path
);
1096 argv
[2] = kmem_asprintf(SET_MOUNT_CMD
, full_name
, full_path
);
1097 error
= call_usermodehelper(argv
[0], argv
, envp
, UMH_WAIT_PROC
);
1100 if (!(error
& MOUNT_BUSY
<< 8)) {
1101 cmn_err(CE_WARN
, "Unable to automount %s/%s: %d",
1102 full_path
, full_name
, error
);
1103 error
= SET_ERROR(EISDIR
);
1106 * EBUSY, this could mean a concurrent mount, or the
1107 * snapshot has already been mounted at completely
1108 * different place. We return 0 so VFS will retry. For
1109 * the latter case the VFS will retry several times
1110 * and return ELOOP, which is probably not a very good
1119 * Follow down in to the mounted snapshot and set MNT_SHRINKABLE
1120 * to identify this as an automounted filesystem.
1124 if (zpl_follow_down_one(&spath
)) {
1125 snap_zsb
= ITOZSB(spath
.dentry
->d_inode
);
1126 snap_zsb
->z_parent
= zsb
;
1127 dentry
= spath
.dentry
;
1128 spath
.mnt
->mnt_flags
|= MNT_SHRINKABLE
;
1130 mutex_enter(&zfs_snapshot_lock
);
1131 se
= zfsctl_snapshot_alloc(full_name
, full_path
,
1132 dmu_objset_id(snap_zsb
->z_os
), dentry
);
1133 zfsctl_snapshot_add(se
);
1134 zfsctl_snapshot_unmount_delay_impl(se
, zfs_expire_snapshot
);
1135 mutex_exit(&zfs_snapshot_lock
);
1139 kmem_free(full_name
, MAXNAMELEN
);
1140 kmem_free(full_path
, MAXPATHLEN
);
1148 * Given the objset id of the snapshot return its zfs_sb_t as zsbp.
1151 zfsctl_lookup_objset(struct super_block
*sb
, uint64_t objsetid
, zfs_sb_t
**zsbp
)
1153 zfs_snapentry_t
*se
;
1157 * Verify that the snapshot is mounted then lookup the mounted root
1158 * rather than the covered mount point. This may fail if the
1159 * snapshot has just been unmounted by an unrelated user space
1160 * process. This race cannot occur to an expired mount point
1161 * because we hold the zfs_snapshot_lock to prevent the race.
1163 mutex_enter(&zfs_snapshot_lock
);
1164 if ((se
= zfsctl_snapshot_find_by_objsetid(objsetid
)) != NULL
) {
1167 zsb
= ITOZSB(se
->se_root_dentry
->d_inode
);
1168 ASSERT3U(dmu_objset_id(zsb
->z_os
), ==, objsetid
);
1170 if (time_after(jiffies
, zsb
->z_snap_defer_time
+
1171 MAX(zfs_expire_snapshot
* HZ
/ 2, HZ
))) {
1172 zsb
->z_snap_defer_time
= jiffies
;
1173 zfsctl_snapshot_unmount_delay(objsetid
,
1174 zfs_expire_snapshot
);
1178 zfsctl_snapshot_rele(se
);
1179 error
= SET_ERROR(0);
1181 error
= SET_ERROR(ENOENT
);
1183 mutex_exit(&zfs_snapshot_lock
);
1186 * Automount the snapshot given the objset id by constructing the
1187 * full mount point and performing a traversal.
1189 if (error
== ENOENT
) {
1193 mnt
= kmem_alloc(MAXPATHLEN
, KM_SLEEP
);
1194 error
= zfsctl_snapshot_path_objset(sb
->s_fs_info
, objsetid
,
1197 kmem_free(mnt
, MAXPATHLEN
);
1198 return (SET_ERROR(error
));
1201 error
= kern_path(mnt
, LOOKUP_FOLLOW
|LOOKUP_DIRECTORY
, &path
);
1203 *zsbp
= ITOZSB(path
.dentry
->d_inode
);
1207 kmem_free(mnt
, MAXPATHLEN
);
1214 zfsctl_shares_lookup(struct inode
*dip
, char *name
, struct inode
**ipp
,
1215 int flags
, cred_t
*cr
, int *direntflags
, pathname_t
*realpnp
)
1217 zfs_sb_t
*zsb
= ITOZSB(dip
);
1224 if (zsb
->z_shares_dir
== 0) {
1226 return (SET_ERROR(ENOTSUP
));
1229 error
= zfs_zget(zsb
, zsb
->z_shares_dir
, &dzp
);
1235 error
= zfs_lookup(ZTOI(dzp
), name
, &ip
, 0, cr
, NULL
, NULL
);
1245 * Initialize the various pieces we'll need to create and manipulate .zfs
1246 * directories. Currently this is unused but available.
1251 avl_create(&zfs_snapshots_by_name
, snapentry_compare_by_name
,
1252 sizeof (zfs_snapentry_t
), offsetof(zfs_snapentry_t
,
1254 avl_create(&zfs_snapshots_by_objsetid
, snapentry_compare_by_objsetid
,
1255 sizeof (zfs_snapentry_t
), offsetof(zfs_snapentry_t
,
1257 mutex_init(&zfs_snapshot_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1259 zfs_expire_taskq
= taskq_create("z_unmount", 1, defclsyspri
,
1260 1, 8, TASKQ_PREPOPULATE
);
1264 * Cleanup the various pieces we needed for .zfs directories. In particular
1265 * ensure the expiry timer is canceled safely.
1270 taskq_destroy(zfs_expire_taskq
);
1272 avl_destroy(&zfs_snapshots_by_name
);
1273 avl_destroy(&zfs_snapshots_by_objsetid
);
1274 mutex_destroy(&zfs_snapshot_lock
);
1277 module_param(zfs_admin_snapshot
, int, 0644);
1278 MODULE_PARM_DESC(zfs_admin_snapshot
, "Enable mkdir/rmdir/mv in .zfs/snapshot");
1280 module_param(zfs_expire_snapshot
, int, 0644);
1281 MODULE_PARM_DESC(zfs_expire_snapshot
, "Seconds to expire .zfs/snapshot");