]> git.proxmox.com Git - mirror_zfs-debian.git/blobdiff - module/zfs/zfs_ctldir.c
Imported Upstream version 0.6.5.4
[mirror_zfs-debian.git] / module / zfs / zfs_ctldir.c
index b70eb66239489751383bd32b3f5aa852785d2290..fc46a7307f441ce78fa6976c10a8028703d72975 100644 (file)
@@ -81,6 +81,7 @@
 #include <sys/zfs_vnops.h>
 #include <sys/stat.h>
 #include <sys/dmu.h>
+#include <sys/dmu_objset.h>
 #include <sys/dsl_destroy.h>
 #include <sys/dsl_deleg.h>
 #include <sys/mount.h>
  */
 static avl_tree_t zfs_snapshots_by_name;
 static avl_tree_t zfs_snapshots_by_objsetid;
-static kmutex_t zfs_snapshot_lock;
+static krwlock_t zfs_snapshot_lock;
 
 /*
  * Control Directory Tunables (.zfs)
@@ -117,6 +118,7 @@ static taskq_t *zfs_expire_taskq;
 typedef struct {
        char            *se_name;       /* full snapshot name */
        char            *se_path;       /* full mount path */
+       spa_t           *se_spa;        /* pool spa */
        uint64_t        se_objsetid;    /* snapshot objset id */
        struct dentry   *se_root_dentry; /* snapshot root dentry */
        taskqid_t       se_taskqid;     /* scheduled unmount taskqid */
@@ -132,8 +134,8 @@ static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay);
  * the snapshot name and provided mount point.  No reference is taken.
  */
 static zfs_snapentry_t *
-zfsctl_snapshot_alloc(char *full_name, char *full_path, uint64_t objsetid,
-    struct dentry *root_dentry)
+zfsctl_snapshot_alloc(char *full_name, char *full_path, spa_t *spa,
+    uint64_t objsetid, struct dentry *root_dentry)
 {
        zfs_snapentry_t *se;
 
@@ -141,6 +143,7 @@ zfsctl_snapshot_alloc(char *full_name, char *full_path, uint64_t objsetid,
 
        se->se_name = strdup(full_name);
        se->se_path = strdup(full_path);
+       se->se_spa = spa;
        se->se_objsetid = objsetid;
        se->se_root_dentry = root_dentry;
        se->se_taskqid = -1;
@@ -192,7 +195,7 @@ zfsctl_snapshot_rele(zfs_snapentry_t *se)
 static void
 zfsctl_snapshot_add(zfs_snapentry_t *se)
 {
-       ASSERT(MUTEX_HELD(&zfs_snapshot_lock));
+       ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
        refcount_add(&se->se_refcount, NULL);
        avl_add(&zfs_snapshots_by_name, se);
        avl_add(&zfs_snapshots_by_objsetid, se);
@@ -207,7 +210,7 @@ zfsctl_snapshot_add(zfs_snapentry_t *se)
 static void
 zfsctl_snapshot_remove(zfs_snapentry_t *se)
 {
-       ASSERT(MUTEX_HELD(&zfs_snapshot_lock));
+       ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
        avl_remove(&zfs_snapshots_by_name, se);
        avl_remove(&zfs_snapshots_by_objsetid, se);
        zfsctl_snapshot_rele(se);
@@ -242,6 +245,9 @@ snapentry_compare_by_objsetid(const void *a, const void *b)
        const zfs_snapentry_t *se_a = a;
        const zfs_snapentry_t *se_b = b;
 
+       if (se_a->se_spa != se_b->se_spa)
+               return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1);
+
        if (se_a->se_objsetid < se_b->se_objsetid)
                return (-1);
        else if (se_a->se_objsetid > se_b->se_objsetid)
@@ -262,7 +268,7 @@ zfsctl_snapshot_find_by_name(char *snapname)
 {
        zfs_snapentry_t *se, search;
 
-       ASSERT(MUTEX_HELD(&zfs_snapshot_lock));
+       ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
 
        search.se_name = snapname;
        se = avl_find(&zfs_snapshots_by_name, &search, NULL);
@@ -278,12 +284,13 @@ zfsctl_snapshot_find_by_name(char *snapname)
  * as zfsctl_snapshot_find_by_name().
  */
 static zfs_snapentry_t *
-zfsctl_snapshot_find_by_objsetid(uint64_t objsetid)
+zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid)
 {
        zfs_snapentry_t *se, search;
 
-       ASSERT(MUTEX_HELD(&zfs_snapshot_lock));
+       ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
 
+       search.se_spa = spa;
        search.se_objsetid = objsetid;
        se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL);
        if (se)
@@ -301,7 +308,7 @@ zfsctl_snapshot_rename(char *old_snapname, char *new_snapname)
 {
        zfs_snapentry_t *se;
 
-       ASSERT(MUTEX_HELD(&zfs_snapshot_lock));
+       ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
 
        se = zfsctl_snapshot_find_by_name(old_snapname);
        if (se == NULL)
@@ -323,8 +330,14 @@ static void
 snapentry_expire(void *data)
 {
        zfs_snapentry_t *se = (zfs_snapentry_t *)data;
+       spa_t *spa = se->se_spa;
        uint64_t objsetid = se->se_objsetid;
 
+       if (zfs_expire_snapshot <= 0) {
+               zfsctl_snapshot_rele(se);
+               return;
+       }
+
        se->se_taskqid = -1;
        (void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE);
        zfsctl_snapshot_rele(se);
@@ -333,12 +346,12 @@ snapentry_expire(void *data)
         * Reschedule the unmount if the zfs_snapentry_t wasn't removed.
         * This can occur when the snapshot is busy.
         */
-       mutex_enter(&zfs_snapshot_lock);
-       if ((se = zfsctl_snapshot_find_by_objsetid(objsetid)) != NULL) {
+       rw_enter(&zfs_snapshot_lock, RW_READER);
+       if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
                zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
                zfsctl_snapshot_rele(se);
        }
-       mutex_exit(&zfs_snapshot_lock);
+       rw_exit(&zfs_snapshot_lock);
 }
 
 /*
@@ -349,7 +362,7 @@ snapentry_expire(void *data)
 static void
 zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
 {
-       ASSERT(MUTEX_HELD(&zfs_snapshot_lock));
+       ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
 
        if (taskq_cancel_id(zfs_expire_taskq, se->se_taskqid) == 0) {
                se->se_taskqid = -1;
@@ -365,9 +378,12 @@ zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
 {
        ASSERT3S(se->se_taskqid, ==, -1);
 
+       if (delay <= 0)
+               return;
+
+       zfsctl_snapshot_hold(se);
        se->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq,
            snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
-       zfsctl_snapshot_hold(se);
 }
 
 /*
@@ -377,19 +393,19 @@ zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
  * and held until the outstanding task is handled or cancelled.
  */
 int
-zfsctl_snapshot_unmount_delay(uint64_t objsetid, int delay)
+zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay)
 {
        zfs_snapentry_t *se;
        int error = ENOENT;
 
-       mutex_enter(&zfs_snapshot_lock);
-       if ((se = zfsctl_snapshot_find_by_objsetid(objsetid)) != NULL) {
+       rw_enter(&zfs_snapshot_lock, RW_READER);
+       if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
                zfsctl_snapshot_unmount_cancel(se);
                zfsctl_snapshot_unmount_delay_impl(se, delay);
                zfsctl_snapshot_rele(se);
                error = 0;
        }
-       mutex_exit(&zfs_snapshot_lock);
+       rw_exit(&zfs_snapshot_lock);
 
        return (error);
 }
@@ -404,12 +420,12 @@ zfsctl_snapshot_ismounted(char *snapname)
        zfs_snapentry_t *se;
        boolean_t ismounted = B_FALSE;
 
-       mutex_enter(&zfs_snapshot_lock);
+       rw_enter(&zfs_snapshot_lock, RW_READER);
        if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) {
                zfsctl_snapshot_rele(se);
                ismounted = B_TRUE;
        }
-       mutex_exit(&zfs_snapshot_lock);
+       rw_exit(&zfs_snapshot_lock);
 
        return (ismounted);
 }
@@ -560,15 +576,17 @@ zfsctl_destroy(zfs_sb_t *zsb)
 {
        if (zsb->z_issnap) {
                zfs_snapentry_t *se;
+               spa_t *spa = zsb->z_os->os_spa;
                uint64_t objsetid = dmu_objset_id(zsb->z_os);
 
-               mutex_enter(&zfs_snapshot_lock);
-               if ((se = zfsctl_snapshot_find_by_objsetid(objsetid)) != NULL) {
+               rw_enter(&zfs_snapshot_lock, RW_WRITER);
+               if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid))
+                   != NULL) {
                        zfsctl_snapshot_unmount_cancel(se);
                        zfsctl_snapshot_remove(se);
                        zfsctl_snapshot_rele(se);
                }
-               mutex_exit(&zfs_snapshot_lock);
+               rw_exit(&zfs_snapshot_lock);
        } else if (zsb->z_ctldir) {
                iput(zsb->z_ctldir);
                zsb->z_ctldir = NULL;
@@ -879,13 +897,13 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm,
                goto out;
        }
 
-       mutex_enter(&zfs_snapshot_lock);
+       rw_enter(&zfs_snapshot_lock, RW_WRITER);
 
        error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE);
        if (error == 0)
                (void) zfsctl_snapshot_rename(snm, tnm);
 
-       mutex_exit(&zfs_snapshot_lock);
+       rw_exit(&zfs_snapshot_lock);
 out:
        kmem_free(from, MAXNAMELEN);
        kmem_free(to, MAXNAMELEN);
@@ -1006,12 +1024,12 @@ zfsctl_snapshot_unmount(char *snapname, int flags)
        zfs_snapentry_t *se;
        int error;
 
-       mutex_enter(&zfs_snapshot_lock);
+       rw_enter(&zfs_snapshot_lock, RW_READER);
        if ((se = zfsctl_snapshot_find_by_name(snapname)) == NULL) {
-               mutex_exit(&zfs_snapshot_lock);
+               rw_exit(&zfs_snapshot_lock);
                return (ENOENT);
        }
-       mutex_exit(&zfs_snapshot_lock);
+       rw_exit(&zfs_snapshot_lock);
 
        argv[2] = kmem_asprintf(SET_UNMOUNT_CMD,
            flags & MNT_FORCE ? "-f " : "", se->se_path);
@@ -1051,6 +1069,7 @@ zfsctl_snapshot_mount(struct path *path, int flags)
        char *argv[] = { "/bin/sh", "-c", NULL, NULL };
        char *envp[] = { NULL };
        int error;
+       struct path spath;
 
        if (ip == NULL)
                return (EISDIR);
@@ -1075,7 +1094,7 @@ zfsctl_snapshot_mount(struct path *path, int flags)
         * The snapshot may be manually mounted as many times as desired.
         */
        if (zfsctl_snapshot_ismounted(full_name)) {
-               error = SET_ERROR(EISDIR);
+               error = 0;
                goto error;
        }
 
@@ -1095,10 +1114,22 @@ zfsctl_snapshot_mount(struct path *path, int flags)
        argv[2] = kmem_asprintf(SET_MOUNT_CMD, full_name, full_path);
        error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
        strfree(argv[2]);
-       if (error && !(error & MOUNT_BUSY << 8)) {
-               cmn_err(CE_WARN, "Unable to automount %s/%s: %d",
-                   full_path, full_name, error);
-               error = SET_ERROR(EISDIR);
+       if (error) {
+               if (!(error & MOUNT_BUSY << 8)) {
+                       cmn_err(CE_WARN, "Unable to automount %s/%s: %d",
+                           full_path, full_name, error);
+                       error = SET_ERROR(EISDIR);
+               } else {
+                       /*
+                        * EBUSY, this could mean a concurrent mount, or the
+                        * snapshot has already been mounted at completely
+                        * different place. We return 0 so VFS will retry. For
+                        * the latter case the VFS will retry several times
+                        * and return ELOOP, which is probably not a very good
+                        * behavior.
+                        */
+                       error = 0;
+               }
                goto error;
        }
 
@@ -1106,20 +1137,23 @@ zfsctl_snapshot_mount(struct path *path, int flags)
         * Follow down in to the mounted snapshot and set MNT_SHRINKABLE
         * to identify this as an automounted filesystem.
         */
-       zpl_follow_down_one(path);
-       snap_zsb = ITOZSB(path->dentry->d_inode);
-       snap_zsb->z_parent = zsb;
-       dentry = path->dentry;
-       path->mnt->mnt_flags |= MNT_SHRINKABLE;
-       zpl_follow_up(path);
-       error = 0;
-
-       mutex_enter(&zfs_snapshot_lock);
-       se = zfsctl_snapshot_alloc(full_name, full_path,
-           dmu_objset_id(snap_zsb->z_os), dentry);
-       zfsctl_snapshot_add(se);
-       zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
-       mutex_exit(&zfs_snapshot_lock);
+       spath = *path;
+       path_get(&spath);
+       if (zpl_follow_down_one(&spath)) {
+               snap_zsb = ITOZSB(spath.dentry->d_inode);
+               snap_zsb->z_parent = zsb;
+               dentry = spath.dentry;
+               spath.mnt->mnt_flags |= MNT_SHRINKABLE;
+
+               rw_enter(&zfs_snapshot_lock, RW_WRITER);
+               se = zfsctl_snapshot_alloc(full_name, full_path,
+                   snap_zsb->z_os->os_spa, dmu_objset_id(snap_zsb->z_os),
+                   dentry);
+               zfsctl_snapshot_add(se);
+               zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
+               rw_exit(&zfs_snapshot_lock);
+       }
+       path_put(&spath);
 error:
        kmem_free(full_name, MAXNAMELEN);
        kmem_free(full_path, MAXPATHLEN);
@@ -1137,6 +1171,7 @@ zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp)
 {
        zfs_snapentry_t *se;
        int error;
+       spa_t *spa = ((zfs_sb_t *)(sb->s_fs_info))->z_os->os_spa;
 
        /*
         * Verify that the snapshot is mounted then lookup the mounted root
@@ -1145,8 +1180,8 @@ zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp)
         * process.  This race cannot occur to an expired mount point
         * because we hold the zfs_snapshot_lock to prevent the race.
         */
-       mutex_enter(&zfs_snapshot_lock);
-       if ((se = zfsctl_snapshot_find_by_objsetid(objsetid)) != NULL) {
+       rw_enter(&zfs_snapshot_lock, RW_READER);
+       if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
                zfs_sb_t *zsb;
 
                zsb = ITOZSB(se->se_root_dentry->d_inode);
@@ -1155,7 +1190,8 @@ zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp)
                if (time_after(jiffies, zsb->z_snap_defer_time +
                    MAX(zfs_expire_snapshot * HZ / 2, HZ))) {
                        zsb->z_snap_defer_time = jiffies;
-                       zfsctl_snapshot_unmount_delay(objsetid,
+                       zfsctl_snapshot_unmount_cancel(se);
+                       zfsctl_snapshot_unmount_delay_impl(se,
                            zfs_expire_snapshot);
                }
 
@@ -1165,7 +1201,7 @@ zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp)
        } else {
                error = SET_ERROR(ENOENT);
        }
-       mutex_exit(&zfs_snapshot_lock);
+       rw_exit(&zfs_snapshot_lock);
 
        /*
         * Automount the snapshot given the objset id by constructing the
@@ -1239,7 +1275,7 @@ zfsctl_init(void)
        avl_create(&zfs_snapshots_by_objsetid, snapentry_compare_by_objsetid,
            sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
            se_node_objsetid));
-       mutex_init(&zfs_snapshot_lock, NULL, MUTEX_DEFAULT, NULL);
+       rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL);
 
        zfs_expire_taskq = taskq_create("z_unmount", 1, defclsyspri,
            1, 8, TASKQ_PREPOPULATE);
@@ -1256,7 +1292,7 @@ zfsctl_fini(void)
 
        avl_destroy(&zfs_snapshots_by_name);
        avl_destroy(&zfs_snapshots_by_objsetid);
-       mutex_destroy(&zfs_snapshot_lock);
+       rw_destroy(&zfs_snapshot_lock);
 }
 
 module_param(zfs_admin_snapshot, int, 0644);