git.proxmox.com Git - mirror_zfs.git/commitdiff
port async unlinked drain from illumos-nexenta
author: Alek P <alek-p@users.noreply.github.com>
Tue, 12 Feb 2019 18:41:15 +0000 (10:41 -0800)
committer: Brian Behlendorf <behlendorf1@llnl.gov>
Tue, 12 Feb 2019 18:41:15 +0000 (10:41 -0800)
This patch adds an asynchronous implementation of the existing synchronous
zfs_unlinked_drain() function. This function is called at mount time and
is responsible for freeing znodes that were unlinked but had not yet been
freed. Mounting of the dataset no longer has to be held up until the
unlinked set is fully drained, as was done before. Because the unlinked
set is now processed asynchronously, mounting a dataset with entries in
the unlinked set completes sooner, which is a better user experience.

Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Reviewed-by: Tom Caputi <tcaputi@datto.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Paul Dagnelie <pcd@delphix.com>
Signed-off-by: Alek Pinchuk <apinchuk@datto.com>
Closes #8142

13 files changed:
include/sys/dataset_kstats.h
include/sys/dsl_pool.h
include/sys/zfs_dir.h
include/sys/zfs_vfsops.h
man/man5/zfs-module-parameters.5
module/zfs/dataset_kstats.c
module/zfs/dsl_pool.c
module/zfs/zfs_dir.c
module/zfs/zfs_vfsops.c
module/zfs/zfs_znode.c
tests/runfiles/linux.run
tests/zfs-tests/tests/functional/mount/Makefile.am
tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh [new file with mode: 0755]

index 5dd9a8e61fe378c39ade464d9b27a36f018735ed..667d1b85fa2c1e6edd7c90dbe12af2a81529a1b3 100644 (file)
@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2018 Datto Inc.
  */
 
 #ifndef _SYS_DATASET_KSTATS_H
@@ -35,6 +36,8 @@ typedef struct dataset_aggsum_stats_t {
        aggsum_t das_nwritten;
        aggsum_t das_reads;
        aggsum_t das_nread;
+       aggsum_t das_nunlinks;
+       aggsum_t das_nunlinked;
 } dataset_aggsum_stats_t;
 
 typedef struct dataset_kstat_values {
@@ -43,6 +46,16 @@ typedef struct dataset_kstat_values {
        kstat_named_t dkv_nwritten;
        kstat_named_t dkv_reads;
        kstat_named_t dkv_nread;
+       /*
+        * nunlinks is initialized to the unlinked set size on mount and
+        * is incremented whenever a new entry is added to the unlinked set
+        */
+       kstat_named_t dkv_nunlinks;
+       /*
+        * nunlinked is initialized to zero on mount and is incremented when an
+        * entry is removed from the unlinked set
+        */
+       kstat_named_t dkv_nunlinked;
 } dataset_kstat_values_t;
 
 typedef struct dataset_kstats {
@@ -56,4 +69,7 @@ void dataset_kstats_destroy(dataset_kstats_t *);
 void dataset_kstats_update_write_kstats(dataset_kstats_t *, int64_t);
 void dataset_kstats_update_read_kstats(dataset_kstats_t *, int64_t);
 
+void dataset_kstats_update_nunlinks_kstat(dataset_kstats_t *, int64_t);
+void dataset_kstats_update_nunlinked_kstat(dataset_kstats_t *, int64_t);
+
 #endif /* _SYS_DATASET_KSTATS_H */
index 56317cf733f91cc18f02e9ed16941aa616ca10b2..63ba3509a5326046d53e54b17a924db52bba3627 100644 (file)
@@ -96,6 +96,7 @@ typedef struct dsl_pool {
        struct dsl_dataset *dp_origin_snap;
        uint64_t dp_root_dir_obj;
        struct taskq *dp_iput_taskq;
+       struct taskq *dp_unlinked_drain_taskq;
 
        /* No lock needed - sync context only */
        blkptr_t dp_meta_rootbp;
@@ -176,6 +177,7 @@ boolean_t dsl_pool_config_held(dsl_pool_t *dp);
 boolean_t dsl_pool_config_held_writer(dsl_pool_t *dp);
 
 taskq_t *dsl_pool_iput_taskq(dsl_pool_t *dp);
+taskq_t *dsl_pool_unlinked_drain_taskq(dsl_pool_t *dp);
 
 int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
     const char *tag, uint64_t now, dmu_tx_t *tx);
index 9ce3accfce7022c73d3e84b12e11c49e9a0c0612..bcd4ec2c1de5c94f602cda38b5f13c85feb17ca0 100644 (file)
@@ -64,6 +64,7 @@ extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
 extern boolean_t zfs_dirempty(znode_t *);
 extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
 extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
+extern void zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs);
 extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
 extern int zfs_get_xattrdir(znode_t *, struct inode **, cred_t *, int);
 extern int zfs_make_xattrdir(znode_t *, vattr_t *, struct inode **, cred_t *);
index 0a4f52f2f5b62953c94dae11081c4525122eecd0..cad0aaece4b20433c8952c8ca3b40982e89a15a8 100644 (file)
@@ -117,6 +117,8 @@ struct zfsvfs {
        boolean_t       z_replay;       /* set during ZIL replay */
        boolean_t       z_use_sa;       /* version allow system attributes */
        boolean_t       z_xattr_sa;     /* allow xattrs to be stores as SA */
+       boolean_t       z_draining;     /* is true when drain is active */
+       boolean_t       z_drain_cancel; /* signal the unlinked drain to stop */
        uint64_t        z_version;      /* ZPL version */
        uint64_t        z_shares_dir;   /* hidden shares dir */
        dataset_kstats_t        z_kstat;        /* fs kstats */
@@ -132,6 +134,7 @@ struct zfsvfs {
        uint64_t        z_hold_size;    /* znode hold array size */
        avl_tree_t      *z_hold_trees;  /* znode hold trees */
        kmutex_t        *z_hold_locks;  /* znode hold locks */
+       taskqid_t       z_drain_task;   /* task id for the unlink drain task */
 };
 
 #define        ZSB_XATTR       0x0001          /* Enable user xattrs */
index 7dd333f0485620c75e7205b46c71c9129df45a7e..f6b04318307c7bd07262360a7748bc98ba9d6e0a 100644 (file)
@@ -1149,6 +1149,21 @@ Rate limit delay zevents (which report slow I/Os) to this many per second.
 Default value: 20
 .RE
 
+.sp
+.ne 2
+.na
+\fBzfs_unlink_suspend_progress\fR (int)
+.ad
+.RS 12n
+When enabled, files will not be asynchronously removed from the list of pending
+unlinks and the space they consume will be leaked. Once this option has been
+disabled and the dataset is remounted, the pending unlinks will be processed
+and the freed space returned to the pool.
+This option is used by the test suite to facilitate testing.
+.sp
+Uses \fB0\fR (default) to allow progress and \fB1\fR to pause progress.
+.RE
+
 .sp
 .ne 2
 .na
index ac0ad84ed63f4b8e499a6cdf833e277b4dac5f9a..522825c42ccfbb2c7e0b2c3b9bc6fa3f92f48e53 100644 (file)
@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2018 Datto Inc.
  */
 
 #include <sys/dataset_kstats.h>
@@ -34,6 +35,8 @@ static dataset_kstat_values_t empty_dataset_kstats = {
        { "nwritten",   KSTAT_DATA_UINT64 },
        { "reads",      KSTAT_DATA_UINT64 },
        { "nread",      KSTAT_DATA_UINT64 },
+       { "nunlinks",   KSTAT_DATA_UINT64 },
+       { "nunlinked",  KSTAT_DATA_UINT64 },
 };
 
 static int
@@ -54,6 +57,10 @@ dataset_kstats_update(kstat_t *ksp, int rw)
            aggsum_value(&dk->dk_aggsums.das_reads);
        dkv->dkv_nread.value.ui64 =
            aggsum_value(&dk->dk_aggsums.das_nread);
+       dkv->dkv_nunlinks.value.ui64 =
+           aggsum_value(&dk->dk_aggsums.das_nunlinks);
+       dkv->dkv_nunlinked.value.ui64 =
+           aggsum_value(&dk->dk_aggsums.das_nunlinked);
 
        return (0);
 }
@@ -136,6 +143,8 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
        aggsum_init(&dk->dk_aggsums.das_nwritten, 0);
        aggsum_init(&dk->dk_aggsums.das_reads, 0);
        aggsum_init(&dk->dk_aggsums.das_nread, 0);
+       aggsum_init(&dk->dk_aggsums.das_nunlinks, 0);
+       aggsum_init(&dk->dk_aggsums.das_nunlinked, 0);
 }
 
 void
@@ -156,6 +165,8 @@ dataset_kstats_destroy(dataset_kstats_t *dk)
        aggsum_fini(&dk->dk_aggsums.das_nwritten);
        aggsum_fini(&dk->dk_aggsums.das_reads);
        aggsum_fini(&dk->dk_aggsums.das_nread);
+       aggsum_fini(&dk->dk_aggsums.das_nunlinks);
+       aggsum_fini(&dk->dk_aggsums.das_nunlinked);
 }
 
 void
@@ -183,3 +194,21 @@ dataset_kstats_update_read_kstats(dataset_kstats_t *dk,
        aggsum_add(&dk->dk_aggsums.das_reads, 1);
        aggsum_add(&dk->dk_aggsums.das_nread, nread);
 }
+
+/*
+ * Add delta to the dataset's "nunlinks" counter, which tracks entries
+ * added to the unlinked set. Does nothing when dk->dk_kstats is NULL.
+ */
+void
+dataset_kstats_update_nunlinks_kstat(dataset_kstats_t *dk, int64_t delta)
+{
+       if (dk->dk_kstats != NULL)
+               aggsum_add(&dk->dk_aggsums.das_nunlinks, delta);
+}
+
+/*
+ * Add delta to the dataset's "nunlinked" counter, which tracks entries
+ * removed from the unlinked set. Does nothing when dk->dk_kstats is NULL.
+ */
+void
+dataset_kstats_update_nunlinked_kstat(dataset_kstats_t *dk, int64_t delta)
+{
+       if (dk->dk_kstats != NULL)
+               aggsum_add(&dk->dk_aggsums.das_nunlinked, delta);
+}
index 78e782c81e1fce121c725ea17203e5325ee9bd74..10e967ab91ed9218415c2cfa5db3c3628b20cfd4 100644 (file)
@@ -223,6 +223,9 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
 
        dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, defclsyspri,
            max_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+       dp->dp_unlinked_drain_taskq = taskq_create("z_unlinked_drain",
+           max_ncpus, defclsyspri, max_ncpus, INT_MAX,
+           TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
 
        return (dp);
 }
@@ -413,6 +416,7 @@ dsl_pool_close(dsl_pool_t *dp)
        rrw_destroy(&dp->dp_config_rwlock);
        mutex_destroy(&dp->dp_lock);
        cv_destroy(&dp->dp_spaceavail_cv);
+       taskq_destroy(dp->dp_unlinked_drain_taskq);
        taskq_destroy(dp->dp_iput_taskq);
        if (dp->dp_blkstats != NULL) {
                mutex_destroy(&dp->dp_blkstats->zab_lock);
@@ -1097,6 +1101,12 @@ dsl_pool_iput_taskq(dsl_pool_t *dp)
        return (dp->dp_iput_taskq);
 }
 
+/*
+ * Return the taskq onto which asynchronous unlinked-set drains are
+ * dispatched for filesystems in this pool.
+ */
+taskq_t *
+dsl_pool_unlinked_drain_taskq(dsl_pool_t *dp)
+{
+       taskq_t *drain_tq = dp->dp_unlinked_drain_taskq;
+
+       return (drain_tq);
+}
+
 /*
  * Walk through the pool-wide zap object of temporary snapshot user holds
  * and release them.
index bd173e7c357c8a6ad53463ad2d2a5ddf95620af2..63ac97754d375bb60c572242d8d9ee9d28927a95 100644 (file)
@@ -458,26 +458,31 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
 
        VERIFY3U(0, ==,
            zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
+
+       dataset_kstats_update_nunlinks_kstat(&zfsvfs->z_kstat, 1);
 }
 
 /*
  * Clean up any znodes that had no links when we either crashed or
  * (force) umounted the file system.
  */
-void
-zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+static void
+zfs_unlinked_drain_task(void *arg)
 {
+       zfsvfs_t *zfsvfs = arg;
        zap_cursor_t    zc;
        zap_attribute_t zap;
        dmu_object_info_t doi;
        znode_t         *zp;
        int             error;
 
+       ASSERT3B(zfsvfs->z_draining, ==, B_TRUE);
+
        /*
         * Iterate over the contents of the unlinked set.
         */
        for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
-           zap_cursor_retrieve(&zc, &zap) == 0;
+           zap_cursor_retrieve(&zc, &zap) == 0 && !zfsvfs->z_drain_cancel;
            zap_cursor_advance(&zc)) {
 
                /*
@@ -507,9 +512,61 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
                        continue;
 
                zp->z_unlinked = B_TRUE;
+
+               /*
+                * iput() is Linux's equivalent to illumos' VN_RELE(). It will
+                * decrement the inode's ref count and may cause the inode to be
+                * synchronously freed. We interrupt freeing of this inode, by
+                * checking the return value of dmu_objset_zfs_unmounting() in
+                * dmu_free_long_range(), when an unmount is requested.
+                */
                iput(ZTOI(zp));
+               ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
        }
        zap_cursor_fini(&zc);
+
+       zfsvfs->z_draining = B_FALSE;
+       zfsvfs->z_drain_task = TASKQID_INVALID;
+}
+
+/*
+ * Start draining the unlinked set of this filesystem.
+ *
+ * Marks the drain as active (z_draining) and clears any earlier cancel
+ * request, then tries to dispatch zfs_unlinked_drain_task() onto the
+ * pool's unlinked drain taskq. If the dispatch fails, the drain is run
+ * synchronously in the caller's context instead.
+ *
+ * The filesystem must not be unmounted and no drain may already be
+ * active; both conditions are asserted.
+ */
+void
+zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+{
+       taskqid_t id;
+
+       ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+       ASSERT3B(zfsvfs->z_draining, ==, B_FALSE);
+
+       /* Set before dispatching: the drain task asserts z_draining. */
+       zfsvfs->z_draining = B_TRUE;
+       zfsvfs->z_drain_cancel = B_FALSE;
+
+       id = taskq_dispatch(
+           dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)),
+           zfs_unlinked_drain_task, zfsvfs, TQ_SLEEP);
+       zfsvfs->z_drain_task = id;
+
+       /*
+        * Test the local copy of the task id, not zfsvfs->z_drain_task: a
+        * task that has already finished resets that field to
+        * TASKQID_INVALID, which must not be mistaken for a failed
+        * dispatch (that would start a second, synchronous drain).
+        */
+       if (id == TASKQID_INVALID) {
+               zfs_dbgmsg("async zfs_unlinked_drain dispatch failed");
+               zfs_unlinked_drain_task(zfsvfs);
+       }
+}
+
+/*
+ * Stop an active unlinked drain, if there is one, and wait for its taskq
+ * task to stop.
+ *
+ * Raises z_drain_cancel so that a running zfs_unlinked_drain_task() leaves
+ * its loop, cancels the task by id on the pool's unlinked drain taskq, and
+ * then marks the drain as no longer active.
+ */
+void
+zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs)
+{
+       taskq_t *drain_tq;
+
+       ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+
+       /* No drain in flight, so there is nothing to stop. */
+       if (!zfsvfs->z_draining)
+               return;
+
+       zfsvfs->z_drain_cancel = B_TRUE;
+
+       drain_tq = dsl_pool_unlinked_drain_taskq(
+           dmu_objset_pool(zfsvfs->z_os));
+       taskq_cancel_id(drain_tq, zfsvfs->z_drain_task);
+
+       zfsvfs->z_drain_task = TASKQID_INVALID;
+       zfsvfs->z_draining = B_FALSE;
+}
 
 /*
@@ -684,6 +741,8 @@ zfs_rmnode(znode_t *zp)
        VERIFY3U(0, ==,
            zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
 
+       dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
+
        zfs_znode_delete(zp, tx);
 
        dmu_tx_commit(tx);
index 766cbab7436fc372d2a557452b5ffb843ee470db..cdc1bc70758136fb47c2bb68dded69cf7d432f4b 100644 (file)
@@ -1178,6 +1178,10 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
                return (error);
        }
 
+       zfsvfs->z_drain_task = TASKQID_INVALID;
+       zfsvfs->z_draining = B_FALSE;
+       zfsvfs->z_drain_cancel = B_TRUE;
+
        *zfvp = zfsvfs;
        return (0);
 }
@@ -1200,14 +1204,27 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
         * operations out since we closed the ZIL.
         */
        if (mounting) {
+               ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
+               dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
+
                /*
                 * During replay we remove the read only flag to
                 * allow replays to succeed.
                 */
-               if (readonly != 0)
+               if (readonly != 0) {
                        readonly_changed_cb(zfsvfs, B_FALSE);
-               else
+               } else {
+                       zap_stats_t zs;
+
+                       /*
+                        * Seed the nunlinks kstat with the number of entries
+                        * already in the unlinked set. zs is an uninitialized
+                        * local unless zap_get_stats() fills it in, so it is
+                        * only read (including by the debug message) when
+                        * the call succeeds.
+                        */
+                       if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
+                           &zs) == 0) {
+                               dataset_kstats_update_nunlinks_kstat(
+                                   &zfsvfs->z_kstat, zs.zs_num_entries);
+                               dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
+                                   "num_entries in unlinked set: %llu",
+                                   zs.zs_num_entries);
+                       }
                        zfs_unlinked_drain(zfsvfs);
+               }
 
                /*
                 * Parse and replay the intent log.
@@ -1250,9 +1267,6 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
                /* restore readonly bit */
                if (readonly != 0)
                        readonly_changed_cb(zfsvfs, B_TRUE);
-
-               ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
-               dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
        }
 
        /*
@@ -1633,6 +1647,8 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
 {
        znode_t *zp;
 
+       zfs_unlinked_drain_stop_wait(zfsvfs);
+
        /*
         * If someone has not already unmounted this file system,
         * drain the iput_taskq to ensure all active references to the
@@ -1884,6 +1900,7 @@ zfs_preumount(struct super_block *sb)
 
        /* zfsvfs is NULL when zfs_domount fails during mount */
        if (zfsvfs) {
+               zfs_unlinked_drain_stop_wait(zfsvfs);
                zfsctl_destroy(sb->s_fs_info);
                /*
                 * Wait for iput_async before entering evict_inodes in
@@ -2159,6 +2176,15 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
        }
        mutex_exit(&zfsvfs->z_znodes_lock);
 
+       if (!zfs_is_readonly(zfsvfs) && !zfsvfs->z_unmounted) {
+               /*
+                * zfs_suspend_fs() could have interrupted freeing
+                * of dnodes. We need to restart this freeing so
+                * that we don't "leak" the space.
+                */
+               zfs_unlinked_drain(zfsvfs);
+       }
+
 bail:
        /* release the VFS ops */
        rw_exit(&zfsvfs->z_teardown_inactive_lock);
index 7b13927bea345d1e4c44461dc053dedf4be6fe6a..761fbcb33764301f028b0b851fc2559442236271 100644 (file)
@@ -91,6 +91,12 @@ static kmem_cache_t *znode_cache = NULL;
 static kmem_cache_t *znode_hold_cache = NULL;
 unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
 
+/*
+ * This is used by the test suite so that it can delay znodes from being
+ * freed in order to inspect the unlinked set.
+ */
+int zfs_unlink_suspend_progress = 0;
+
 /*
  * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
  * z_rangelock. It will modify the offset and length of the lock to reflect
@@ -1339,7 +1345,7 @@ zfs_zinactive(znode_t *zp)
         */
        if (zp->z_unlinked) {
                ASSERT(!zfsvfs->z_issnap);
-               if (!zfs_is_readonly(zfsvfs)) {
+               if (!zfs_is_readonly(zfsvfs) && !zfs_unlink_suspend_progress) {
                        mutex_exit(&zp->z_lock);
                        zfs_znode_hold_exit(zfsvfs, zh);
                        zfs_rmnode(zp);
@@ -2214,4 +2220,7 @@ EXPORT_SYMBOL(zfs_obj_to_path);
 /* CSTYLED */
 module_param(zfs_object_mutex_size, uint, 0644);
 MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
+module_param(zfs_unlink_suspend_progress, int, 0644);
+MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks "
+"(debug - leaks space into the unlinked set)");
 #endif
index 8ab5e7033702f6ad2f07e7332f395f3e08296919..8663c24f904343f82299ba6c790dd5d11fd2ee5b 100644 (file)
@@ -644,7 +644,7 @@ tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval',
 tags = ['functional', 'mmp']
 
 [tests/functional/mount]
-tests = ['umount_001', 'umountall_001']
+tests = ['umount_001', 'umount_unlinked_drain', 'umountall_001']
 tags = ['functional', 'mount']
 
 [tests/functional/mv_files]
index 9898e0510c15fa353919879d91c9f735b8f414ad..bdafa69badd85edcafd816d9b58d1dd2a21c1281 100644 (file)
@@ -3,4 +3,5 @@ dist_pkgdata_SCRIPTS = \
        setup.ksh \
        cleanup.ksh \
        umount_001.ksh \
+       umount_unlinked_drain.ksh \
        umountall_001.ksh
diff --git a/tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh b/tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh
new file mode 100755 (executable)
index 0000000..0d26280
--- /dev/null
@@ -0,0 +1,119 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Datto Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Test async unlinked drain to ensure mounting is not held up when there are
+# entries in the unlinked set. It also checks that the unlinked set can be
+# filled up and drained at the same time.
+#
+# STRATEGY:
+# 1. Use zfs_unlink_suspend_progress tunable to disable freeing to build up
+#    the unlinked set
+# 2. Make sure mount happens even when there are entries in the unlinked set
+# 3. Drain and build up the unlinked list at the same time to test for races
+#
+
+# Restore the tunable to the value saved at startup, then for each test
+# filesystem make sure it is mounted, remove leftover test files and turn
+# xattrs back on.
+function cleanup
+{
+       log_must set_tunable32 zfs_unlink_suspend_progress $default_unlink_sp
+
+       for fs in 1 2 3; do
+               if ! mounted $TESTDIR.$fs; then
+                       zfs mount $TESTPOOL/$TESTFS.$fs
+               fi
+               rm -f $TESTDIR.$fs/file-*
+               zfs set xattr=on $TESTPOOL/$TESTFS.$fs
+       done
+}
+
+#
+# Poll the dataset's objset kstat until the size of its unlinked set
+# (nunlinks - nunlinked) has stopped changing, then compare that size with
+# the expected one.
+#
+# $1 expected unlinked set size
+# $2 pool name (selects /proc/spl/kstat/zfs/<pool>)
+# $3 dataset name, used to find the matching objset kstat file
+#
+# Returns 0 when the size has settled on $1; otherwise logs a note and
+# returns 1.
+#
+function unlinked_size_is
+{
+       MAX_ITERS=5 # iterations to do before we consider reported number stable
+       iters=0
+       last_usize=0
+       while [[ $iters -le $MAX_ITERS ]]; do
+               kstat_file=$(grep -nrwl /proc/spl/kstat/zfs/$2/objset-0x* -e $3)
+               if [[ -z $kstat_file ]]; then
+                       # Without a file name the reads below would fall
+                       # back to stdin and the subtraction would fail.
+                       log_note "No objset kstat found for $3 in pool $2"
+                       return 1
+               fi
+               # Match the counter name exactly so that only the intended
+               # kstat line is picked up.
+               nunlinks=$(awk '$1 == "nunlinks" {print $3}' $kstat_file)
+               nunlinked=$(awk '$1 == "nunlinked" {print $3}' $kstat_file)
+               usize=$(($nunlinks - $nunlinked))
+               if [[ $iters == $MAX_ITERS && $usize == $1 ]]; then
+                       return 0
+               fi
+               # Count consecutive identical readings; any change restarts
+               # the count.
+               if [[ $usize == $last_usize ]]; then
+                       (( iters++ ))
+               else
+                       iters=0
+               fi
+               last_usize=$usize
+       done
+
+       log_note "Unexpected unlinked set size: $last_usize, expected $1"
+       return 1
+}
+
+
+UNLINK_SP_PARAM=/sys/module/zfs/parameters/zfs_unlink_suspend_progress
+default_unlink_sp=$(get_tunable zfs_unlink_suspend_progress)
+
+log_onexit cleanup
+
+log_assert "Unlinked list drain does not hold up mounting of fs"
+
+for fs in 1 2 3; do
+       set -A xattrs on sa off
+       for xa in ${xattrs[@]}; do
+               # set up fs and ensure all deleted files get into the unlinked set
+               log_must mounted $TESTDIR.$fs
+
+               log_must zfs set xattr=$xa $TESTPOOL/$TESTFS.$fs
+
+               if [[ $xa == off ]]; then
+                       for fn in $(seq 1 175); do
+                               log_must mkfile 128k $TESTDIR.$fs/file-$fn
+                       done
+               else
+                       log_must xattrtest -f 175 -x 3 -r -k -p $TESTDIR.$fs
+               fi
+
+               log_must set_tunable32 zfs_unlink_suspend_progress 1
+               log_must unlinked_size_is 0 $TESTPOOL $TESTPOOL/$TESTFS.$fs
+
+               # build up unlinked set
+               for fn in $(seq 1 100); do
+                       log_must eval "rm $TESTDIR.$fs/file-$fn &"
+               done
+               log_must unlinked_size_is 100 $TESTPOOL $TESTPOOL/$TESTFS.$fs
+
+               # test that we can mount fs without emptying the unlinked list
+               log_must zfs umount $TESTPOOL/$TESTFS.$fs
+               log_must unmounted $TESTDIR.$fs
+               log_must zfs mount $TESTPOOL/$TESTFS.$fs
+               log_must mounted $TESTDIR.$fs
+               log_must unlinked_size_is 100 $TESTPOOL $TESTPOOL/$TESTFS.$fs
+
+               # confirm we can drain and add to unlinked set at the same time
+               log_must set_tunable32 zfs_unlink_suspend_progress 0
+               log_must zfs umount $TESTPOOL/$TESTFS.$fs
+               log_must zfs mount $TESTPOOL/$TESTFS.$fs
+               for fn in $(seq 101 175); do
+                       log_must eval "rm $TESTDIR.$fs/file-$fn &"
+               done
+               log_must unlinked_size_is 0 $TESTPOOL $TESTPOOL/$TESTFS.$fs
+       done
+done
+
+log_pass "Confirmed unlinked list drain does not hold up mounting of fs"