port async unlinked drain from illumos-nexenta

author Alek P <alek-p@users.noreply.github.com>

Tue, 12 Feb 2019 18:41:15 +0000 (10:41 -0800)

committer Brian Behlendorf <behlendorf1@llnl.gov>

Tue, 12 Feb 2019 18:41:15 +0000 (10:41 -0800)
author Alek P <alek-p@users.noreply.github.com>
Tue, 12 Feb 2019 18:41:15 +0000 (10:41 -0800)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Tue, 12 Feb 2019 18:41:15 +0000 (10:41 -0800)
diff --git a/include/sys/dataset_kstats.h b/include/sys/dataset_kstats.h

index 5dd9a8e61fe378c39ade464d9b27a36f018735ed..667d1b85fa2c1e6edd7c90dbe12af2a81529a1b3 100644 (file)
--- a/include/sys/dataset_kstats.h
+++ b/include/sys/dataset_kstats.h
@@ -21,6 +21,7 @@
  
  /*
   * Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2018 Datto Inc.
   */
  
  #ifndef _SYS_DATASET_KSTATS_H
@@ -35,6 +36,8 @@ typedef struct dataset_aggsum_stats_t {
         aggsum_t das_nwritten;
         aggsum_t das_reads;
         aggsum_t das_nread;
+       aggsum_t das_nunlinks;
+       aggsum_t das_nunlinked;
  } dataset_aggsum_stats_t;
  
  typedef struct dataset_kstat_values {
@@ -43,6 +46,16 @@ typedef struct dataset_kstat_values {
         kstat_named_t dkv_nwritten;
         kstat_named_t dkv_reads;
         kstat_named_t dkv_nread;
+       /*
+        * nunlinks is initialized to the unlinked set size on mount and
+        * is incremented whenever a new entry is added to the unlinked set
+        */
+       kstat_named_t dkv_nunlinks;
+       /*
+        * nunlinked is initialized to zero on mount and is incremented when an
+        * entry is removed from the unlinked set
+        */
+       kstat_named_t dkv_nunlinked;
  } dataset_kstat_values_t;
  
  typedef struct dataset_kstats {
@@ -56,4 +69,7 @@ void dataset_kstats_destroy(dataset_kstats_t *);
  void dataset_kstats_update_write_kstats(dataset_kstats_t *, int64_t);
  void dataset_kstats_update_read_kstats(dataset_kstats_t *, int64_t);
  
+void dataset_kstats_update_nunlinks_kstat(dataset_kstats_t *, int64_t);
+void dataset_kstats_update_nunlinked_kstat(dataset_kstats_t *, int64_t);
+
  #endif /* _SYS_DATASET_KSTATS_H */
diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h

index 56317cf733f91cc18f02e9ed16941aa616ca10b2..63ba3509a5326046d53e54b17a924db52bba3627 100644 (file)
--- a/include/sys/dsl_pool.h
+++ b/include/sys/dsl_pool.h
@@ -96,6 +96,7 @@ typedef struct dsl_pool {
         struct dsl_dataset *dp_origin_snap;
         uint64_t dp_root_dir_obj;
         struct taskq *dp_iput_taskq;
+       struct taskq *dp_unlinked_drain_taskq;
  
         /* No lock needed - sync context only */
         blkptr_t dp_meta_rootbp;
@@ -176,6 +177,7 @@ boolean_t dsl_pool_config_held(dsl_pool_t *dp);
  boolean_t dsl_pool_config_held_writer(dsl_pool_t *dp);
  
  taskq_t *dsl_pool_iput_taskq(dsl_pool_t *dp);
+taskq_t *dsl_pool_unlinked_drain_taskq(dsl_pool_t *dp);
  
  int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
      const char *tag, uint64_t now, dmu_tx_t *tx);
diff --git a/include/sys/zfs_dir.h b/include/sys/zfs_dir.h

index 9ce3accfce7022c73d3e84b12e11c49e9a0c0612..bcd4ec2c1de5c94f602cda38b5f13c85feb17ca0 100644 (file)
--- a/include/sys/zfs_dir.h
+++ b/include/sys/zfs_dir.h
@@ -64,6 +64,7 @@ extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
  extern boolean_t zfs_dirempty(znode_t *);
  extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
  extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
+extern void zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs);
  extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
  extern int zfs_get_xattrdir(znode_t *, struct inode **, cred_t *, int);
  extern int zfs_make_xattrdir(znode_t *, vattr_t *, struct inode **, cred_t *);
diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h

index 0a4f52f2f5b62953c94dae11081c4525122eecd0..cad0aaece4b20433c8952c8ca3b40982e89a15a8 100644 (file)
--- a/include/sys/zfs_vfsops.h
+++ b/include/sys/zfs_vfsops.h
@@ -117,6 +117,8 @@ struct zfsvfs {
         boolean_t       z_replay;       /* set during ZIL replay */
         boolean_t       z_use_sa;       /* version allow system attributes */
         boolean_t       z_xattr_sa;     /* allow xattrs to be stores as SA */
+       boolean_t       z_draining;     /* is true when drain is active */
+       boolean_t       z_drain_cancel; /* signal the unlinked drain to stop */
         uint64_t        z_version;      /* ZPL version */
         uint64_t        z_shares_dir;   /* hidden shares dir */
         dataset_kstats_t        z_kstat;        /* fs kstats */
@@ -132,6 +134,7 @@ struct zfsvfs {
         uint64_t        z_hold_size;    /* znode hold array size */
         avl_tree_t      *z_hold_trees;  /* znode hold trees */
         kmutex_t        *z_hold_locks;  /* znode hold locks */
+       taskqid_t       z_drain_task;   /* task id for the unlink drain task */
  };
  
  #define        ZSB_XATTR       0x0001          /* Enable user xattrs */
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5

index 7dd333f0485620c75e7205b46c71c9129df45a7e..f6b04318307c7bd07262360a7748bc98ba9d6e0a 100644 (file)
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -1149,6 +1149,21 @@ Rate limit delay zevents (which report slow I/Os) to this many per second.
  Default value: 20
  .RE
  
+.sp
+.ne 2
+.na
+\fBzfs_unlink_suspend_progress\fR (int)
+.ad
+.RS 12n
+When enabled, files will not be asynchronously removed from the list of pending
+unlinks and the space they consume will be leaked. Once this option has been
+disabled and the dataset is remounted, the pending unlinks will be processed
+and the freed space returned to the pool.
+This option is used by the test suite to facilitate testing.
+.sp
+Uses \fB0\fR (default) to allow progress and \fB1\fR to pause progress.
+.RE
+
  .sp
  .ne 2
  .na
diff --git a/module/zfs/dataset_kstats.c b/module/zfs/dataset_kstats.c

index ac0ad84ed63f4b8e499a6cdf833e277b4dac5f9a..522825c42ccfbb2c7e0b2c3b9bc6fa3f92f48e53 100644 (file)
--- a/module/zfs/dataset_kstats.c
+++ b/module/zfs/dataset_kstats.c
@@ -21,6 +21,7 @@
  
  /*
   * Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2018 Datto Inc.
   */
  
  #include <sys/dataset_kstats.h>
@@ -34,6 +35,8 @@ static dataset_kstat_values_t empty_dataset_kstats = {
         { "nwritten",   KSTAT_DATA_UINT64 },
         { "reads",      KSTAT_DATA_UINT64 },
         { "nread",      KSTAT_DATA_UINT64 },
+       { "nunlinks",   KSTAT_DATA_UINT64 },
+       { "nunlinked",  KSTAT_DATA_UINT64 },
  };
  
  static int
@@ -54,6 +57,10 @@ dataset_kstats_update(kstat_t *ksp, int rw)
             aggsum_value(&dk->dk_aggsums.das_reads);
         dkv->dkv_nread.value.ui64 =
             aggsum_value(&dk->dk_aggsums.das_nread);
+       dkv->dkv_nunlinks.value.ui64 =
+           aggsum_value(&dk->dk_aggsums.das_nunlinks);
+       dkv->dkv_nunlinked.value.ui64 =
+           aggsum_value(&dk->dk_aggsums.das_nunlinked);
  
         return (0);
  }
@@ -136,6 +143,8 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
         aggsum_init(&dk->dk_aggsums.das_nwritten, 0);
         aggsum_init(&dk->dk_aggsums.das_reads, 0);
         aggsum_init(&dk->dk_aggsums.das_nread, 0);
+       aggsum_init(&dk->dk_aggsums.das_nunlinks, 0);
+       aggsum_init(&dk->dk_aggsums.das_nunlinked, 0);
  }
  
  void
@@ -156,6 +165,8 @@ dataset_kstats_destroy(dataset_kstats_t *dk)
         aggsum_fini(&dk->dk_aggsums.das_nwritten);
         aggsum_fini(&dk->dk_aggsums.das_reads);
         aggsum_fini(&dk->dk_aggsums.das_nread);
+       aggsum_fini(&dk->dk_aggsums.das_nunlinks);
+       aggsum_fini(&dk->dk_aggsums.das_nunlinked);
  }
  
  void
@@ -183,3 +194,21 @@ dataset_kstats_update_read_kstats(dataset_kstats_t *dk,
         aggsum_add(&dk->dk_aggsums.das_reads, 1);
         aggsum_add(&dk->dk_aggsums.das_nread, nread);
  }
+
+/*
+ * Add delta to the aggregate sum that backs this dataset's "nunlinks" kstat.
+ * Nothing is recorded when dk->dk_kstats is NULL.
+ */
+void
+dataset_kstats_update_nunlinks_kstat(dataset_kstats_t *dk, int64_t delta)
+{
+       if (dk->dk_kstats == NULL)
+               return;
+
+       aggsum_add(&dk->dk_aggsums.das_nunlinks, delta);
+}
+
+/*
+ * Add delta to the aggregate sum that backs this dataset's "nunlinked" kstat.
+ * Nothing is recorded when dk->dk_kstats is NULL.
+ */
+void
+dataset_kstats_update_nunlinked_kstat(dataset_kstats_t *dk, int64_t delta)
+{
+       if (dk->dk_kstats == NULL)
+               return;
+
+       aggsum_add(&dk->dk_aggsums.das_nunlinked, delta);
+}
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c

index 78e782c81e1fce121c725ea17203e5325ee9bd74..10e967ab91ed9218415c2cfa5db3c3628b20cfd4 100644 (file)
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -223,6 +223,9 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
  
         dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, defclsyspri,
             max_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+       dp->dp_unlinked_drain_taskq = taskq_create("z_unlinked_drain",
+           max_ncpus, defclsyspri, max_ncpus, INT_MAX,
+           TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
  
         return (dp);
  }
@@ -413,6 +416,7 @@ dsl_pool_close(dsl_pool_t *dp)
         rrw_destroy(&dp->dp_config_rwlock);
         mutex_destroy(&dp->dp_lock);
         cv_destroy(&dp->dp_spaceavail_cv);
+       taskq_destroy(dp->dp_unlinked_drain_taskq);
         taskq_destroy(dp->dp_iput_taskq);
         if (dp->dp_blkstats != NULL) {
                 mutex_destroy(&dp->dp_blkstats->zab_lock);
@@ -1097,6 +1101,12 @@ dsl_pool_iput_taskq(dsl_pool_t *dp)
         return (dp->dp_iput_taskq);
  }
  
+/*
+ * Return the taskq stored in dp_unlinked_drain_taskq, i.e. the queue that
+ * asynchronous unlinked set drains are dispatched to.
+ */
+taskq_t *
+dsl_pool_unlinked_drain_taskq(dsl_pool_t *dp)
+{
+       return (dp->dp_unlinked_drain_taskq);
+}
+
  /*
   * Walk through the pool-wide zap object of temporary snapshot user holds
   * and release them.
diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c

index bd173e7c357c8a6ad53463ad2d2a5ddf95620af2..63ac97754d375bb60c572242d8d9ee9d28927a95 100644 (file)
--- a/module/zfs/zfs_dir.c
+++ b/module/zfs/zfs_dir.c
@@ -458,26 +458,31 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
  
         VERIFY3U(0, ==,
             zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
+
+       dataset_kstats_update_nunlinks_kstat(&zfsvfs->z_kstat, 1);
  }
  
  /*
   * Clean up any znodes that had no links when we either crashed or
   * (force) umounted the file system.
   */
-void
-zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+static void
+zfs_unlinked_drain_task(void *arg)
  {
+       zfsvfs_t *zfsvfs = arg;
         zap_cursor_t    zc;
         zap_attribute_t zap;
         dmu_object_info_t doi;
         znode_t         *zp;
         int             error;
  
+       ASSERT3B(zfsvfs->z_draining, ==, B_TRUE);
+
         /*
          * Iterate over the contents of the unlinked set.
          */
         for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
-           zap_cursor_retrieve(&zc, &zap) == 0;
+           zap_cursor_retrieve(&zc, &zap) == 0 && !zfsvfs->z_drain_cancel;
             zap_cursor_advance(&zc)) {
  
                 /*
@@ -507,9 +512,61 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
                         continue;
  
                 zp->z_unlinked = B_TRUE;
+
+               /*
+                * iput() is Linux's equivalent to illumos' VN_RELE(). It will
+                * decrement the inode's ref count and may cause the inode to be
+                * synchronously freed. We interrupt freeing of this inode, by
+                * checking the return value of dmu_objset_zfs_unmounting() in
+                * dmu_free_long_range(), when an unmount is requested.
+                */
                 iput(ZTOI(zp));
+               ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
         }
         zap_cursor_fini(&zc);
+
+       zfsvfs->z_draining = B_FALSE;
+       zfsvfs->z_drain_task = TASKQID_INVALID;
+}
+
+/*
+ * Start draining the unlinked set of the given filesystem.
+ *
+ * Marks the drain as active (z_draining), clears any earlier cancel request
+ * (z_drain_cancel), then dispatches zfs_unlinked_drain_task() to the pool's
+ * unlinked drain taskq and records the returned task id in z_drain_task.
+ * If the dispatch fails, the drain is executed synchronously by the caller.
+ *
+ * The filesystem must not be unmounted and no drain may already be active;
+ * both conditions are asserted.
+ */
+void
+zfs_unlinked_drain(zfsvfs_t *zfsvfs)
+{
+       ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+       ASSERT3B(zfsvfs->z_draining, ==, B_FALSE);
+
+       /*
+        * zfs_unlinked_drain_task() asserts z_draining and polls
+        * z_drain_cancel, so both must be set up before it can run.
+        */
+       zfsvfs->z_draining = B_TRUE;
+       zfsvfs->z_drain_cancel = B_FALSE;
+
+       zfsvfs->z_drain_task = taskq_dispatch(
+           dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)),
+           zfs_unlinked_drain_task, zfsvfs, TQ_SLEEP);
+       if (zfsvfs->z_drain_task == TASKQID_INVALID) {
+               /* Fall back to draining in the caller's context. */
+               zfs_dbgmsg("async zfs_unlinked_drain dispatch failed");
+               zfs_unlinked_drain_task(zfsvfs);
+       }
+}
+
+/*
+ * Wait for the unlinked drain taskq task to stop. This will interrupt the
+ * unlinked set processing if it is in progress.
+ *
+ * Does nothing when no drain is active (z_draining is false). Otherwise it
+ * raises z_drain_cancel, which the drain loop tests before handling each
+ * entry, cancels the task by its id and resets z_drain_task and z_draining.
+ *
+ * The filesystem must not be unmounted yet; this is asserted.
+ */
+void
+zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs)
+{
+       ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
+
+       if (zfsvfs->z_draining) {
+               zfsvfs->z_drain_cancel = B_TRUE;
+               /*
+                * NOTE(review): presumably taskq_cancel_id() does not return
+                * while the task is still running, which is what makes
+                * resetting the state below safe -- confirm against the
+                * taskq implementation.
+                */
+               taskq_cancel_id(dsl_pool_unlinked_drain_taskq(
+                   dmu_objset_pool(zfsvfs->z_os)), zfsvfs->z_drain_task);
+               zfsvfs->z_drain_task = TASKQID_INVALID;
+               zfsvfs->z_draining = B_FALSE;
+       }
  }
  
  /*
@@ -684,6 +741,8 @@ zfs_rmnode(znode_t *zp)
         VERIFY3U(0, ==,
             zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
  
+       dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
+
         zfs_znode_delete(zp, tx);
  
         dmu_tx_commit(tx);
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c

index 766cbab7436fc372d2a557452b5ffb843ee470db..cdc1bc70758136fb47c2bb68dded69cf7d432f4b 100644 (file)
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1178,6 +1178,10 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
                 return (error);
         }
  
+       zfsvfs->z_drain_task = TASKQID_INVALID;
+       zfsvfs->z_draining = B_FALSE;
+       zfsvfs->z_drain_cancel = B_TRUE;
+
         *zfvp = zfsvfs;
         return (0);
  }
@@ -1200,14 +1204,27 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
          * operations out since we closed the ZIL.
          */
         if (mounting) {
+               ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
+               dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
+
                 /*
                  * During replay we remove the read only flag to
                  * allow replays to succeed.
                  */
-               if (readonly != 0)
+               if (readonly != 0) {
                         readonly_changed_cb(zfsvfs, B_FALSE);
-               else
+               } else {
+                       /*
+                        * zs has no initializer, so it may only be read
+                        * after zap_get_stats() has succeeded. Keep both
+                        * the kstat update and the debug message inside
+                        * the success branch.
+                        */
+                       zap_stats_t zs;
+                       if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
+                           &zs) == 0) {
+                               dataset_kstats_update_nunlinks_kstat(
+                                   &zfsvfs->z_kstat, zs.zs_num_entries);
+                               dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
+                                   "num_entries in unlinked set: %llu",
+                                   zs.zs_num_entries);
+                       }
                         zfs_unlinked_drain(zfsvfs);
+               }
  
                 /*
                  * Parse and replay the intent log.
@@ -1250,9 +1267,6 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
                 /* restore readonly bit */
                 if (readonly != 0)
                         readonly_changed_cb(zfsvfs, B_TRUE);
-
-               ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
-               dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
         }
  
         /*
@@ -1633,6 +1647,8 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
  {
         znode_t *zp;
  
+       zfs_unlinked_drain_stop_wait(zfsvfs);
+
         /*
          * If someone has not already unmounted this file system,
          * drain the iput_taskq to ensure all active references to the
@@ -1884,6 +1900,7 @@ zfs_preumount(struct super_block *sb)
  
         /* zfsvfs is NULL when zfs_domount fails during mount */
         if (zfsvfs) {
+               zfs_unlinked_drain_stop_wait(zfsvfs);
                 zfsctl_destroy(sb->s_fs_info);
                 /*
                  * Wait for iput_async before entering evict_inodes in
@@ -2159,6 +2176,15 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
         }
         mutex_exit(&zfsvfs->z_znodes_lock);
  
+       if (!zfs_is_readonly(zfsvfs) && !zfsvfs->z_unmounted) {
+               /*
+                * zfs_suspend_fs() could have interrupted freeing
+                * of dnodes. We need to restart this freeing so
+                * that we don't "leak" the space.
+                */
+               zfs_unlinked_drain(zfsvfs);
+       }
+
  bail:
         /* release the VFS ops */
         rw_exit(&zfsvfs->z_teardown_inactive_lock);
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c

index 7b13927bea345d1e4c44461dc053dedf4be6fe6a..761fbcb33764301f028b0b851fc2559442236271 100644 (file)
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -91,6 +91,12 @@ static kmem_cache_t *znode_cache = NULL;
  static kmem_cache_t *znode_hold_cache = NULL;
  unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
  
+/*
+ * This is used by the test suite so that it can delay znodes from being
+ * freed in order to inspect the unlinked set.
+ */
+int zfs_unlink_suspend_progress = 0;
+
  /*
   * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
   * z_rangelock. It will modify the offset and length of the lock to reflect
@@ -1339,7 +1345,7 @@ zfs_zinactive(znode_t *zp)
          */
         if (zp->z_unlinked) {
                 ASSERT(!zfsvfs->z_issnap);
-               if (!zfs_is_readonly(zfsvfs)) {
+               if (!zfs_is_readonly(zfsvfs) && !zfs_unlink_suspend_progress) {
                         mutex_exit(&zp->z_lock);
                         zfs_znode_hold_exit(zfsvfs, zh);
                         zfs_rmnode(zp);
@@ -2214,4 +2220,7 @@ EXPORT_SYMBOL(zfs_obj_to_path);
  /* CSTYLED */
  module_param(zfs_object_mutex_size, uint, 0644);
  MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
+module_param(zfs_unlink_suspend_progress, int, 0644);
+MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks "
+"(debug - leaks space into the unlinked set)");
  #endif
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run

index 8ab5e7033702f6ad2f07e7332f395f3e08296919..8663c24f904343f82299ba6c790dd5d11fd2ee5b 100644 (file)
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -644,7 +644,7 @@ tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval',
  tags = ['functional', 'mmp']
  
  [tests/functional/mount]
-tests = ['umount_001', 'umountall_001']
+tests = ['umount_001', 'umount_unlinked_drain', 'umountall_001']
  tags = ['functional', 'mount']
  
  [tests/functional/mv_files]
diff --git a/tests/zfs-tests/tests/functional/mount/Makefile.am b/tests/zfs-tests/tests/functional/mount/Makefile.am

index 9898e0510c15fa353919879d91c9f735b8f414ad..bdafa69badd85edcafd816d9b58d1dd2a21c1281 100644 (file)
--- a/tests/zfs-tests/tests/functional/mount/Makefile.am
+++ b/tests/zfs-tests/tests/functional/mount/Makefile.am
@@ -3,4 +3,5 @@ dist_pkgdata_SCRIPTS = \
         setup.ksh \
         cleanup.ksh \
         umount_001.ksh \
+       umount_unlinked_drain.ksh \
         umountall_001.ksh
diff --git a/tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh b/tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh

new file mode 100755 (executable)

index 0000000..0d26280
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh
@@ -0,0 +1,119 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Datto Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Test async unlinked drain to ensure mounting is not held up when there are
+# entries in the unlinked set. We also try to test that the list is able to be
+# filled up and drained at the same time.
+#
+# STRATEGY:
+# 1. Use zfs_unlink_suspend_progress tunable to disable freeing to build up
+#    the unlinked set
+# 2. Make sure mount happens even when there are entries in the unlinked set
+# 3. Drain and build up the unlinked list at the same time to test for races
+#
+
+#
+# Restore the zfs_unlink_suspend_progress tunable to the value saved in
+# $default_unlink_sp, then for each of the three test filesystems make sure
+# it is mounted, remove any remaining file-* files and set xattr=on.
+#
+function cleanup
+{
+       log_must set_tunable32 zfs_unlink_suspend_progress $default_unlink_sp
+       for fs in $(seq 1 3); do
+               # mount only if it is not mounted already
+               mounted $TESTDIR.$fs || zfs mount $TESTPOOL/$TESTFS.$fs
+               rm -f $TESTDIR.$fs/file-*
+               zfs set xattr=on $TESTPOOL/$TESTFS.$fs
+       done
+}
+
+#
+# Poll a dataset's objset kstat until the unlinked set size, computed as
+# nunlinks - nunlinked, stops changing, then compare it with the expected
+# value.
+#
+# $1 - expected unlinked set size
+# $2 - pool name (selects the /proc/spl/kstat/zfs/<pool> directory)
+# $3 - dataset name used to find the matching objset-0x* kstat file
+#
+# Returns 0 once the size has repeated for MAX_ITERS consecutive polls and
+# equals $1. If the size is stable at any other value, logs a note and
+# returns 1. There is no sleep or timeout: the loop only ends once the
+# reported value is stable.
+#
+function unlinked_size_is
+{
+       MAX_ITERS=5 # iterations to do before we consider the reported number stable
+       iters=0
+       last_usize=0
+       while [[ $iters -le $MAX_ITERS ]]; do
+               # locate the objset kstat file that mentions dataset $3
+               kstat_file=$(grep -nrwl /proc/spl/kstat/zfs/$2/objset-0x* -e $3)
+               nunlinks=`cat $kstat_file | grep nunlinks | awk '{print $3}'`
+               nunlinked=`cat $kstat_file | grep nunlinked | awk '{print $3}'`
+               usize=$(($nunlinks - $nunlinked))
+               if [[ $iters == $MAX_ITERS && $usize == $1 ]]; then
+                       return 0
+               fi
+               # count consecutive identical readings; any change restarts
+               if [[ $usize == $last_usize ]]; then
+                       (( iters++ ))
+               else
+                       iters=0
+               fi
+               last_usize=$usize
+       done
+
+       log_note "Unexpected unlinked set size: $last_usize, expected $1"
+       return 1
+}
+
+
+UNLINK_SP_PARAM=/sys/module/zfs/parameters/zfs_unlink_suspend_progress
+default_unlink_sp=$(get_tunable zfs_unlink_suspend_progress)
+
+log_onexit cleanup
+
+log_assert "Unlinked list drain does not hold up mounting of fs"
+
+for fs in 1 2 3; do
+       set -A xattrs on sa off
+       for xa in ${xattrs[@]}; do
+               # setup fs and ensure all deleted files got into unlinked set
+               log_must mounted $TESTDIR.$fs
+
+               log_must zfs set xattr=$xa $TESTPOOL/$TESTFS.$fs
+
+               if [[ $xa == off ]]; then
+                       for fn in $(seq 1 175); do
+                               log_must mkfile 128k $TESTDIR.$fs/file-$fn
+                       done
+               else
+                       log_must xattrtest -f 175 -x 3 -r -k -p $TESTDIR.$fs
+               fi
+
+               log_must set_tunable32 zfs_unlink_suspend_progress 1
+               log_must unlinked_size_is 0 $TESTPOOL $TESTPOOL/$TESTFS.$fs
+
+               # build up unlinked set
+               for fn in $(seq 1 100); do
+                       log_must eval "rm $TESTDIR.$fs/file-$fn &"
+               done
+               log_must unlinked_size_is 100 $TESTPOOL $TESTPOOL/$TESTFS.$fs
+
+               # test that we can mount fs without emptying the unlinked list
+               log_must zfs umount $TESTPOOL/$TESTFS.$fs
+               log_must unmounted $TESTDIR.$fs
+               log_must zfs mount $TESTPOOL/$TESTFS.$fs
+               log_must mounted $TESTDIR.$fs
+               log_must unlinked_size_is 100 $TESTPOOL $TESTPOOL/$TESTFS.$fs
+
+               # confirm we can drain and add to unlinked set at the same time
+               log_must set_tunable32 zfs_unlink_suspend_progress 0
+               log_must zfs umount $TESTPOOL/$TESTFS.$fs
+               log_must zfs mount $TESTPOOL/$TESTFS.$fs
+               for fn in $(seq 101 175); do
+                       log_must eval "rm $TESTDIR.$fs/file-$fn &"
+               done
+               log_must unlinked_size_is 0 $TESTPOOL $TESTPOOL/$TESTFS.$fs
+       done
+done
+
+log_pass "Confirmed unlinked list drain does not hold up mounting of fs"
author	Alek P <alek-p@users.noreply.github.com>
	Tue, 12 Feb 2019 18:41:15 +0000 (10:41 -0800)
committer	Brian Behlendorf <behlendorf1@llnl.gov>
	Tue, 12 Feb 2019 18:41:15 +0000 (10:41 -0800)
include/sys/dataset_kstats.h		patch \| blob \| blame \| history
include/sys/dsl_pool.h		patch \| blob \| blame \| history
include/sys/zfs_dir.h		patch \| blob \| blame \| history
include/sys/zfs_vfsops.h		patch \| blob \| blame \| history
man/man5/zfs-module-parameters.5		patch \| blob \| blame \| history
module/zfs/dataset_kstats.c		patch \| blob \| blame \| history
module/zfs/dsl_pool.c		patch \| blob \| blame \| history
module/zfs/zfs_dir.c		patch \| blob \| blame \| history
module/zfs/zfs_vfsops.c		patch \| blob \| blame \| history
module/zfs/zfs_znode.c		patch \| blob \| blame \| history
tests/runfiles/linux.run		patch \| blob \| blame \| history
tests/zfs-tests/tests/functional/mount/Makefile.am		patch \| blob \| blame \| history
tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh	[new file with mode: 0755]	patch \| blob