Restructure per-filesystem reclaim

author Brian Behlendorf <behlendorf1@llnl.gov>

Tue, 17 Mar 2015 22:07:47 +0000 (15:07 -0700)

committer Brian Behlendorf <behlendorf1@llnl.gov>

Fri, 20 Mar 2015 17:35:20 +0000 (10:35 -0700)
author Brian Behlendorf <behlendorf1@llnl.gov>
Tue, 17 Mar 2015 22:07:47 +0000 (15:07 -0700)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Fri, 20 Mar 2015 17:35:20 +0000 (10:35 -0700)
diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h

index eeeffbe4c72c14a0c0ffd90071f043845e3ea250..4b88260de2b91f01f6e561e5ba741a4abde916e9 100644 (file)
--- a/include/sys/zfs_vfsops.h
+++ b/include/sys/zfs_vfsops.h
@@ -73,6 +73,7 @@ typedef struct zfs_sb {
         uint64_t        z_nr_znodes;    /* number of znodes in the fs */
         unsigned long   z_rollback_time; /* last online rollback time */
         kmutex_t        z_znodes_lock;  /* lock for z_all_znodes */
+       arc_prune_t     *z_arc_prune;   /* called by ARC to prune caches */
         struct inode    *z_ctldir;      /* .zfs directory inode */
         avl_tree_t      z_ctldir_snaps; /* .zfs/snapshot entries */
         kmutex_t        z_ctldir_lock;  /* .zfs ctldir lock */
diff --git a/include/sys/zpl.h b/include/sys/zpl.h

index 3fc5d979f76eb7d6427577bd6709ec86b145af6a..c7701aae57d09f0516ea88c8bea5c8ef7864d32e 100644 (file)
--- a/include/sys/zpl.h
+++ b/include/sys/zpl.h
@@ -63,7 +63,7 @@ extern const struct file_operations zpl_file_operations;
  extern const struct file_operations zpl_dir_file_operations;
  
  /* zpl_super.c */
-extern void zpl_prune_sbs(int64_t bytes_to_scan, void *private);
+extern void zpl_prune_sb(int64_t nr_to_scan, void *arg);
  
  typedef struct zpl_mount_data {
         const char *z_osname;   /* Dataset name */
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5

index 321b6285cad67085699a4c52d3801e17bb9aa05b..4b3dc3666db0fd17a57cae3221005e7ccc9b9a8a 100644 (file)
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -386,7 +386,11 @@ Use \fB1\fR for yes (default) and \fB0\fR to disable.
  \fBzfs_arc_meta_limit\fR (ulong)
  .ad
  .RS 12n
-Meta limit for arc size
+The maximum size in bytes that meta data buffers are allowed to
+consume in the ARC.  When this limit is reached meta data buffers will
+be reclaimed even if the overall arc_c_max has not been reached.  This
+value defaults to 0 which indicates that 3/4 of the ARC may be used
+for meta data.
  .sp
  Default value: \fB0\fR.
  .RE
@@ -397,9 +401,14 @@ Default value: \fB0\fR.
  \fBzfs_arc_meta_prune\fR (int)
  .ad
  .RS 12n
-Bytes of meta data to prune
+The number of dentries and inodes to be scanned looking for entries
+which can be dropped.  This may be required when the ARC reaches the
+\fBzfs_arc_meta_limit\fR because dentries and inodes can pin buffers
+in the ARC.  Increasing this value will cause the dentry and inode caches
+to be pruned more aggressively.  Setting this value to 0 will disable
+pruning the inode and dentry caches.
  .sp
-Default value: \fB1,048,576\fR.
+Default value: \fB10,000\fR.
  .RE
  
  .sp
diff --git a/module/zfs/arc.c b/module/zfs/arc.c

index 188086767b42882a4bb6aa9f79a69312426dcd84..f9f0008c00b6a2c8ec608c367d3c0747c772e976 100644 (file)
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -158,8 +158,8 @@ static kmutex_t             arc_reclaim_thr_lock;
  static kcondvar_t      arc_reclaim_thr_cv;     /* used to signal reclaim thr */
  static uint8_t         arc_thread_exit;
  
-/* number of bytes to prune from caches when at arc_meta_limit is reached */
-int zfs_arc_meta_prune = 1048576;
+/* number of objects to prune from caches when arc_meta_limit is reached */
+int zfs_arc_meta_prune = 10000;
  
  typedef enum arc_reclaim_strategy {
         ARC_RECLAIM_AGGR,               /* Aggressive reclaim strategy */
@@ -5607,7 +5607,7 @@ module_param(zfs_arc_meta_limit, ulong, 0644);
  MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size");
  
  module_param(zfs_arc_meta_prune, int, 0644);
-MODULE_PARM_DESC(zfs_arc_meta_prune, "Bytes of meta data to prune");
+MODULE_PARM_DESC(zfs_arc_meta_prune, "Meta objects to scan for prune");
  
  module_param(zfs_arc_grow_retry, int, 0644);
  MODULE_PARM_DESC(zfs_arc_grow_retry, "Seconds before growing arc size");
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c

index 4df324a68f88216b0984caacd8f6e44e773cda63..e98f4bf6a120f32070fba8764c33f52612d514b7 100644 (file)
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1068,29 +1068,52 @@ zfs_root(zfs_sb_t *zsb, struct inode **ipp)
  }
  EXPORT_SYMBOL(zfs_root);
  
-#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
+/*
+ * The ARC has requested that the filesystem drop entries from the dentry
+ * and inode caches.  This can occur when the ARC needs to free meta data
+ * blocks but can't because they are all pinned by entries in these caches.
+ */
  int
  zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
  {
         zfs_sb_t *zsb = sb->s_fs_info;
+       int error = 0;
+#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
         struct shrinker *shrinker = &sb->s_shrink;
         struct shrink_control sc = {
                 .nr_to_scan = nr_to_scan,
                 .gfp_mask = GFP_KERNEL,
         };
+#endif
  
         ZFS_ENTER(zsb);
-#ifdef HAVE_SPLIT_SHRINKER_CALLBACK
+
+#if defined(HAVE_SPLIT_SHRINKER_CALLBACK)
         *objects = (*shrinker->scan_objects)(shrinker, &sc);
-#else
+#elif defined(HAVE_SHRINK)
         *objects = (*shrinker->shrink)(shrinker, &sc);
+#else
+       /*
+        * Linux kernels older than 3.1 do not support a per-filesystem
+        * shrinker.  Therefore, we must fall back to the only available
+        * interface which is to discard all unused dentries and inodes.
+        * This behavior clearly isn't ideal but it's required so the ARC
+        * may free memory.  The performance impact is mitigated by the
+        * fact that the frequently accessed dentry and inode buffers will
+        * still be in the ARC making them relatively cheap to recreate.
+        */
+       *objects = 0;
+       shrink_dcache_parent(sb->s_root);
  #endif
         ZFS_EXIT(zsb);
  
-       return (0);
+       dprintf_ds(zsb->z_os->os_dsl_dataset,
+           "pruning, nr_to_scan=%lu objects=%d error=%d\n",
+           nr_to_scan, *objects, error);
+
+       return (error);
  }
  EXPORT_SYMBOL(zfs_sb_prune);
-#endif /* defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) */
  
  /*
   * Teardown the zfs_sb_t.
@@ -1286,6 +1309,8 @@ zfs_domount(struct super_block *sb, void *data, int silent)
  
         if (!zsb->z_issnap)
                 zfsctl_create(zsb);
+
+       zsb->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb);
  out:
         if (error) {
                 dmu_objset_disown(zsb->z_os, zsb);
@@ -1324,6 +1349,7 @@ zfs_umount(struct super_block *sb)
         zfs_sb_t *zsb = sb->s_fs_info;
         objset_t *os;
  
+       arc_remove_prune_callback(zsb->z_arc_prune);
         VERIFY(zfs_sb_teardown(zsb, B_TRUE) == 0);
         os = zsb->z_os;
         bdi_destroy(sb->s_bdi);
@@ -1682,7 +1708,6 @@ zfs_init(void)
         zfs_znode_init();
         dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
         register_filesystem(&zpl_fs_type);
-       (void) arc_add_prune_callback(zpl_prune_sbs, NULL);
  }
  
  void
diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c

index 47cc2fcf46088bdcb4a3174cddc68cb5f3446a9b..ef0f9d311e38be8eb1c1f1a2ca485c380427d83a 100644 (file)
--- a/module/zfs/zpl_super.c
+++ b/module/zfs/zpl_super.c
@@ -109,6 +109,12 @@ zpl_evict_inode(struct inode *ip)
  
  #else
  
+static void
+zpl_drop_inode(struct inode *ip)
+{
+       generic_delete_inode(ip);
+}
+
  static void
  zpl_clear_inode(struct inode *ip)
  {
@@ -125,7 +131,6 @@ zpl_inode_delete(struct inode *ip)
         truncate_setsize(ip, 0);
         clear_inode(ip);
  }
-
  #endif /* HAVE_EVICT_INODE */
  
  static void
@@ -276,37 +281,13 @@ zpl_kill_sb(struct super_block *sb)
  #endif /* HAVE_S_INSTANCES_LIST_HEAD */
  }
  
-#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
-/*
- * Linux 3.1 - 3.x API
- *
- * The Linux 3.1 API introduced per-sb cache shrinkers to replace the
- * global ones.  This allows us a mechanism to cleanly target a specific
- * zfs file system when the dnode and inode caches grow too large.
- *
- * In addition, the 3.0 kernel added the iterate_supers_type() helper
- * function which is used to safely walk all of the zfs file systems.
- */
-static void
-zpl_prune_sb(struct super_block *sb, void *arg)
-{
-       int objects = 0;
-       int error;
-
-       error = -zfs_sb_prune(sb, *(unsigned long *)arg, &objects);
-       ASSERT3S(error, <=, 0);
-}
-#endif /* defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) */
-
  void
-zpl_prune_sbs(int64_t bytes_to_scan, void *private)
+zpl_prune_sb(int64_t nr_to_scan, void *arg)
  {
-#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
-       unsigned long nr_to_scan = (bytes_to_scan / sizeof (znode_t));
+       struct super_block *sb = (struct super_block *)arg;
+       int objects = 0;
  
-       iterate_supers_type(&zpl_fs_type, zpl_prune_sb, &nr_to_scan);
-       kmem_reap();
-#endif /* defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) */
+       (void) -zfs_sb_prune(sb, nr_to_scan, &objects);
  }
  
  #ifdef HAVE_NR_CACHED_OBJECTS
@@ -343,10 +324,10 @@ const struct super_operations zpl_super_operations = {
         .destroy_inode          = zpl_inode_destroy,
         .dirty_inode            = zpl_dirty_inode,
         .write_inode            = NULL,
-       .drop_inode             = NULL,
  #ifdef HAVE_EVICT_INODE
         .evict_inode            = zpl_evict_inode,
  #else
+       .drop_inode             = zpl_drop_inode,
         .clear_inode            = zpl_clear_inode,
         .delete_inode           = zpl_inode_delete,
  #endif /* HAVE_EVICT_INODE */
author	Brian Behlendorf <behlendorf1@llnl.gov>
	Tue, 17 Mar 2015 22:07:47 +0000 (15:07 -0700)
committer	Brian Behlendorf <behlendorf1@llnl.gov>
	Fri, 20 Mar 2015 17:35:20 +0000 (10:35 -0700)
include/sys/zfs_vfsops.h		patch \| blob \| blame \| history
include/sys/zpl.h		patch \| blob \| blame \| history
man/man5/zfs-module-parameters.5		patch \| blob \| blame \| history
module/zfs/arc.c		patch \| blob \| blame \| history
module/zfs/zfs_vfsops.c		patch \| blob \| blame \| history
module/zfs/zpl_super.c		patch \| blob \| blame \| history