xfs: mark reclaimed inodes invalid earlier

author Dave Chinner <dchinner@redhat.com>
Wed, 18 May 2016 04:09:12 +0000 (14:09 +1000)
committer Dave Chinner <david@fromorbit.com>
Wed, 18 May 2016 04:09:12 +0000 (14:09 +1000)
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0c94cde41016aa4dcc43cde0d966a5b7b9cdea4b..57fcd5917a66a69ab61ef4eab647c9f422535eaa 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -114,6 +114,18 @@ xfs_inode_free_callback(
         kmem_zone_free(xfs_inode_zone, ip);
  }
  
+static void
+__xfs_inode_free(
+       struct xfs_inode        *ip)
+{
+       /* asserts to verify all state is correct here */
+       ASSERT(atomic_read(&ip->i_pincount) == 0);
+       ASSERT(!xfs_isiflocked(ip));
+       XFS_STATS_DEC(ip->i_mount, vn_active);
+
+       call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+}
+
  void
  xfs_inode_free(
         struct xfs_inode        *ip)
@@ -129,12 +141,7 @@ xfs_inode_free(
         ip->i_ino = 0;
         spin_unlock(&ip->i_flags_lock);
  
-       /* asserts to verify all state is correct here */
-       ASSERT(atomic_read(&ip->i_pincount) == 0);
-       ASSERT(!xfs_isiflocked(ip));
-       XFS_STATS_DEC(ip->i_mount, vn_active);
-
-       call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+       __xfs_inode_free(ip);
  }
  
  /*
@@ -772,8 +779,7 @@ __xfs_inode_set_reclaim_tag(
         if (!pag->pag_ici_reclaimable) {
                 /* propagate the reclaim tag up into the perag radix tree */
                 spin_lock(&ip->i_mount->m_perag_lock);
-               radix_tree_tag_set(&ip->i_mount->m_perag_tree,
-                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+               radix_tree_tag_set(&ip->i_mount->m_perag_tree, pag->pag_agno,
                                 XFS_ICI_RECLAIM_TAG);
                 spin_unlock(&ip->i_mount->m_perag_lock);
  
@@ -817,8 +823,7 @@ __xfs_inode_clear_reclaim(
         if (!pag->pag_ici_reclaimable) {
                 /* clear the reclaim tag from the perag radix tree */
                 spin_lock(&ip->i_mount->m_perag_lock);
-               radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
-                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+               radix_tree_tag_clear(&ip->i_mount->m_perag_tree, pag->pag_agno,
                                 XFS_ICI_RECLAIM_TAG);
                 spin_unlock(&ip->i_mount->m_perag_lock);
                 trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
@@ -929,6 +934,7 @@ xfs_reclaim_inode(
         int                     sync_mode)
  {
         struct xfs_buf          *bp = NULL;
+       xfs_ino_t               ino = ip->i_ino; /* for radix_tree_delete */
         int                     error;
  
  restart:
@@ -993,6 +999,22 @@ restart:
  
         xfs_iflock(ip);
  reclaim:
+       /*
+        * Because we use RCU freeing we need to ensure the inode always appears
+        * to be reclaimed with an invalid inode number when in the free state.
+        * We do this as early as possible under the ILOCK and flush lock so
+        * that xfs_iflush_cluster() can be guaranteed to detect races with us
+        * here. By doing this, we guarantee that once xfs_iflush_cluster has
+        * locked both the XFS_ILOCK and the flush lock that it will see either
+        * a valid, flushable inode that will serialise correctly against the
+        * locks below, or it will see a clean (and invalid) inode that it can
+        * skip.
+        */
+       spin_lock(&ip->i_flags_lock);
+       ip->i_flags = XFS_IRECLAIM;
+       ip->i_ino = 0;
+       spin_unlock(&ip->i_flags_lock);
+
         xfs_ifunlock(ip);
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
  
@@ -1006,7 +1028,7 @@ reclaim:
          */
         spin_lock(&pag->pag_ici_lock);
         if (!radix_tree_delete(&pag->pag_ici_root,
-                               XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+                               XFS_INO_TO_AGINO(ip->i_mount, ino)))
                 ASSERT(0);
         __xfs_inode_clear_reclaim(pag, ip);
         spin_unlock(&pag->pag_ici_lock);
@@ -1023,7 +1045,7 @@ reclaim:
         xfs_qm_dqdetach(ip);
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
  
-       xfs_inode_free(ip);
+       __xfs_inode_free(ip);
         return error;
  
  out_ifunlock:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3cbc9031731b4036971925722598fc7b5e622902..e3b27982b3b26f5b6c52acf5e315bf5613833486 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3239,6 +3239,19 @@ xfs_iflush_cluster(
                         continue;
                 }
  
+
+               /*
+                * Check the inode number again, just to be certain we are not
+                * racing with freeing in xfs_reclaim_inode(). See the comments
+                * in that function for more information as to why the initial
+                * check is not sufficient.
+                */
+               if (!iq->i_ino) {
+                       xfs_ifunlock(iq);
+                       xfs_iunlock(iq, XFS_ILOCK_SHARED);
+                       continue;
+               }
+
                 /*
                  * arriving here means that this inode can be flushed.  First
                  * re-check that it's dirty before flushing.
author	Dave Chinner <dchinner@redhat.com>
	Wed, 18 May 2016 04:09:12 +0000 (14:09 +1000)
committer	Dave Chinner <david@fromorbit.com>
	Wed, 18 May 2016 04:09:12 +0000 (14:09 +1000)
fs/xfs/xfs_icache.c		patch \| blob \| blame \| history
fs/xfs/xfs_inode.c		patch \| blob \| blame \| history