xfs: optimise away log forces on timestamp updates for fdatasync

author Dave Chinner <dchinner@redhat.com>

Tue, 3 Nov 2015 02:14:59 +0000 (13:14 +1100)

committer Dave Chinner <david@fromorbit.com>

Tue, 3 Nov 2015 02:14:59 +0000 (13:14 +1100)
author Dave Chinner <dchinner@redhat.com>
Tue, 3 Nov 2015 02:14:59 +0000 (13:14 +1100)
committer Dave Chinner <david@fromorbit.com>
Tue, 3 Nov 2015 02:14:59 +0000 (13:14 +1100)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index e78feb400e22b22d59228b4f959d5b845848a675..c94699cbc667fd64954ca3ea2d2fe100e872a1c9 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -242,19 +242,30 @@ xfs_file_fsync(
         }
  
         /*
-        * All metadata updates are logged, which means that we just have
-        * to flush the log up to the latest LSN that touched the inode.
+        * All metadata updates are logged, which means that we just have to
+        * flush the log up to the latest LSN that touched the inode. If we have
+        * concurrent fsync/fdatasync() calls, we need them to all block on the
+        * log force before we clear the ili_fsync_fields field. This ensures
+        * that we don't get a racing sync operation that does not wait for the
+        * metadata to hit the journal before returning. If we race with
+        * clearing the ili_fsync_fields, then all that will happen is the log
+        * force will do nothing as the lsn will already be on disk. We can't
+        * race with setting ili_fsync_fields because that is done under
+        * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
+        * until after the ili_fsync_fields is cleared.
          */
         xfs_ilock(ip, XFS_ILOCK_SHARED);
         if (xfs_ipincount(ip)) {
                 if (!datasync ||
-                   (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
+                   (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
                         lsn = ip->i_itemp->ili_last_lsn;
         }
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
  
-       if (lsn)
+       if (lsn) {
                 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
+               ip->i_itemp->ili_fsync_fields = 0;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
  
         /*
          * If we only have a single device, and the log force about was
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index dc40a6d5ae0dc909a79ef72917c5eb1551fae227..ff629d54470698004a655970c20f941bd04a6139 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2365,6 +2365,7 @@ retry:
  
                         iip->ili_last_fields = iip->ili_fields;
                         iip->ili_fields = 0;
+                       iip->ili_fsync_fields = 0;
                         iip->ili_logged = 1;
                         xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
                                                 &iip->ili_item.li_lsn);
@@ -3560,6 +3561,7 @@ xfs_iflush_int(
          */
         iip->ili_last_fields = iip->ili_fields;
         iip->ili_fields = 0;
+       iip->ili_fsync_fields = 0;
         iip->ili_logged = 1;
  
         xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c

index 62bd80f4edd9aacb74c254c70b3e9de8618ef988..d14b12b8cfefb90f8fe4c92a0033a41cbde2e552 100644 (file)
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -719,6 +719,7 @@ xfs_iflush_abort(
                  * attempted.
                  */
                 iip->ili_fields = 0;
+               iip->ili_fsync_fields = 0;
         }
         /*
          * Release the inode's flush lock since we're done with it.
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h

index 488d81254e28a73e26fba494fd89d21139415ac6..4c7722e325b369332d381c8435edfc33728a4067 100644 (file)
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -34,6 +34,7 @@ typedef struct xfs_inode_log_item {
         unsigned short          ili_logged;        /* flushed logged data */
         unsigned int            ili_last_fields;   /* fields when flushed */
         unsigned int            ili_fields;        /* fields to be logged */
+       unsigned int            ili_fsync_fields;  /* logged since last fsync */
  } xfs_inode_log_item_t;
  
  static inline int xfs_inode_clean(xfs_inode_t *ip)
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c

index 17280cd71934b3eff4ea11f44639d763042238a2..b97f1df910abb0bd60ac5850fbe549237806d078 100644 (file)
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -107,6 +107,15 @@ xfs_trans_log_inode(
         ASSERT(ip->i_itemp != NULL);
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
  
+       /*
+        * Record the specific change for fdatasync optimisation. This
+        * allows fdatasync to skip log forces for inodes that are only
+        * timestamp dirty. We do this before the change count so that
+        * the core being logged in this case does not impact on fdatasync
+        * behaviour.
+        */
+       ip->i_itemp->ili_fsync_fields |= flags;
+
         /*
          * First time we log the inode in a transaction, bump the inode change
          * counter if it is configured for this to occur. We don't use
author	Dave Chinner <dchinner@redhat.com>
	Tue, 3 Nov 2015 02:14:59 +0000 (13:14 +1100)
committer	Dave Chinner <david@fromorbit.com>
	Tue, 3 Nov 2015 02:14:59 +0000 (13:14 +1100)
fs/xfs/xfs_file.c		patch | blob | blame | history
fs/xfs/xfs_inode.c		patch | blob | blame | history
fs/xfs/xfs_inode_item.c		patch | blob | blame | history
fs/xfs/xfs_inode_item.h		patch | blob | blame | history
fs/xfs/xfs_trans_inode.c		patch | blob | blame | history