]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - fs/xfs/xfs_vfsops.c
[XFS] Sleeping with the ilock waiting for I/O completion is Bad.
[mirror_ubuntu-bionic-kernel.git] / fs / xfs / xfs_vfsops.c
index 62336a4cc5a4c72339932fdc91dcf8dd2323f38a..92c1425d06cec2e537e00c8ec9797e41429d3bc3 100644 (file)
@@ -640,7 +640,7 @@ xfs_quiesce_fs(
         * we can write the unmount record.
         */
        do {
-               xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, 0, NULL);
+               xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, NULL);
                pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
                if (!pincount) {
                        delay(50);
@@ -696,7 +696,7 @@ xfs_unmount_flush(
        bhv_vnode_t     *rvp = XFS_ITOV(rip);
        int             error;
 
-       xfs_ilock(rip, XFS_ILOCK_EXCL);
+       xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
        xfs_iflock(rip);
 
        /*
@@ -806,7 +806,7 @@ xfs_statvfs(
 
        statp->f_type = XFS_SB_MAGIC;
 
-       xfs_icsb_sync_counters_lazy(mp);
+       xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
        s = XFS_SB_LOCK(mp);
        statp->f_bsize = sbp->sb_blocksize;
        lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
@@ -872,6 +872,10 @@ xfs_statvfs(
  *                    this by simply making sure the log gets flushed
  *                    if SYNC_BDFLUSH is set, and by actually writing it
  *                    out otherwise.
+ *     SYNC_IOWAIT  - The caller wants us to wait for all data I/O to complete
+ *                    before we return (including direct I/O). Forms the drain
+ *                    side of the write barrier needed to safely quiesce the
+ *                    filesystem.
  *
  */
 /*ARGSUSED*/
@@ -883,27 +887,20 @@ xfs_sync(
 {
        xfs_mount_t     *mp = XFS_BHVTOM(bdp);
 
-       if (unlikely(flags == SYNC_QUIESCE))
-               return xfs_quiesce_fs(mp);
-       else
-               return xfs_syncsub(mp, flags, 0, NULL);
+       return xfs_syncsub(mp, flags, NULL);
 }
 
 /*
  * xfs sync routine for internal use
  *
  * This routine supports all of the flags defined for the generic vfs_sync
- * interface as explained above under xfs_sync.  In the interests of not
- * changing interfaces within the 6.5 family, additional internally-
- * required functions are specified within a separate xflags parameter,
- * only available by calling this routine.
+ * interface as explained above under xfs_sync.
  *
  */
 int
 xfs_sync_inodes(
        xfs_mount_t     *mp,
        int             flags,
-       int             xflags,
        int             *bypassed)
 {
        xfs_inode_t     *ip = NULL;
@@ -1131,50 +1128,40 @@ xfs_sync_inodes(
                 * in the inode list.
                 */
 
-               if ((flags & SYNC_CLOSE)  && (vp != NULL)) {
-                       /*
-                        * This is the shutdown case.  We just need to
-                        * flush and invalidate all the pages associated
-                        * with the inode.  Drop the inode lock since
-                        * we can't hold it across calls to the buffer
-                        * cache.
-                        *
-                        * We don't set the VREMAPPING bit in the vnode
-                        * here, because we don't hold the vnode lock
-                        * exclusively.  It doesn't really matter, though,
-                        * because we only come here when we're shutting
-                        * down anyway.
-                        */
-                       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-                       if (XFS_FORCED_SHUTDOWN(mp)) {
-                               bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
-                       } else {
-                               bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF);
+               /*
+                * If we have to flush data or wait for I/O completion
+                * we need to drop the ilock that we currently hold.
+                * If we need to drop the lock, insert a marker if we
+                * have not already done so.
+                */
+               if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) ||
+                   ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
+                       if (mount_locked) {
+                               IPOINTER_INSERT(ip, mp);
                        }
+                       xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-                       xfs_ilock(ip, XFS_ILOCK_SHARED);
-
-               } else if ((flags & SYNC_DELWRI) && (vp != NULL)) {
-                       if (VN_DIRTY(vp)) {
-                               /* We need to have dropped the lock here,
-                                * so insert a marker if we have not already
-                                * done so.
-                                */
-                               if (mount_locked) {
-                                       IPOINTER_INSERT(ip, mp);
-                               }
-
-                               /*
-                                * Drop the inode lock since we can't hold it
-                                * across calls to the buffer cache.
-                                */
-                               xfs_iunlock(ip, XFS_ILOCK_SHARED);
+                       if (flags & SYNC_CLOSE) {
+                               /* Shutdown case. Flush and invalidate. */
+                               if (XFS_FORCED_SHUTDOWN(mp))
+                                       bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
+                               else
+                                       error = bhv_vop_flushinval_pages(vp, 0,
+                                                               -1, FI_REMAPF);
+                       } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
                                error = bhv_vop_flush_pages(vp, (xfs_off_t)0,
                                                        -1, fflag, FI_NONE);
-                               xfs_ilock(ip, XFS_ILOCK_SHARED);
                        }
 
+                       /*
+                        * When freezing, we need to wait to ensure all I/O (including
+                        * direct I/O) is complete, so that no further data modification
+                        * can take place after this point.
+                        */
+                       if (flags & SYNC_IOWAIT)
+                               vn_iowait(vp);
+
+                       xfs_ilock(ip, XFS_ILOCK_SHARED);
                }
 
                if (flags & SYNC_BDFLUSH) {
@@ -1412,17 +1399,13 @@ xfs_sync_inodes(
  * xfs sync routine for internal use
  *
  * This routine supports all of the flags defined for the generic vfs_sync
- * interface as explained above under xfs_sync.  In the interests of not
- * changing interfaces within the 6.5 family, additional internally-
- * required functions are specified within a separate xflags parameter,
- * only available by calling this routine.
+ * interface as explained above under xfs_sync.
  *
  */
 int
 xfs_syncsub(
        xfs_mount_t     *mp,
        int             flags,
-       int             xflags,
        int             *bypassed)
 {
        int             error = 0;
@@ -1444,7 +1427,7 @@ xfs_syncsub(
                if (flags & SYNC_BDFLUSH)
                        xfs_finish_reclaim_all(mp, 1);
                else
-                       error = xfs_sync_inodes(mp, flags, xflags, bypassed);
+                       error = xfs_sync_inodes(mp, flags, bypassed);
        }
 
        /*
@@ -1539,7 +1522,7 @@ xfs_syncsub(
                xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
                xfs_trans_ihold(tp, ip);
                xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-               error = xfs_trans_commit(tp, 0, NULL);
+               error = xfs_trans_commit(tp, 0);
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
        }
@@ -1958,15 +1941,26 @@ xfs_showargs(
        return 0;
 }
 
+/*
+ * Second stage of a freeze. The data is already frozen, now we have to take
+ * care of the metadata. New transactions are already blocked, so we need to
+ * wait for any remaining transactions to drain out before proceeding.
+ */
 STATIC void
 xfs_freeze(
        bhv_desc_t      *bdp)
 {
        xfs_mount_t     *mp = XFS_BHVTOM(bdp);
 
+       /* wait for all modifications to complete */
        while (atomic_read(&mp->m_active_trans) > 0)
                delay(100);
 
+       /* flush inodes and push all remaining buffers out to disk */
+       xfs_quiesce_fs(mp);
+
+       ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
+
        /* Push the superblock and write an unmount record */
        xfs_log_unmount_write(mp);
        xfs_unmountfs_writesb(mp);