]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/commitdiff
xfs: introduce an always_cow mode
authorChristoph Hellwig <hch@lst.de>
Mon, 18 Feb 2019 17:38:49 +0000 (09:38 -0800)
committerDarrick J. Wong <darrick.wong@oracle.com>
Thu, 21 Feb 2019 15:55:07 +0000 (07:55 -0800)
Add a mode where XFS never overwrites existing blocks in place.  This
is to aid debugging our COW code, and also put infatructure in place
for things like possible future support for zoned block devices, which
can't support overwrites.

This mode is enabled globally by doing a:

    echo 1 > /sys/fs/xfs/debug/always_cow

Note that the parameter is global to allow running all tests in xfstests
easily in this mode, which would not easily be possible with a per-fs
sysfs file.

In always_cow mode persistent preallocations are disabled, and fallocate
will fail when called with a 0 mode (with our without
FALLOC_FL_KEEP_SIZE), and not create unwritten extent for zeroed space
when called with FALLOC_FL_ZERO_RANGE or FALLOC_FL_UNSHARE_RANGE.

There are a few interesting xfstests failures when run in always_cow
mode:

 - generic/392 fails because the bytes used in the file used to test
   hole punch recovery are less after the log replay.  This is
   because the blocks written and then punched out are only freed
   with a delay due to the logging mechanism.
 - xfs/170 will fail as the already fragile file streams mechanism
   doesn't seem to interact well with the COW allocator
 - xfs/180 xfs/182 xfs/192 xfs/198 xfs/204 and xfs/208 will claim
   the file system is badly fragmented, but there is not much we
   can do to avoid that when always writing out of place
 - xfs/205 fails because overwriting a file in always_cow mode
   will require new space allocation and the assumption in the
   test thus don't work anymore.
 - xfs/326 fails to modify the file at all in always_cow mode after
   injecting the refcount error, leading to an unexpected md5sum
   after the remount, but that again is expected

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
fs/xfs/xfs_aops.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_file.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_reflink.c
fs/xfs/xfs_reflink.h
fs/xfs/xfs_super.c
fs/xfs/xfs_sysctl.h
fs/xfs/xfs_sysfs.c

index 983d11c27d3299908bbca6e66b79566ea53ad869..7b8bb6bde981028ad692fa07c87b0e4911bf3436 100644 (file)
@@ -1023,7 +1023,7 @@ xfs_vm_bmap(
         * Since we don't pass back blockdev info, we can't return bmap
         * information for rt files either.
         */
-       if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
+       if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip))
                return 0;
        return iomap_bmap(mapping, block, &xfs_iomap_ops);
 }
index 1ee8c5539fa4f2e999808acc021d63be0b4963b0..2db43ff4f8b59d303b70a0dbf537a45bbf897706 100644 (file)
@@ -1162,16 +1162,13 @@ xfs_zero_file_space(
         * by virtue of the hole punch.
         */
        error = xfs_free_file_space(ip, offset, len);
-       if (error)
-               goto out;
+       if (error || xfs_is_always_cow_inode(ip))
+               return error;
 
-       error = xfs_alloc_file_space(ip, round_down(offset, blksize),
+       return xfs_alloc_file_space(ip, round_down(offset, blksize),
                                     round_up(offset + len, blksize) -
                                     round_down(offset, blksize),
                                     XFS_BMAPI_PREALLOC);
-out:
-       return error;
-
 }
 
 static int
index 1d07dcfbbff35e70f73672b340740cca93109372..770cc2edf777f4bb3ef6089986d5d49f75788ee4 100644 (file)
@@ -507,7 +507,7 @@ xfs_file_dio_aio_write(
                 * We can't properly handle unaligned direct I/O to reflink
                 * files yet, as we can't unshare a partial block.
                 */
-               if (xfs_is_reflink_inode(ip)) {
+               if (xfs_is_cow_inode(ip)) {
                        trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
                        return -EREMCHG;
                }
@@ -872,14 +872,27 @@ xfs_file_fallocate(
                                goto out_unlock;
                }
 
-               if (mode & FALLOC_FL_ZERO_RANGE)
+               if (mode & FALLOC_FL_ZERO_RANGE) {
                        error = xfs_zero_file_space(ip, offset, len);
-               else {
-                       if (mode & FALLOC_FL_UNSHARE_RANGE) {
-                               error = xfs_reflink_unshare(ip, offset, len);
-                               if (error)
-                                       goto out_unlock;
+               } else if (mode & FALLOC_FL_UNSHARE_RANGE) {
+                       error = xfs_reflink_unshare(ip, offset, len);
+                       if (error)
+                               goto out_unlock;
+
+                       if (!xfs_is_always_cow_inode(ip)) {
+                               error = xfs_alloc_file_space(ip, offset, len,
+                                               XFS_BMAPI_PREALLOC);
                        }
+               } else {
+                       /*
+                        * If always_cow mode we can't use preallocations and
+                        * thus should not create them.
+                        */
+                       if (xfs_is_always_cow_inode(ip)) {
+                               error = -EOPNOTSUPP;
+                               goto out_unlock;
+                       }
+
                        error = xfs_alloc_file_space(ip, offset, len,
                                                     XFS_BMAPI_PREALLOC);
                }
index 0803ed76e296224f69f4e07d60b6157ec223ca00..01210eae8bf31479c65b424f150a5f0fa29946c9 100644 (file)
@@ -395,12 +395,13 @@ xfs_quota_calc_throttle(
 STATIC xfs_fsblock_t
 xfs_iomap_prealloc_size(
        struct xfs_inode        *ip,
+       int                     whichfork,
        loff_t                  offset,
        loff_t                  count,
        struct xfs_iext_cursor  *icur)
 {
        struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
        xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
        struct xfs_bmbt_irec    prev;
        int                     shift = 0;
@@ -593,7 +594,11 @@ xfs_file_iomap_begin_delay(
         * themselves.  Second the lookup in the extent list is generally faster
         * than going out to the shared extent tree.
         */
-       if (xfs_is_reflink_inode(ip)) {
+       if (xfs_is_cow_inode(ip)) {
+               if (!ip->i_cowfp) {
+                       ASSERT(!xfs_is_reflink_inode(ip));
+                       xfs_ifork_init_cow(ip);
+               }
                cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
                                &ccur, &cmap);
                if (!cow_eof && cmap.br_startoff <= offset_fsb) {
@@ -609,7 +614,7 @@ xfs_file_iomap_begin_delay(
                 * overwriting shared extents.   This includes zeroing of
                 * existing extents that contain data.
                 */
-               if (!xfs_is_reflink_inode(ip) ||
+               if (!xfs_is_cow_inode(ip) ||
                    ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) {
                        trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
                                        &imap);
@@ -619,7 +624,7 @@ xfs_file_iomap_begin_delay(
                xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);
 
                /* Trim the mapping to the nearest shared extent boundary. */
-               error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
+               error = xfs_inode_need_cow(ip, &imap, &shared);
                if (error)
                        goto out_unlock;
 
@@ -648,15 +653,18 @@ xfs_file_iomap_begin_delay(
                 */
                count = min_t(loff_t, count, 1024 * PAGE_SIZE);
                end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
+
+               if (xfs_is_always_cow_inode(ip))
+                       whichfork = XFS_COW_FORK;
        }
 
        error = xfs_qm_dqattach_locked(ip, false);
        if (error)
                goto out_unlock;
 
-       if (eof && whichfork == XFS_DATA_FORK) {
-               prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count,
-                               &icur);
+       if (eof) {
+               prealloc_blocks = xfs_iomap_prealloc_size(ip, whichfork, offset,
+                               count, &icur);
                if (prealloc_blocks) {
                        xfs_extlen_t    align;
                        xfs_off_t       end_offset;
@@ -867,7 +875,7 @@ xfs_ilock_for_iomap(
         * COW writes may allocate delalloc space or convert unwritten COW
         * extents, so we need to make sure to take the lock exclusively here.
         */
-       if (xfs_is_reflink_inode(ip) && is_write) {
+       if (xfs_is_cow_inode(ip) && is_write) {
                /*
                 * FIXME: It could still overwrite on unshared extents and not
                 * need allocation.
@@ -901,7 +909,7 @@ relock:
         * check, so if we got ILOCK_SHARED for a write and but we're now a
         * reflink inode we have to switch to ILOCK_EXCL and relock.
         */
-       if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_reflink_inode(ip)) {
+       if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_cow_inode(ip)) {
                xfs_iunlock(ip, mode);
                mode = XFS_ILOCK_EXCL;
                goto relock;
@@ -973,7 +981,7 @@ xfs_file_iomap_begin(
         * Break shared extents if necessary. Checks for non-blocking IO have
         * been done up front, so we don't need to do them here.
         */
-       if (xfs_is_reflink_inode(ip)) {
+       if (xfs_is_cow_inode(ip)) {
                struct xfs_bmbt_irec    orig = imap;
 
                /* if zeroing doesn't need COW allocation, then we are done. */
index 864ecf27aa75250b0978391b6a83389a9842cf5d..110f927cf943dbc9cdb9858c37cb51ad92c7d07d 100644 (file)
@@ -194,6 +194,7 @@ typedef struct xfs_mount {
         */
        uint32_t                m_generation;
 
+       bool                    m_always_cow;
        bool                    m_fail_unmount;
 #ifdef DEBUG
        /*
index f84b37fa4f1757fea11fae869b985f5f6c668b6f..e2d9179bd50dfb9ac09f9a1ed39db23d0fb9dff1 100644 (file)
@@ -192,7 +192,7 @@ xfs_reflink_trim_around_shared(
        int                     error = 0;
 
        /* Holes, unwritten, and delalloc extents cannot be shared */
-       if (!xfs_is_reflink_inode(ip) || !xfs_bmap_is_real_extent(irec)) {
+       if (!xfs_is_cow_inode(ip) || !xfs_bmap_is_real_extent(irec)) {
                *shared = false;
                return 0;
        }
@@ -234,6 +234,23 @@ xfs_reflink_trim_around_shared(
        }
 }
 
+bool
+xfs_inode_need_cow(
+       struct xfs_inode        *ip,
+       struct xfs_bmbt_irec    *imap,
+       bool                    *shared)
+{
+       /* We can't update any real extents in always COW mode. */
+       if (xfs_is_always_cow_inode(ip) &&
+           !isnullstartblock(imap->br_startblock)) {
+               *shared = true;
+               return 0;
+       }
+
+       /* Trim the mapping to the nearest shared extent boundary. */
+       return xfs_reflink_trim_around_shared(ip, imap, shared);
+}
+
 static int
 xfs_reflink_convert_cow_locked(
        struct xfs_inode        *ip,
@@ -321,7 +338,7 @@ xfs_find_trim_cow_extent(
        if (got.br_startoff > offset_fsb) {
                xfs_trim_extent(imap, imap->br_startoff,
                                got.br_startoff - imap->br_startoff);
-               return xfs_reflink_trim_around_shared(ip, imap, shared);
+               return xfs_inode_need_cow(ip, imap, shared);
        }
 
        *shared = true;
@@ -356,7 +373,10 @@ xfs_reflink_allocate_cow(
        xfs_extlen_t            resblks = 0;
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT(xfs_is_reflink_inode(ip));
+       if (!ip->i_cowfp) {
+               ASSERT(!xfs_is_reflink_inode(ip));
+               xfs_ifork_init_cow(ip);
+       }
 
        error = xfs_find_trim_cow_extent(ip, imap, shared, &found);
        if (error || !*shared)
@@ -542,7 +562,7 @@ xfs_reflink_cancel_cow_range(
        int                     error;
 
        trace_xfs_reflink_cancel_cow_range(ip, offset, count);
-       ASSERT(xfs_is_reflink_inode(ip));
+       ASSERT(ip->i_cowfp);
 
        offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
        if (count == NULLFILEOFF)
index 4a9e3cd4768ada32afadd116bfee527043daa65c..2a3052fbe23e1e6ec21eeb41a565f45335c1e627 100644 (file)
@@ -6,11 +6,24 @@
 #ifndef __XFS_REFLINK_H
 #define __XFS_REFLINK_H 1
 
+static inline bool xfs_is_always_cow_inode(struct xfs_inode *ip)
+{
+       return ip->i_mount->m_always_cow &&
+               xfs_sb_version_hasreflink(&ip->i_mount->m_sb);
+}
+
+static inline bool xfs_is_cow_inode(struct xfs_inode *ip)
+{
+       return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip);
+}
+
 extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp,
                xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen,
                xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal);
 extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
                struct xfs_bmbt_irec *irec, bool *shared);
+bool xfs_inode_need_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
+               bool *shared);
 
 extern int xfs_reflink_allocate_cow(struct xfs_inode *ip,
                struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode,
index 08033ac040d6cf743e23536cd0484b06ce29c358..f093ea244849eb96d31eeaea0f292796f893c778 100644 (file)
@@ -1736,11 +1736,18 @@ xfs_fs_fill_super(
                }
        }
 
-       if (xfs_sb_version_hasreflink(&mp->m_sb) && mp->m_sb.sb_rblocks) {
-               xfs_alert(mp,
+       if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+               if (mp->m_sb.sb_rblocks) {
+                       xfs_alert(mp,
        "reflink not compatible with realtime device!");
-               error = -EINVAL;
-               goto out_filestream_unmount;
+                       error = -EINVAL;
+                       goto out_filestream_unmount;
+               }
+
+               if (xfs_globals.always_cow) {
+                       xfs_info(mp, "using DEBUG-only always_cow mode.");
+                       mp->m_always_cow = true;
+               }
        }
 
        if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
index 168488130a1906e34e1181da624891e20b73436a..ad7f9be130872c9e0664780115ba496bfd49e885 100644 (file)
@@ -85,6 +85,7 @@ struct xfs_globals {
        int     log_recovery_delay;     /* log recovery delay (secs) */
        int     mount_delay;            /* mount setup delay (secs) */
        bool    bug_on_assert;          /* BUG() the kernel on assert failure */
+       bool    always_cow;             /* use COW fork for all overwrites */
 };
 extern struct xfs_globals      xfs_globals;
 
index cd6a994a72500ac48755549db33d5510e3c36fdc..cabda13f3c64168a7a33d01e37bf895f9e4a07a4 100644 (file)
@@ -183,10 +183,34 @@ mount_delay_show(
 }
 XFS_SYSFS_ATTR_RW(mount_delay);
 
+static ssize_t
+always_cow_store(
+       struct kobject  *kobject,
+       const char      *buf,
+       size_t          count)
+{
+       ssize_t         ret;
+
+       ret = kstrtobool(buf, &xfs_globals.always_cow);
+       if (ret < 0)
+               return ret;
+       return count;
+}
+
+static ssize_t
+always_cow_show(
+       struct kobject  *kobject,
+       char            *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.always_cow);
+}
+XFS_SYSFS_ATTR_RW(always_cow);
+
 static struct attribute *xfs_dbg_attrs[] = {
        ATTR_LIST(bug_on_assert),
        ATTR_LIST(log_recovery_delay),
        ATTR_LIST(mount_delay),
+       ATTR_LIST(always_cow),
        NULL,
 };