git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/commitdiff
Merge branch 'xfs-4.10-misc-fixes-2' into iomap-4.10-directio
author     Dave Chinner <david@fromorbit.com>
           Wed, 30 Nov 2016 01:49:38 +0000 (12:49 +1100)
committer  Dave Chinner <david@fromorbit.com>
           Wed, 30 Nov 2016 01:49:38 +0000 (12:49 +1100)
30 files changed:
fs/iomap.c
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap.h
fs/xfs/libxfs/xfs_btree.c
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_dir2.c
fs/xfs/libxfs/xfs_dquot_buf.c
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/libxfs/xfs_inode_buf.h
fs/xfs/libxfs/xfs_inode_fork.c
fs/xfs/libxfs/xfs_inode_fork.h
fs/xfs/libxfs/xfs_types.h
fs/xfs/xfs_aops.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_buf.h
fs/xfs/xfs_file.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_icreate_item.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_qm.c
fs/xfs/xfs_reflink.c
fs/xfs/xfs_reflink.h
fs/xfs/xfs_sysfs.c
fs/xfs/xfs_trace.h
include/linux/iomap.h

index 51a02573405e5b1ccba00d0cdf92b0895881371f..13dd413b2b9c6a52e4ff0a966aff6902fc29df9a 100644 (file)
@@ -433,8 +433,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
        struct page *page = data;
        int ret;
 
-       ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length,
-                       NULL, iomap);
+       ret = __block_write_begin_int(page, pos, length, NULL, iomap);
        if (ret)
                return ret;
 
@@ -562,7 +561,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
        }
 
        while (len > 0) {
-               ret = iomap_apply(inode, start, len, 0, ops, &ctx,
+               ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx,
                                iomap_fiemap_actor);
                /* inode with no (attribute) mapping will give ENOENT */
                if (ret == -ENOENT)
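
For orientation, the two fs/iomap.c hunks above change how the generic iomap code drives the filesystem's mapping callback: iomap_page_mkwrite_actor() now passes the full file position rather than the within-page offset, and iomap_fiemap() now passes the new IOMAP_REPORT flag instead of 0 so the mapping lookup can be told apart from an ordinary read/write call. The standalone C sketch below illustrates the general apply/actor loop that iomap_fiemap() is built on; the flag value, struct, and get_mapping() helper are illustrative stand-ins, not the kernel's actual definitions.

#include <stdint.h>
#include <stdio.h>

/* Illustrative flag and mapping struct -- not the kernel's definitions. */
#define SKETCH_IOMAP_REPORT	(1u << 4)

struct mapping {
	uint64_t offset;	/* file offset the mapping starts at */
	uint64_t length;	/* bytes covered by this mapping */
};

/* Hypothetical stand-in for the filesystem's mapping callback. */
static int get_mapping(uint64_t pos, uint64_t length, unsigned int flags,
		       struct mapping *map)
{
	(void)flags;		/* a real fs would treat a REPORT lookup differently */
	map->offset = pos;
	map->length = length < 4096 ? length : 4096;	/* pretend 4k extents */
	return 0;
}

/* Actor invoked once per mapping, in the role of iomap_fiemap_actor() above. */
static int64_t report_actor(uint64_t pos, uint64_t length, const struct mapping *map)
{
	(void)pos;
	printf("mapping at %llu for %llu bytes\n",
	       (unsigned long long)map->offset, (unsigned long long)map->length);
	return (int64_t)length;	/* bytes consumed */
}

/* Core apply step: fetch one mapping, hand it to the actor. */
static int64_t apply(uint64_t pos, uint64_t length, unsigned int flags)
{
	struct mapping map;
	int ret = get_mapping(pos, length, flags, &map);

	if (ret)
		return ret;
	return report_actor(pos, map.length, &map);
}

int main(void)
{
	uint64_t pos = 0, len = 16384;

	while (len > 0) {	/* mirrors the while (len > 0) loop in iomap_fiemap() */
		int64_t done = apply(pos, len, SKETCH_IOMAP_REPORT);

		if (done <= 0)
			break;
		pos += (uint64_t)done;
		len -= (uint64_t)done;
	}
	return 0;
}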
index c27344cf38e177187f048eb799297f281eea5a2f..6b7e6eb294145392c0d84b98ffe65a3da4d78ba6 100644 (file)
@@ -49,6 +49,8 @@
 #include "xfs_rmap.h"
 #include "xfs_ag_resv.h"
 #include "xfs_refcount.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_icache.h"
 
 
 kmem_zone_t            *xfs_bmap_free_item_zone;
@@ -190,8 +192,12 @@ xfs_bmap_worst_indlen(
        int             maxrecs;        /* maximum record count at this level */
        xfs_mount_t     *mp;            /* mount structure */
        xfs_filblks_t   rval;           /* return value */
+       xfs_filblks_t   orig_len;
 
        mp = ip->i_mount;
+
+       /* Calculate the worst-case size of the bmbt. */
+       orig_len = len;
        maxrecs = mp->m_bmap_dmxr[0];
        for (level = 0, rval = 0;
             level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
@@ -199,12 +205,20 @@ xfs_bmap_worst_indlen(
                len += maxrecs - 1;
                do_div(len, maxrecs);
                rval += len;
-               if (len == 1)
-                       return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
+               if (len == 1) {
+                       rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
                                level - 1;
+                       break;
+               }
                if (level == 0)
                        maxrecs = mp->m_bmap_dmxr[1];
        }
+
+       /* Calculate the worst-case size of the rmapbt. */
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
+                               mp->m_rmap_maxlevels;
+
        return rval;
 }
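
The hunk above reworks xfs_bmap_worst_indlen() so the per-level result is accumulated (rather than returned early) and, on rmapbt filesystems, padded with a worst-case rmap btree estimate. The core arithmetic is a repeated ceiling division: each bmbt level needs ceil(len / maxrecs) blocks, plus one extra block for every remaining level once a level collapses to a single block. A simplified userspace sketch of that loop follows; the record count and level count are made-up geometry, not real XFS values.

#include <stdint.h>
#include <stdio.h>

/*
 * Simplified model of the loop in xfs_bmap_worst_indlen() above: every bmbt
 * level needs ceil(len / maxrecs) blocks, and once a level fits in a single
 * block each remaining level costs one more block.
 */
static uint64_t worst_indlen_sketch(uint64_t len, uint64_t maxrecs, int maxlevels)
{
	uint64_t rval = 0;
	int level;

	for (level = 0; level < maxlevels; level++) {
		len = (len + maxrecs - 1) / maxrecs;	/* ceiling division */
		rval += len;
		if (len == 1) {
			rval += (uint64_t)(maxlevels - level - 1);
			break;
		}
	}
	return rval;
}

int main(void)
{
	/* e.g. 10000 blocks, 100 records per btree block, 5 max levels -> 104 */
	printf("%llu worst-case indirect blocks\n",
	       (unsigned long long)worst_indlen_sketch(10000, 100, 5));
	return 0;
}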
 
@@ -515,7 +529,7 @@ xfs_bmap_trace_exlist(
                state |= BMAP_ATTRFORK;
 
        ifp = XFS_IFORK_PTR(ip, whichfork);
-       ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
+       ASSERT(cnt == xfs_iext_count(ifp));
        for (idx = 0; idx < cnt; idx++)
                trace_xfs_extlist(ip, idx, whichfork, caller_ip);
 }
@@ -811,7 +825,7 @@ try_another_ag:
                                XFS_BTREE_LONG_PTRS);
 
        arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       nextents =  xfs_iext_count(ifp);
        for (cnt = i = 0; i < nextents; i++) {
                ep = xfs_iext_get_ext(ifp, i);
                if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
@@ -1296,7 +1310,7 @@ xfs_bmap_read_extents(
        /*
         * Here with bp and block set to the leftmost leaf node in the tree.
         */
-       room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       room = xfs_iext_count(ifp);
        i = 0;
        /*
         * Loop over all leaf nodes.  Copy information to the extent records.
@@ -1361,7 +1375,7 @@ xfs_bmap_read_extents(
                        return error;
                block = XFS_BUF_TO_BLOCK(bp);
        }
-       ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
+       ASSERT(i == xfs_iext_count(ifp));
        ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
        XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
        return 0;
@@ -1370,97 +1384,6 @@ error0:
        return -EFSCORRUPTED;
 }
 
-
-/*
- * Search the extent records for the entry containing block bno.
- * If bno lies in a hole, point to the next entry.  If bno lies
- * past eof, *eofp will be set, and *prevp will contain the last
- * entry (null if none).  Else, *lastxp will be set to the index
- * of the found entry; *gotp will contain the entry.
- */
-STATIC xfs_bmbt_rec_host_t *           /* pointer to found extent entry */
-xfs_bmap_search_multi_extents(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       xfs_fileoff_t   bno,            /* block number searched for */
-       int             *eofp,          /* out: end of file found */
-       xfs_extnum_t    *lastxp,        /* out: last extent index */
-       xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
-       xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
-{
-       xfs_bmbt_rec_host_t *ep;                /* extent record pointer */
-       xfs_extnum_t    lastx;          /* last extent index */
-
-       /*
-        * Initialize the extent entry structure to catch access to
-        * uninitialized br_startblock field.
-        */
-       gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
-       gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
-       gotp->br_state = XFS_EXT_INVALID;
-       gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
-       prevp->br_startoff = NULLFILEOFF;
-
-       ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
-       if (lastx > 0) {
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
-       }
-       if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
-               xfs_bmbt_get_all(ep, gotp);
-               *eofp = 0;
-       } else {
-               if (lastx > 0) {
-                       *gotp = *prevp;
-               }
-               *eofp = 1;
-               ep = NULL;
-       }
-       *lastxp = lastx;
-       return ep;
-}
-
-/*
- * Search the extents list for the inode, for the extent containing bno.
- * If bno lies in a hole, point to the next entry.  If bno lies past eof,
- * *eofp will be set, and *prevp will contain the last entry (null if none).
- * Else, *lastxp will be set to the index of the found
- * entry; *gotp will contain the entry.
- */
-xfs_bmbt_rec_host_t *                 /* pointer to found extent entry */
-xfs_bmap_search_extents(
-       xfs_inode_t     *ip,            /* incore inode pointer */
-       xfs_fileoff_t   bno,            /* block number searched for */
-       int             fork,           /* data or attr fork */
-       int             *eofp,          /* out: end of file found */
-       xfs_extnum_t    *lastxp,        /* out: last extent index */
-       xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
-       xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
-{
-       xfs_ifork_t     *ifp;           /* inode fork pointer */
-       xfs_bmbt_rec_host_t  *ep;            /* extent record pointer */
-
-       XFS_STATS_INC(ip->i_mount, xs_look_exlist);
-       ifp = XFS_IFORK_PTR(ip, fork);
-
-       ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
-
-       if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
-                    !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
-               xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
-                               "Access to block zero in inode %llu "
-                               "start_block: %llx start_off: %llx "
-                               "blkcnt: %llx extent-state: %x lastx: %x",
-                       (unsigned long long)ip->i_ino,
-                       (unsigned long long)gotp->br_startblock,
-                       (unsigned long long)gotp->br_startoff,
-                       (unsigned long long)gotp->br_blockcount,
-                       gotp->br_state, *lastxp);
-               *lastxp = NULLEXTNUM;
-               *eofp = 1;
-               return NULL;
-       }
-       return ep;
-}
-
 /*
  * Returns the file-relative block number of the first unused block(s)
  * in the file with at least "len" logically contiguous blocks free.
@@ -1497,7 +1420,7 @@ xfs_bmap_first_unused(
            (error = xfs_iread_extents(tp, ip, whichfork)))
                return error;
        lowest = *first_unused;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       nextents = xfs_iext_count(ifp);
        for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
                xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
                off = xfs_bmbt_get_startoff(ep);
@@ -1523,44 +1446,44 @@ xfs_bmap_first_unused(
  */
 int                                            /* error */
 xfs_bmap_last_before(
-       xfs_trans_t     *tp,                    /* transaction pointer */
-       xfs_inode_t     *ip,                    /* incore inode */
-       xfs_fileoff_t   *last_block,            /* last block */
-       int             whichfork)              /* data or attr fork */
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* incore inode */
+       xfs_fileoff_t           *last_block,    /* last block */
+       int                     whichfork)      /* data or attr fork */
 {
-       xfs_fileoff_t   bno;                    /* input file offset */
-       int             eof;                    /* hit end of file */
-       xfs_bmbt_rec_host_t *ep;                /* pointer to last extent */
-       int             error;                  /* error return value */
-       xfs_bmbt_irec_t got;                    /* current extent value */
-       xfs_ifork_t     *ifp;                   /* inode fork pointer */
-       xfs_extnum_t    lastx;                  /* last extent used */
-       xfs_bmbt_irec_t prev;                   /* previous extent value */
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
+       struct xfs_bmbt_irec    got;
+       xfs_extnum_t            idx;
+       int                     error;
 
-       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
-              return -EIO;
-       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+       switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+       case XFS_DINODE_FMT_LOCAL:
                *last_block = 0;
                return 0;
+       case XFS_DINODE_FMT_BTREE:
+       case XFS_DINODE_FMT_EXTENTS:
+               break;
+       default:
+               return -EIO;
        }
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
-           (error = xfs_iread_extents(tp, ip, whichfork)))
-               return error;
-       bno = *last_block - 1;
-       ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
-               &prev);
-       if (eof || xfs_bmbt_get_startoff(ep) > bno) {
-               if (prev.br_startoff == NULLFILEOFF)
-                       *last_block = 0;
-               else
-                       *last_block = prev.br_startoff + prev.br_blockcount;
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
        }
-       /*
-        * Otherwise *last_block is already the right answer.
-        */
+
+       if (xfs_iext_lookup_extent(ip, ifp, *last_block - 1, &idx, &got)) {
+               if (got.br_startoff <= *last_block - 1)
+                       return 0;
+       }
+
+       if (xfs_iext_get_extent(ifp, idx - 1, &got)) {
+               *last_block = got.br_startoff + got.br_blockcount;
+               return 0;
+       }
+
+       *last_block = 0;
        return 0;
 }
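
The rewritten xfs_bmap_last_before() above replaces the old xfs_bmap_search_extents() call with xfs_iext_lookup_extent()/xfs_iext_get_extent(): look up the extent at or after *last_block - 1; if that block lies inside a mapped extent, *last_block is already correct; otherwise fall back to the end of the previous extent, or 0 when there is none. The self-contained sketch below mirrors that decision sequence over a plain sorted array standing in for the incore extent list; the struct and helper names are illustrative only.

#include <stdint.h>
#include <stdio.h>

struct ext { uint64_t off, len; };	/* simplified sorted, non-overlapping extents */

/*
 * Userspace sketch of the rewritten xfs_bmap_last_before() logic above.
 */
static uint64_t last_before(const struct ext *e, int n, uint64_t last)
{
	uint64_t bno = last - 1;
	int i;

	/* linear stand-in for xfs_iext_lookup_extent(): first extent ending past bno */
	for (i = 0; i < n; i++)
		if (e[i].off + e[i].len > bno)
			break;

	if (i < n && e[i].off <= bno)
		return last;				/* bno is inside a mapped extent */
	if (i > 0)
		return e[i - 1].off + e[i - 1].len;	/* end of the previous extent */
	return 0;					/* nothing mapped before bno */
}

int main(void)
{
	const struct ext map[2] = { { 0, 4 }, { 10, 5 } };

	printf("%llu\n", (unsigned long long)last_before(map, 2, 8));	/* 4 */
	printf("%llu\n", (unsigned long long)last_before(map, 2, 12));	/* 12 */
	return 0;
}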
 
@@ -1582,7 +1505,7 @@ xfs_bmap_last_extent(
                        return error;
        }
 
-       nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+       nextents = xfs_iext_count(ifp);
        if (nextents == 0) {
                *is_empty = 1;
                return 0;
@@ -1735,7 +1658,7 @@ xfs_bmap_add_extent_delay_real(
                                                &bma->ip->i_d.di_nextents);
 
        ASSERT(bma->idx >= 0);
-       ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(bma->idx <= xfs_iext_count(ifp));
        ASSERT(!isnullstartblock(new->br_startblock));
        ASSERT(!bma->cur ||
               (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
@@ -1794,7 +1717,7 @@ xfs_bmap_add_extent_delay_real(
         * Don't set contiguous if the combined extent would be too large.
         * Also check for all-three-contiguous being too large.
         */
-       if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+       if (bma->idx < xfs_iext_count(ifp) - 1) {
                state |= BMAP_RIGHT_VALID;
                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
 
@@ -2300,7 +2223,7 @@ xfs_bmap_add_extent_unwritten_real(
        ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
 
        ASSERT(*idx >= 0);
-       ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(*idx <= xfs_iext_count(ifp));
        ASSERT(!isnullstartblock(new->br_startblock));
 
        XFS_STATS_INC(mp, xs_add_exlist);
@@ -2356,7 +2279,7 @@ xfs_bmap_add_extent_unwritten_real(
         * Don't set contiguous if the combined extent would be too large.
         * Also check for all-three-contiguous being too large.
         */
-       if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+       if (*idx < xfs_iext_count(&ip->i_df) - 1) {
                state |= BMAP_RIGHT_VALID;
                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
                if (isnullstartblock(RIGHT.br_startblock))
@@ -2836,7 +2759,7 @@ xfs_bmap_add_extent_hole_delay(
         * Check and set flags if the current (right) segment exists.
         * If it doesn't exist, we're converting the hole at end-of-file.
         */
-       if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+       if (*idx < xfs_iext_count(ifp)) {
                state |= BMAP_RIGHT_VALID;
                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
 
@@ -2966,7 +2889,7 @@ xfs_bmap_add_extent_hole_real(
        ifp = XFS_IFORK_PTR(bma->ip, whichfork);
 
        ASSERT(bma->idx >= 0);
-       ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(bma->idx <= xfs_iext_count(ifp));
        ASSERT(!isnullstartblock(new->br_startblock));
        ASSERT(!bma->cur ||
               !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
@@ -2992,7 +2915,7 @@ xfs_bmap_add_extent_hole_real(
         * Check and set flags if this segment has a current value.
         * Not true if we're inserting into the "hole" at eof.
         */
-       if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+       if (bma->idx < xfs_iext_count(ifp)) {
                state |= BMAP_RIGHT_VALID;
                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
                if (isnullstartblock(right.br_startblock))
@@ -3974,9 +3897,6 @@ xfs_bmap_remap_alloc(
         * allocating, so skip that check by pretending to be freeing.
         */
        error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
-       if (error)
-               goto error0;
-error0:
        xfs_perag_put(args.pag);
        if (error)
                trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
@@ -3999,6 +3919,39 @@ xfs_bmap_alloc(
        return xfs_bmap_btalloc(ap);
 }
 
+/* Trim extent to fit a logical block range. */
+void
+xfs_trim_extent(
+       struct xfs_bmbt_irec    *irec,
+       xfs_fileoff_t           bno,
+       xfs_filblks_t           len)
+{
+       xfs_fileoff_t           distance;
+       xfs_fileoff_t           end = bno + len;
+
+       if (irec->br_startoff + irec->br_blockcount <= bno ||
+           irec->br_startoff >= end) {
+               irec->br_blockcount = 0;
+               return;
+       }
+
+       if (irec->br_startoff < bno) {
+               distance = bno - irec->br_startoff;
+               if (isnullstartblock(irec->br_startblock))
+                       irec->br_startblock = DELAYSTARTBLOCK;
+               if (irec->br_startblock != DELAYSTARTBLOCK &&
+                   irec->br_startblock != HOLESTARTBLOCK)
+                       irec->br_startblock += distance;
+               irec->br_startoff += distance;
+               irec->br_blockcount -= distance;
+       }
+
+       if (end < irec->br_startoff + irec->br_blockcount) {
+               distance = irec->br_startoff + irec->br_blockcount - end;
+               irec->br_blockcount -= distance;
+       }
+}
+
 /*
  * Trim the returned map to the required bounds
  */
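
xfs_trim_extent(), added above, clips a mapping to the logical range [bno, bno + len): a non-overlapping mapping collapses to zero blocks, a front overlap advances both the start offset and (for real extents) the start block, and a tail overlap shortens the block count. The standalone sketch below demonstrates those semantics with a simplified extent struct; the delalloc/hole start-block special cases are deliberately omitted.

#include <assert.h>
#include <stdint.h>

/* Simplified stand-in for struct xfs_bmbt_irec; real fields use xfs_* types. */
struct irec {
	uint64_t startoff;	/* logical file block */
	uint64_t startblock;	/* disk block */
	uint64_t blockcount;
};

/* Same trimming rule as xfs_trim_extent() above, minus the delalloc/hole cases. */
static void trim_extent(struct irec *irec, uint64_t bno, uint64_t len)
{
	uint64_t end = bno + len;
	uint64_t distance;

	if (irec->startoff + irec->blockcount <= bno || irec->startoff >= end) {
		irec->blockcount = 0;		/* no overlap with [bno, end) */
		return;
	}
	if (irec->startoff < bno) {		/* clip the front */
		distance = bno - irec->startoff;
		irec->startblock += distance;
		irec->startoff += distance;
		irec->blockcount -= distance;
	}
	if (end < irec->startoff + irec->blockcount)	/* clip the tail */
		irec->blockcount = end - irec->startoff;
}

int main(void)
{
	struct irec m = { .startoff = 10, .startblock = 100, .blockcount = 20 };

	trim_extent(&m, 15, 10);	/* keep only blocks 15..24 */
	assert(m.startoff == 15 && m.startblock == 105 && m.blockcount == 10);
	return 0;
}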
@@ -4115,12 +4068,11 @@ xfs_bmapi_read(
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_ifork        *ifp;
        struct xfs_bmbt_irec    got;
-       struct xfs_bmbt_irec    prev;
        xfs_fileoff_t           obno;
        xfs_fileoff_t           end;
-       xfs_extnum_t            lastx;
+       xfs_extnum_t            idx;
        int                     error;
-       int                     eof;
+       bool                    eof = false;
        int                     n = 0;
        int                     whichfork = xfs_bmapi_whichfork(flags);
 
@@ -4160,7 +4112,8 @@ xfs_bmapi_read(
                        return error;
        }
 
-       xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
+       if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got))
+               eof = true;
        end = bno + len;
        obno = bno;
 
@@ -4191,10 +4144,8 @@ xfs_bmapi_read(
                        break;
 
                /* Else go on to the next record. */
-               if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
-                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
-               else
-                       eof = 1;
+               if (!xfs_iext_get_extent(ifp, ++idx, &got))
+                       eof = true;
        }
        *nmap = n;
        return 0;
@@ -4204,10 +4155,10 @@ int
 xfs_bmapi_reserve_delalloc(
        struct xfs_inode        *ip,
        int                     whichfork,
-       xfs_fileoff_t           aoff,
+       xfs_fileoff_t           off,
        xfs_filblks_t           len,
+       xfs_filblks_t           prealloc,
        struct xfs_bmbt_irec    *got,
-       struct xfs_bmbt_irec    *prev,
        xfs_extnum_t            *lastx,
        int                     eof)
 {
@@ -4218,10 +4169,17 @@ xfs_bmapi_reserve_delalloc(
        char                    rt = XFS_IS_REALTIME_INODE(ip);
        xfs_extlen_t            extsz;
        int                     error;
+       xfs_fileoff_t           aoff = off;
 
-       alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
+       /*
+        * Cap the alloc length. Keep track of prealloc so we know whether to
+        * tag the inode before we return.
+        */
+       alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
        if (!eof)
                alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
+       if (prealloc && alen >= len)
+               prealloc = alen - len;
 
        /* Figure out the extent size, adjust alen */
        if (whichfork == XFS_COW_FORK)
@@ -4229,7 +4187,12 @@ xfs_bmapi_reserve_delalloc(
        else
                extsz = xfs_get_extsz_hint(ip);
        if (extsz) {
-               error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
+               struct xfs_bmbt_irec    prev;
+
+               if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev))
+                       prev.br_startoff = NULLFILEOFF;
+
+               error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof,
                                               1, 0, &aoff, &alen);
                ASSERT(!error);
        }
@@ -4282,6 +4245,16 @@ xfs_bmapi_reserve_delalloc(
         */
        xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
 
+       /*
+        * Tag the inode if blocks were preallocated. Note that COW fork
+        * preallocation can occur at the start or end of the extent, even when
+        * prealloc == 0, so we must also check the aligned offset and length.
+        */
+       if (whichfork == XFS_DATA_FORK && prealloc)
+               xfs_inode_set_eofblocks_tag(ip);
+       if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
+               xfs_inode_set_cowblocks_tag(ip);
+
        ASSERT(got->br_startoff <= aoff);
        ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
        ASSERT(isnullstartblock(got->br_startblock));
@@ -4319,7 +4292,7 @@ xfs_bmapi_allocate(
        if (bma->wasdel) {
                bma->length = (xfs_extlen_t)bma->got.br_blockcount;
                bma->offset = bma->got.br_startoff;
-               if (bma->idx != NULLEXTNUM && bma->idx) {
+               if (bma->idx) {
                        xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
                                         &bma->prev);
                }
@@ -4533,7 +4506,7 @@ xfs_bmapi_write(
        struct xfs_ifork        *ifp;
        struct xfs_bmalloca     bma = { NULL }; /* args for xfs_bmap_alloc */
        xfs_fileoff_t           end;            /* end of mapped file region */
-       int                     eof;            /* after the end of extents */
+       bool                    eof = false;    /* after the end of extents */
        int                     error;          /* error return */
        int                     n;              /* current extent index */
        xfs_fileoff_t           obno;           /* old block number (offset) */
@@ -4611,12 +4584,14 @@ xfs_bmapi_write(
                        goto error0;
        }
 
-       xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
-                               &bma.prev);
        n = 0;
        end = bno + len;
        obno = bno;
 
+       if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.idx, &bma.got))
+               eof = true;
+       if (!xfs_iext_get_extent(ifp, bma.idx - 1, &bma.prev))
+               bma.prev.br_startoff = NULLFILEOFF;
        bma.tp = tp;
        bma.ip = ip;
        bma.total = total;
@@ -4703,11 +4678,8 @@ xfs_bmapi_write(
 
                /* Else go on to the next record. */
                bma.prev = bma.got;
-               if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
-                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
-                                        &bma.got);
-               } else
-                       eof = 1;
+               if (!xfs_iext_get_extent(ifp, ++bma.idx, &bma.got))
+                       eof = true;
        }
        *nmap = n;
 
@@ -4829,6 +4801,219 @@ xfs_bmap_split_indlen(
        return stolen;
 }
 
+int
+xfs_bmap_del_extent_delay(
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       xfs_extnum_t            *idx,
+       struct xfs_bmbt_irec    *got,
+       struct xfs_bmbt_irec    *del)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
+       struct xfs_bmbt_irec    new;
+       int64_t                 da_old, da_new, da_diff = 0;
+       xfs_fileoff_t           del_endoff, got_endoff;
+       xfs_filblks_t           got_indlen, new_indlen, stolen;
+       int                     error = 0, state = 0;
+       bool                    isrt;
+
+       XFS_STATS_INC(mp, xs_del_exlist);
+
+       isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+       del_endoff = del->br_startoff + del->br_blockcount;
+       got_endoff = got->br_startoff + got->br_blockcount;
+       da_old = startblockval(got->br_startblock);
+       da_new = 0;
+
+       ASSERT(*idx >= 0);
+       ASSERT(*idx <= xfs_iext_count(ifp));
+       ASSERT(del->br_blockcount > 0);
+       ASSERT(got->br_startoff <= del->br_startoff);
+       ASSERT(got_endoff >= del_endoff);
+
+       if (isrt) {
+               int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
+
+               do_div(rtexts, mp->m_sb.sb_rextsize);
+               xfs_mod_frextents(mp, rtexts);
+       }
+
+       /*
+        * Update the inode delalloc counter now and wait to update the
+        * sb counters as we might have to borrow some blocks for the
+        * indirect block accounting.
+        */
+       xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0,
+                       isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+       ip->i_delayed_blks -= del->br_blockcount;
+
+       if (whichfork == XFS_COW_FORK)
+               state |= BMAP_COWFORK;
+
+       if (got->br_startoff == del->br_startoff)
+               state |= BMAP_LEFT_CONTIG;
+       if (got_endoff == del_endoff)
+               state |= BMAP_RIGHT_CONTIG;
+
+       switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+               /*
+                * Matches the whole extent.  Delete the entry.
+                */
+               xfs_iext_remove(ip, *idx, 1, state);
+               --*idx;
+               break;
+       case BMAP_LEFT_CONTIG:
+               /*
+                * Deleting the first part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_startoff = del_endoff;
+               got->br_blockcount -= del->br_blockcount;
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
+                               got->br_blockcount), da_old);
+               got->br_startblock = nullstartblock((int)da_new);
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+       case BMAP_RIGHT_CONTIG:
+               /*
+                * Deleting the last part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_blockcount = got->br_blockcount - del->br_blockcount;
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
+                               got->br_blockcount), da_old);
+               got->br_startblock = nullstartblock((int)da_new);
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+       case 0:
+               /*
+                * Deleting the middle of the extent.
+                *
+                * Distribute the original indlen reservation across the two new
+                * extents.  Steal blocks from the deleted extent if necessary.
+                * Stealing blocks simply fudges the fdblocks accounting below.
+                * Warn if either of the new indlen reservations is zero as this
+                * can lead to delalloc problems.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+
+               got->br_blockcount = del->br_startoff - got->br_startoff;
+               got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
+
+               new.br_blockcount = got_endoff - del_endoff;
+               new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
+
+               WARN_ON_ONCE(!got_indlen || !new_indlen);
+               stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
+                                                      del->br_blockcount);
+
+               got->br_startblock = nullstartblock((int)got_indlen);
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_);
+
+               new.br_startoff = del_endoff;
+               new.br_state = got->br_state;
+               new.br_startblock = nullstartblock((int)new_indlen);
+
+               ++*idx;
+               xfs_iext_insert(ip, *idx, 1, &new, state);
+
+               da_new = got_indlen + new_indlen - stolen;
+               del->br_blockcount -= stolen;
+               break;
+       }
+
+       ASSERT(da_old >= da_new);
+       da_diff = da_old - da_new;
+       if (!isrt)
+               da_diff += del->br_blockcount;
+       if (da_diff)
+               xfs_mod_fdblocks(mp, da_diff, false);
+       return error;
+}
+
+void
+xfs_bmap_del_extent_cow(
+       struct xfs_inode        *ip,
+       xfs_extnum_t            *idx,
+       struct xfs_bmbt_irec    *got,
+       struct xfs_bmbt_irec    *del)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_bmbt_irec    new;
+       xfs_fileoff_t           del_endoff, got_endoff;
+       int                     state = BMAP_COWFORK;
+
+       XFS_STATS_INC(mp, xs_del_exlist);
+
+       del_endoff = del->br_startoff + del->br_blockcount;
+       got_endoff = got->br_startoff + got->br_blockcount;
+
+       ASSERT(*idx >= 0);
+       ASSERT(*idx <= xfs_iext_count(ifp));
+       ASSERT(del->br_blockcount > 0);
+       ASSERT(got->br_startoff <= del->br_startoff);
+       ASSERT(got_endoff >= del_endoff);
+       ASSERT(!isnullstartblock(got->br_startblock));
+
+       if (got->br_startoff == del->br_startoff)
+               state |= BMAP_LEFT_CONTIG;
+       if (got_endoff == del_endoff)
+               state |= BMAP_RIGHT_CONTIG;
+
+       switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+               /*
+                * Matches the whole extent.  Delete the entry.
+                */
+               xfs_iext_remove(ip, *idx, 1, state);
+               --*idx;
+               break;
+       case BMAP_LEFT_CONTIG:
+               /*
+                * Deleting the first part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_startoff = del_endoff;
+               got->br_blockcount -= del->br_blockcount;
+               got->br_startblock = del->br_startblock + del->br_blockcount;
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+       case BMAP_RIGHT_CONTIG:
+               /*
+                * Deleting the last part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_blockcount -= del->br_blockcount;
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+       case 0:
+               /*
+                * Deleting the middle of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_blockcount = del->br_startoff - got->br_startoff;
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               new.br_startoff = del_endoff;
+               new.br_blockcount = got_endoff - del_endoff;
+               new.br_state = got->br_state;
+               new.br_startblock = del->br_startblock + del->br_blockcount;
+
+               ++*idx;
+               xfs_iext_insert(ip, *idx, 1, &new, state);
+               break;
+       }
+}
+
 /*
  * Called by xfs_bmapi to update file extent records and the btree
  * after removing space (or undoing a delayed allocation).
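
Both new helpers above, xfs_bmap_del_extent_delay() and xfs_bmap_del_extent_cow(), key their switch statements off the same two state bits: whether the deleted range starts at the extent's start (BMAP_LEFT_CONTIG) and/or ends at the extent's end (BMAP_RIGHT_CONTIG), giving the four cases whole, front, back, and middle (which splits the extent in two). A tiny standalone illustration of that classification follows; the flag values and function name are made up for the example.

#include <stdint.h>
#include <stdio.h>

/* Illustrative flags mirroring the BMAP_*_CONTIG state bits used above. */
#define LEFT_CONTIG	(1 << 0)
#define RIGHT_CONTIG	(1 << 1)

/*
 * Classify how a deleted range [del_off, del_off + del_len) sits inside an
 * existing extent [got_off, got_off + got_len), as in the switch statements
 * of xfs_bmap_del_extent_delay()/_cow().
 */
static int classify_delete(uint64_t got_off, uint64_t got_len,
			   uint64_t del_off, uint64_t del_len)
{
	int state = 0;

	if (got_off == del_off)
		state |= LEFT_CONTIG;		/* delete starts at extent start */
	if (got_off + got_len == del_off + del_len)
		state |= RIGHT_CONTIG;		/* delete ends at extent end */
	return state;
}

int main(void)
{
	printf("whole:  %d\n", classify_delete(0, 10, 0, 10));	/* 3: remove the entry */
	printf("front:  %d\n", classify_delete(0, 10, 0, 4));	/* 1: shrink from the left */
	printf("back:   %d\n", classify_delete(0, 10, 6, 4));	/* 2: shrink from the right */
	printf("middle: %d\n", classify_delete(0, 10, 3, 4));	/* 0: split into two extents */
	return 0;
}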
@@ -4876,8 +5061,7 @@ xfs_bmap_del_extent(
                state |= BMAP_COWFORK;
 
        ifp = XFS_IFORK_PTR(ip, whichfork);
-       ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
-               (uint)sizeof(xfs_bmbt_rec_t)));
+       ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp)));
        ASSERT(del->br_blockcount > 0);
        ep = xfs_iext_get_ext(ifp, *idx);
        xfs_bmbt_get_all(ep, &got);
@@ -5171,175 +5355,6 @@ done:
        return error;
 }
 
-/* Remove an extent from the CoW fork.  Similar to xfs_bmap_del_extent. */
-int
-xfs_bunmapi_cow(
-       struct xfs_inode                *ip,
-       struct xfs_bmbt_irec            *del)
-{
-       xfs_filblks_t                   da_new;
-       xfs_filblks_t                   da_old;
-       xfs_fsblock_t                   del_endblock = 0;
-       xfs_fileoff_t                   del_endoff;
-       int                             delay;
-       struct xfs_bmbt_rec_host        *ep;
-       int                             error;
-       struct xfs_bmbt_irec            got;
-       xfs_fileoff_t                   got_endoff;
-       struct xfs_ifork                *ifp;
-       struct xfs_mount                *mp;
-       xfs_filblks_t                   nblks;
-       struct xfs_bmbt_irec            new;
-       /* REFERENCED */
-       uint                            qfield;
-       xfs_filblks_t                   temp;
-       xfs_filblks_t                   temp2;
-       int                             state = BMAP_COWFORK;
-       int                             eof;
-       xfs_extnum_t                    eidx;
-
-       mp = ip->i_mount;
-       XFS_STATS_INC(mp, xs_del_exlist);
-
-       ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof,
-                       &eidx, &got, &new);
-
-       ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ifp = ifp;
-       ASSERT((eidx >= 0) && (eidx < ifp->if_bytes /
-               (uint)sizeof(xfs_bmbt_rec_t)));
-       ASSERT(del->br_blockcount > 0);
-       ASSERT(got.br_startoff <= del->br_startoff);
-       del_endoff = del->br_startoff + del->br_blockcount;
-       got_endoff = got.br_startoff + got.br_blockcount;
-       ASSERT(got_endoff >= del_endoff);
-       delay = isnullstartblock(got.br_startblock);
-       ASSERT(isnullstartblock(del->br_startblock) == delay);
-       qfield = 0;
-       error = 0;
-       /*
-        * If deleting a real allocation, must free up the disk space.
-        */
-       if (!delay) {
-               nblks = del->br_blockcount;
-               qfield = XFS_TRANS_DQ_BCOUNT;
-               /*
-                * Set up del_endblock and cur for later.
-                */
-               del_endblock = del->br_startblock + del->br_blockcount;
-               da_old = da_new = 0;
-       } else {
-               da_old = startblockval(got.br_startblock);
-               da_new = 0;
-               nblks = 0;
-       }
-       qfield = qfield;
-       nblks = nblks;
-
-       /*
-        * Set flag value to use in switch statement.
-        * Left-contig is 2, right-contig is 1.
-        */
-       switch (((got.br_startoff == del->br_startoff) << 1) |
-               (got_endoff == del_endoff)) {
-       case 3:
-               /*
-                * Matches the whole extent.  Delete the entry.
-                */
-               xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK);
-               --eidx;
-               break;
-
-       case 2:
-               /*
-                * Deleting the first part of the extent.
-                */
-               trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
-               xfs_bmbt_set_startoff(ep, del_endoff);
-               temp = got.br_blockcount - del->br_blockcount;
-               xfs_bmbt_set_blockcount(ep, temp);
-               if (delay) {
-                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-                               da_old);
-                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-                       da_new = temp;
-                       break;
-               }
-               xfs_bmbt_set_startblock(ep, del_endblock);
-               trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-               break;
-
-       case 1:
-               /*
-                * Deleting the last part of the extent.
-                */
-               temp = got.br_blockcount - del->br_blockcount;
-               trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);
-               if (delay) {
-                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-                               da_old);
-                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-                       da_new = temp;
-                       break;
-               }
-               trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-               break;
-
-       case 0:
-               /*
-                * Deleting the middle of the extent.
-                */
-               temp = del->br_startoff - got.br_startoff;
-               trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);
-               new.br_startoff = del_endoff;
-               temp2 = got_endoff - del_endoff;
-               new.br_blockcount = temp2;
-               new.br_state = got.br_state;
-               if (!delay) {
-                       new.br_startblock = del_endblock;
-               } else {
-                       temp = xfs_bmap_worst_indlen(ip, temp);
-                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       temp2 = xfs_bmap_worst_indlen(ip, temp2);
-                       new.br_startblock = nullstartblock((int)temp2);
-                       da_new = temp + temp2;
-                       while (da_new > da_old) {
-                               if (temp) {
-                                       temp--;
-                                       da_new--;
-                                       xfs_bmbt_set_startblock(ep,
-                                               nullstartblock((int)temp));
-                               }
-                               if (da_new == da_old)
-                                       break;
-                               if (temp2) {
-                                       temp2--;
-                                       da_new--;
-                                       new.br_startblock =
-                                               nullstartblock((int)temp2);
-                               }
-                       }
-               }
-               trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-               xfs_iext_insert(ip, eidx + 1, 1, &new, state);
-               ++eidx;
-               break;
-       }
-
-       /*
-        * Account for change in delayed indirect blocks.
-        * Nothing to do for disk quota accounting here.
-        */
-       ASSERT(da_old >= da_new);
-       if (da_old > da_new)
-               xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
-
-       return error;
-}
-
 /*
  * Unmap (remove) blocks from a file.
  * If nexts is nonzero then the number of extents to remove is limited to
@@ -5360,8 +5375,6 @@ __xfs_bunmapi(
 {
        xfs_btree_cur_t         *cur;           /* bmap btree cursor */
        xfs_bmbt_irec_t         del;            /* extent being deleted */
-       int                     eof;            /* is deleting at eof */
-       xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
        int                     error;          /* error return value */
        xfs_extnum_t            extno;          /* extent number in list */
        xfs_bmbt_irec_t         got;            /* current extent record */
@@ -5371,8 +5384,6 @@ __xfs_bunmapi(
        int                     logflags;       /* transaction logging flags */
        xfs_extlen_t            mod;            /* rt extent offset */
        xfs_mount_t             *mp;            /* mount structure */
-       xfs_extnum_t            nextents;       /* number of file extents */
-       xfs_bmbt_irec_t         prev;           /* previous extent record */
        xfs_fileoff_t           start;          /* first file offset deleted */
        int                     tmp_logflags;   /* partial logging flags */
        int                     wasdel;         /* was a delayed alloc extent */
@@ -5403,8 +5414,7 @@ __xfs_bunmapi(
        if (!(ifp->if_flags & XFS_IFEXTENTS) &&
            (error = xfs_iread_extents(tp, ip, whichfork)))
                return error;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       if (nextents == 0) {
+       if (xfs_iext_count(ifp) == 0) {
                *rlen = 0;
                return 0;
        }
@@ -5412,18 +5422,17 @@ __xfs_bunmapi(
        isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
        start = bno;
        bno = start + len - 1;
-       ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
-               &prev);
 
        /*
         * Check to see if the given block number is past the end of the
         * file, back up to the last block if so...
         */
-       if (eof) {
-               ep = xfs_iext_get_ext(ifp, --lastx);
-               xfs_bmbt_get_all(ep, &got);
+       if (!xfs_iext_lookup_extent(ip, ifp, bno, &lastx, &got)) {
+               ASSERT(lastx > 0);
+               xfs_iext_get_extent(ifp, --lastx, &got);
                bno = got.br_startoff + got.br_blockcount - 1;
        }
+
        logflags = 0;
        if (ifp->if_flags & XFS_IFBROOT) {
                ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
@@ -5454,8 +5463,7 @@ __xfs_bunmapi(
                if (got.br_startoff > bno) {
                        if (--lastx < 0)
                                break;
-                       ep = xfs_iext_get_ext(ifp, lastx);
-                       xfs_bmbt_get_all(ep, &got);
+                       xfs_iext_get_extent(ifp, lastx, &got);
                }
                /*
                 * Is the last block of this extent before the range
@@ -5469,7 +5477,6 @@ __xfs_bunmapi(
                 * Then deal with the (possibly delayed) allocated space
                 * we found.
                 */
-               ASSERT(ep != NULL);
                del = got;
                wasdel = isnullstartblock(del.br_startblock);
                if (got.br_startoff < start) {
@@ -5550,15 +5557,12 @@ __xfs_bunmapi(
                                 */
                                ASSERT(bno >= del.br_blockcount);
                                bno -= del.br_blockcount;
-                               if (got.br_startoff > bno) {
-                                       if (--lastx >= 0) {
-                                               ep = xfs_iext_get_ext(ifp,
-                                                                     lastx);
-                                               xfs_bmbt_get_all(ep, &got);
-                                       }
-                               }
+                               if (got.br_startoff > bno && --lastx >= 0)
+                                       xfs_iext_get_extent(ifp, lastx, &got);
                                continue;
                        } else if (del.br_state == XFS_EXT_UNWRITTEN) {
+                               struct xfs_bmbt_irec    prev;
+
                                /*
                                 * This one is already unwritten.
                                 * It must have a written left neighbor.
@@ -5566,8 +5570,7 @@ __xfs_bunmapi(
                                 * try again.
                                 */
                                ASSERT(lastx > 0);
-                               xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
-                                               lastx - 1), &prev);
+                               xfs_iext_get_extent(ifp, lastx - 1, &prev);
                                ASSERT(prev.br_state == XFS_EXT_NORM);
                                ASSERT(!isnullstartblock(prev.br_startblock));
                                ASSERT(del.br_startblock ==
@@ -5665,13 +5668,9 @@ nodelete:
                 */
                if (bno != (xfs_fileoff_t)-1 && bno >= start) {
                        if (lastx >= 0) {
-                               ep = xfs_iext_get_ext(ifp, lastx);
-                               if (xfs_bmbt_get_startoff(ep) > bno) {
-                                       if (--lastx >= 0)
-                                               ep = xfs_iext_get_ext(ifp,
-                                                                     lastx);
-                               }
-                               xfs_bmbt_get_all(ep, &got);
+                               xfs_iext_get_extent(ifp, lastx, &got);
+                               if (got.br_startoff > bno && --lastx >= 0)
+                                       xfs_iext_get_extent(ifp, lastx, &got);
                        }
                        extno++;
                }
@@ -5889,7 +5888,7 @@ xfs_bmse_shift_one(
 
        mp = ip->i_mount;
        ifp = XFS_IFORK_PTR(ip, whichfork);
-       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+       total_extents = xfs_iext_count(ifp);
 
        xfs_bmbt_get_all(gotp, &got);
 
@@ -6066,7 +6065,7 @@ xfs_bmap_shift_extents(
         * are collapsing out, so we cannot use the count of real extents here.
         * Instead we have to calculate it from the incore fork.
         */
-       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+       total_extents = xfs_iext_count(ifp);
        if (total_extents == 0) {
                *done = 1;
                goto del_cursor;
@@ -6126,7 +6125,7 @@ xfs_bmap_shift_extents(
                 * count can change. Update the total and grade the next record.
                 */
                if (direction == SHIFT_LEFT) {
-                       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+                       total_extents = xfs_iext_count(ifp);
                        stop_extent = total_extents;
                }
 
index f97db7132564569dc7f2aefbf05f27da719c8e10..cecd094404cc53d821567be873bafdbba98880fa 100644 (file)
@@ -190,6 +190,8 @@ void        xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
 #define        XFS_BMAP_TRACE_EXLIST(ip,c,w)
 #endif
 
+void   xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
+               xfs_filblks_t len);
 int    xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
 void   xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
 void   xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
@@ -221,7 +223,11 @@ int        xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
                xfs_fileoff_t bno, xfs_filblks_t len, int flags,
                xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
                struct xfs_defer_ops *dfops, int *done);
-int    xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del);
+int    xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
+               xfs_extnum_t *idx, struct xfs_bmbt_irec *got,
+               struct xfs_bmbt_irec *del);
+void   xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
+               struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del);
 int    xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
                xfs_extnum_t num);
 uint   xfs_default_attroffset(struct xfs_inode *ip);
@@ -231,14 +237,9 @@ int        xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
                struct xfs_defer_ops *dfops, enum shift_direction direction,
                int num_exts);
 int    xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
-struct xfs_bmbt_rec_host *
-       xfs_bmap_search_extents(struct xfs_inode *ip, xfs_fileoff_t bno,
-               int fork, int *eofp, xfs_extnum_t *lastxp,
-               struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp);
 int    xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
-               xfs_fileoff_t aoff, xfs_filblks_t len,
-               struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *prev,
-               xfs_extnum_t *lastx, int eof);
+               xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
+               struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof);
 
 enum xfs_bmap_intent_type {
        XFS_BMAP_MAP = 1,
index 5c8e6f2ce44f461d343a98b6b49ad6b0b09a3b8b..0e80993c8a5914d3dfa1f861b2b459d7a513d67a 100644 (file)
@@ -4826,7 +4826,7 @@ xfs_btree_calc_size(
        return rval;
 }
 
-int
+static int
 xfs_btree_count_blocks_helper(
        struct xfs_btree_cur    *cur,
        int                     level,
index 613c5cf1943646764880ba3beeffb98667dc5268..5c2929f94bd3bf27411f860b57697b9d899312d1 100644 (file)
@@ -199,9 +199,9 @@ xfs_defer_intake_work(
        struct xfs_defer_pending        *dfp;
 
        list_for_each_entry(dfp, &dop->dop_intake, dfp_list) {
-               trace_xfs_defer_intake_work(tp->t_mountp, dfp);
                dfp->dfp_intent = dfp->dfp_type->create_intent(tp,
                                dfp->dfp_count);
+               trace_xfs_defer_intake_work(tp->t_mountp, dfp);
                list_sort(tp->t_mountp, &dfp->dfp_work,
                                dfp->dfp_type->diff_items);
                list_for_each(li, &dfp->dfp_work)
@@ -221,21 +221,14 @@ xfs_defer_trans_abort(
        struct xfs_defer_pending        *dfp;
 
        trace_xfs_defer_trans_abort(tp->t_mountp, dop);
-       /*
-        * If the transaction was committed, drop the intent reference
-        * since we're bailing out of here. The other reference is
-        * dropped when the intent hits the AIL.  If the transaction
-        * was not committed, the intent is freed by the intent item
-        * unlock handler on abort.
-        */
-       if (!dop->dop_committed)
-               return;
 
-       /* Abort intent items. */
+       /* Abort intent items that don't have a done item. */
        list_for_each_entry(dfp, &dop->dop_pending, dfp_list) {
                trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
-               if (!dfp->dfp_done)
+               if (dfp->dfp_intent && !dfp->dfp_done) {
                        dfp->dfp_type->abort_intent(dfp->dfp_intent);
+                       dfp->dfp_intent = NULL;
+               }
        }
 
        /* Shut down FS. */
index 20a96dd5af7eb6d4ebbaf07a1f6a1e4b10e7f981..c58d72c220f58593cd05b90b3227f1ecb4f2d06f 100644 (file)
@@ -93,7 +93,7 @@ xfs_ascii_ci_compname(
        return result;
 }
 
-static struct xfs_nameops xfs_ascii_ci_nameops = {
+static const struct xfs_nameops xfs_ascii_ci_nameops = {
        .hashname       = xfs_ascii_ci_hashname,
        .compname       = xfs_ascii_ci_compname,
 };
index 3cc3cf7674746f279fcb0acf4eae27d49d089814..ac9a003dd29acf80d7c766788cb40d1f98d9132c 100644 (file)
@@ -191,8 +191,7 @@ xfs_dquot_buf_verify_crc(
        if (mp->m_quotainfo)
                ndquots = mp->m_quotainfo->qi_dqperchunk;
        else
-               ndquots = xfs_calc_dquots_per_chunk(
-                                       XFS_BB_TO_FSB(mp, bp->b_length));
+               ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
 
        for (i = 0; i < ndquots; i++, d++) {
                if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
index f6547fc5e016e75a130a738c3e4a1b3a087d7281..6b7579e7b60a228ee6e633221bc64952f4a93a41 100644 (file)
@@ -865,7 +865,6 @@ typedef struct xfs_timestamp {
  * padding field for v3 inodes.
  */
 #define        XFS_DINODE_MAGIC                0x494e  /* 'IN' */
-#define XFS_DINODE_GOOD_VERSION(v)     ((v) >= 1 && (v) <= 3)
 typedef struct xfs_dinode {
        __be16          di_magic;       /* inode magic # = XFS_DINODE_MAGIC */
        __be16          di_mode;        /* mode and type of file */
index 8de9a3a29589bd59c0684c8eab278db3edceba53..134424fac434fdd7fdd3cecf12d3007712b9734b 100644 (file)
@@ -57,6 +57,17 @@ xfs_inobp_check(
 }
 #endif
 
+bool
+xfs_dinode_good_version(
+       struct xfs_mount *mp,
+       __u8            version)
+{
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               return version == 3;
+
+       return version == 1 || version == 2;
+}
+
 /*
  * If we are doing readahead on an inode buffer, we might be in log recovery
  * reading an inode allocation buffer that hasn't yet been replayed, and hence
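
xfs_dinode_good_version() above replaces the old XFS_DINODE_GOOD_VERSION() macro (removed from xfs_format.h earlier in this diff) with a mount-aware check: CRC-enabled (v5 superblock) filesystems accept only v3 inodes, while older filesystems accept v1 or v2. A trivial standalone restatement of that rule follows, with a boolean standing in for xfs_sb_version_hascrc(&mp->m_sb).

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Same rule as xfs_dinode_good_version() above; 'has_crc' replaces the sb check. */
static bool dinode_good_version(bool has_crc, uint8_t version)
{
	if (has_crc)
		return version == 3;
	return version == 1 || version == 2;
}

int main(void)
{
	assert(dinode_good_version(true, 3));
	assert(!dinode_good_version(true, 2));	/* v1/v2 inode rejected on a v5 fs */
	assert(dinode_good_version(false, 2));
	assert(!dinode_good_version(false, 3));
	return 0;
}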
@@ -91,7 +102,7 @@ xfs_inode_buf_verify(
 
                dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
                di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
-                           XFS_DINODE_GOOD_VERSION(dip->di_version);
+                       xfs_dinode_good_version(mp, dip->di_version);
                if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
                                                XFS_ERRTAG_ITOBP_INOTOBP,
                                                XFS_RANDOM_ITOBP_INOTOBP))) {
index 62d9d4681c8c28a1294b575903f0720693bc6666..3cfe12a4f58ac8560e1cd92e529a85477ff5ee02 100644 (file)
@@ -74,6 +74,8 @@ void  xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
 void   xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
                               struct xfs_dinode *to);
 
+bool   xfs_dinode_good_version(struct xfs_mount *mp, __u8 version);
+
 #if defined(DEBUG)
 void   xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #else
index 5dd56d3dbb3aeb41e7e46b921ca9052dd9066c41..222e103356c6d838742703315a5f558551faf51e 100644 (file)
@@ -775,6 +775,13 @@ xfs_idestroy_fork(
        }
 }
 
+/* Count number of incore extents based on if_bytes */
+xfs_extnum_t
+xfs_iext_count(struct xfs_ifork *ifp)
+{
+       return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+}
+
 /*
  * Convert in-core extents to on-disk form
  *
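
xfs_iext_count(), added above, is the helper that the rest of this series substitutes for the open-coded ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) expression: the incore fork stores extents as a flat array of fixed-size bmbt records, so the count is just the byte size divided by the record size. The snippet below shows that arithmetic with an illustrative record size.

#include <stdint.h>
#include <stdio.h>

#define REC_SIZE	16U	/* illustrative stand-in for sizeof(xfs_bmbt_rec_t) */

/* What xfs_iext_count() computes: bytes in the incore extent array / record size. */
static uint32_t iext_count_sketch(uint32_t if_bytes)
{
	return if_bytes / REC_SIZE;
}

int main(void)
{
	printf("%u extents\n", iext_count_sketch(25 * REC_SIZE));	/* 25 */
	return 0;
}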
@@ -803,7 +810,7 @@ xfs_iextents_copy(
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
        ASSERT(ifp->if_bytes > 0);
 
-       nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       nrecs = xfs_iext_count(ifp);
        XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
        ASSERT(nrecs > 0);
 
@@ -941,7 +948,7 @@ xfs_iext_get_ext(
        xfs_extnum_t    idx)            /* index of target extent */
 {
        ASSERT(idx >= 0);
-       ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
+       ASSERT(idx < xfs_iext_count(ifp));
 
        if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
                return ifp->if_u1.if_ext_irec->er_extbuf;
@@ -1017,7 +1024,7 @@ xfs_iext_add(
        int             new_size;       /* size of extents after adding */
        xfs_extnum_t    nextents;       /* number of extents in file */
 
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       nextents = xfs_iext_count(ifp);
        ASSERT((idx >= 0) && (idx <= nextents));
        byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
        new_size = ifp->if_bytes + byte_diff;
@@ -1241,7 +1248,7 @@ xfs_iext_remove(
        trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
 
        ASSERT(ext_diff > 0);
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       nextents = xfs_iext_count(ifp);
        new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
 
        if (new_size == 0) {
@@ -1270,7 +1277,7 @@ xfs_iext_remove_inline(
 
        ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
        ASSERT(idx < XFS_INLINE_EXTS);
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       nextents = xfs_iext_count(ifp);
        ASSERT(((nextents - ext_diff) > 0) &&
                (nextents - ext_diff) < XFS_INLINE_EXTS);
 
@@ -1309,7 +1316,7 @@ xfs_iext_remove_direct(
        ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
        new_size = ifp->if_bytes -
                (ext_diff * sizeof(xfs_bmbt_rec_t));
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       nextents = xfs_iext_count(ifp);
 
        if (new_size == 0) {
                xfs_iext_destroy(ifp);
@@ -1546,7 +1553,7 @@ xfs_iext_indirect_to_direct(
        int             size;           /* size of file extents */
 
        ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       nextents = xfs_iext_count(ifp);
        ASSERT(nextents <= XFS_LINEAR_EXTS);
        size = nextents * sizeof(xfs_bmbt_rec_t);
 
@@ -1620,7 +1627,7 @@ xfs_iext_bno_to_ext(
        xfs_extnum_t    nextents;       /* number of file extents */
        xfs_fileoff_t   startoff = 0;   /* start offset of extent */
 
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       nextents = xfs_iext_count(ifp);
        if (nextents == 0) {
                *idxp = 0;
                return NULL;
@@ -1733,8 +1740,8 @@ xfs_iext_idx_to_irec(
 
        ASSERT(ifp->if_flags & XFS_IFEXTIREC);
        ASSERT(page_idx >= 0);
-       ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
-       ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
+       ASSERT(page_idx <= xfs_iext_count(ifp));
+       ASSERT(page_idx < xfs_iext_count(ifp) || realloc);
 
        nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
        erp_idx = 0;
@@ -1782,7 +1789,7 @@ xfs_iext_irec_init(
        xfs_extnum_t    nextents;       /* number of extents in file */
 
        ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       nextents = xfs_iext_count(ifp);
        ASSERT(nextents <= XFS_LINEAR_EXTS);
 
        erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
@@ -1906,7 +1913,7 @@ xfs_iext_irec_compact(
 
        ASSERT(ifp->if_flags & XFS_IFEXTIREC);
        nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       nextents = xfs_iext_count(ifp);
 
        if (nextents == 0) {
                xfs_iext_destroy(ifp);
@@ -1996,3 +2003,49 @@ xfs_ifork_init_cow(
        ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
        ip->i_cnextents = 0;
 }
+
+/*
+ * Look up the extent covering bno.
+ *
+ * If there is an extent covering bno, return true, store the expanded extent
+ * structure in *gotp, and store the extent index in *idxp.
+ * If there is no extent covering bno, but there is an extent after it (i.e.
+ * bno lies in a hole), return that extent in *gotp and its index in *idxp
+ * instead.
+ * If bno is beyond the last extent, return false and store the index after
+ * the last valid index in *idxp.
+ */
+bool
+xfs_iext_lookup_extent(
+       struct xfs_inode        *ip,
+       struct xfs_ifork        *ifp,
+       xfs_fileoff_t           bno,
+       xfs_extnum_t            *idxp,
+       struct xfs_bmbt_irec    *gotp)
+{
+       struct xfs_bmbt_rec_host *ep;
+
+       XFS_STATS_INC(ip->i_mount, xs_look_exlist);
+
+       ep = xfs_iext_bno_to_ext(ifp, bno, idxp);
+       if (!ep)
+               return false;
+       xfs_bmbt_get_all(ep, gotp);
+       return true;
+}
+
+/*
+ * Return true if there is an extent at index idx, and copy the expanded
+ * extent structure at idx into *gotp in that case.  Else return false.
+ */
+bool
+xfs_iext_get_extent(
+       struct xfs_ifork        *ifp,
+       xfs_extnum_t            idx,
+       struct xfs_bmbt_irec    *gotp)
+{
+       if (idx < 0 || idx >= xfs_iext_count(ifp))
+               return false;
+       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp);
+       return true;
+}
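The new xfs_iext_count(), xfs_iext_lookup_extent() and xfs_iext_get_extent() helpers centralise the open-coded if_bytes / sizeof(xfs_bmbt_rec_t) arithmetic and hand decoded extents back by value, which is what the remaining hunks convert call sites to. The sketch below models that interface in plain user-space C; struct irec and struct ifork are simplified stand-ins rather than the kernel layout, and the linear scan only illustrates the lookup semantics:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct irec {                           /* simplified xfs_bmbt_irec */
        unsigned long long startoff;    /* first file block covered */
        unsigned long long blockcount;  /* number of blocks */
};

struct ifork {                          /* simplified in-core fork */
        const struct irec *extents;     /* sorted, non-overlapping */
        size_t bytes;                   /* total extent bytes, like if_bytes */
};

/* Analogue of xfs_iext_count(): derive the extent count from the byte size. */
static size_t iext_count(const struct ifork *ifp)
{
        return ifp->bytes / sizeof(struct irec);
}

/* Analogue of xfs_iext_get_extent(): copy out the extent at idx, if any. */
static bool iext_get_extent(const struct ifork *ifp, size_t idx, struct irec *gotp)
{
        if (idx >= iext_count(ifp))
                return false;
        *gotp = ifp->extents[idx];
        return true;
}

/* Analogue of xfs_iext_lookup_extent(): find the extent covering bno, or the
 * first extent after it; false when bno is past the last extent. */
static bool iext_lookup_extent(const struct ifork *ifp, unsigned long long bno,
                               size_t *idxp, struct irec *gotp)
{
        size_t i, n = iext_count(ifp);

        for (i = 0; i < n; i++) {
                if (bno < ifp->extents[i].startoff + ifp->extents[i].blockcount) {
                        *idxp = i;
                        *gotp = ifp->extents[i];
                        return true;
                }
        }
        *idxp = n;
        return false;
}

int main(void)
{
        static const struct irec map[] = { { 0, 4 }, { 10, 6 } };
        struct ifork fork = { map, sizeof(map) };
        struct irec got;
        size_t idx;

        if (iext_lookup_extent(&fork, 7, &idx, &got))   /* bno 7 is in a hole */
                printf("idx=%zu startoff=%llu\n", idx, got.startoff);
        if (!iext_get_extent(&fork, 5, &got))
                printf("no extent at idx 5\n");
        return 0;
}

The property the converted callers rely on is the fall-through behaviour of the lookup: a bno inside a hole yields the following extent, and a bno past the last extent yields false with *idxp set one past the end.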
index c9476f50e32d116109d1f71dad3895c5659496b8..7fb8365326d1a745583c4f133bc5a63668316b33 100644 (file)
@@ -152,6 +152,7 @@ void                xfs_init_local_fork(struct xfs_inode *, int, const void *, int);
 
 struct xfs_bmbt_rec_host *
                xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);
+xfs_extnum_t   xfs_iext_count(struct xfs_ifork *);
 void           xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t,
                                struct xfs_bmbt_irec *, int);
 void           xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int);
@@ -181,6 +182,12 @@ void               xfs_iext_irec_compact_pages(struct xfs_ifork *);
 void           xfs_iext_irec_compact_full(struct xfs_ifork *);
 void           xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);
 
+bool           xfs_iext_lookup_extent(struct xfs_inode *ip,
+                       struct xfs_ifork *ifp, xfs_fileoff_t bno,
+                       xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp);
+bool           xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
+                       struct xfs_bmbt_irec *gotp);
+
 extern struct kmem_zone        *xfs_ifork_zone;
 
 extern void xfs_ifork_init_cow(struct xfs_inode *ip);
index 8d74870468c24bb5661a0afa0e7ab436f8b4ded0..f9a1076de911e49572d1153c42ff1072a1b2a184 100644 (file)
@@ -57,7 +57,6 @@ typedef __int64_t     xfs_sfiloff_t;  /* signed block number in a file */
 
 #define        NULLAGBLOCK     ((xfs_agblock_t)-1)
 #define        NULLAGNUMBER    ((xfs_agnumber_t)-1)
-#define        NULLEXTNUM      ((xfs_extnum_t)-1)
 
 #define NULLCOMMITLSN  ((xfs_lsn_t)-1)
 
index 561cf1456c6ca1ed07484e5daf455c6f40015959..ab266d66124da0381b9b5669eec5fe47d1e3d707 100644 (file)
@@ -777,7 +777,7 @@ xfs_map_cow(
 {
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_bmbt_irec    imap;
-       bool                    is_cow = false, need_alloc = false;
+       bool                    is_cow = false;
        int                     error;
 
        /*
@@ -795,7 +795,7 @@ xfs_map_cow(
         * Else we need to check if there is a COW mapping at this offset.
         */
        xfs_ilock(ip, XFS_ILOCK_SHARED);
-       is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
+       is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
        if (!is_cow)
@@ -805,7 +805,7 @@ xfs_map_cow(
         * And if the COW mapping has a delayed extent here we need to
         * allocate real space for it now.
         */
-       if (need_alloc) {
+       if (isnullstartblock(imap.br_startblock)) {
                error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
                                &imap);
                if (error)
@@ -1311,7 +1311,6 @@ __xfs_get_blocks(
        ssize_t                 size;
        int                     new = 0;
        bool                    is_cow = false;
-       bool                    need_alloc = false;
 
        BUG_ON(create && !direct);
 
@@ -1337,9 +1336,11 @@ __xfs_get_blocks(
        end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
-       if (create && direct && xfs_is_reflink_inode(ip))
-               is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap,
-                                       &need_alloc);
+       if (create && direct && xfs_is_reflink_inode(ip)) {
+               is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
+               ASSERT(!is_cow || !isnullstartblock(imap.br_startblock));
+       }
+
        if (!is_cow) {
                error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
                                        &imap, &nimaps, XFS_BMAPI_ENTIRE);
@@ -1356,10 +1357,29 @@ __xfs_get_blocks(
                        xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb,
                                        &imap);
        }
-       ASSERT(!need_alloc);
        if (error)
                goto out_unlock;
 
+       /*
+        * The only time we can ever safely find delalloc blocks on direct I/O
+        * is a dio write to post-eof speculative preallocation. All other
+        * scenarios are indicative of a problem or misuse (such as mixing
+        * direct and mapped I/O).
+        *
+        * The file may be unmapped by the time we get here so we cannot
+        * reliably fail the I/O based on mapping. Instead, fail the I/O if this
+        * is a read or a write within eof. Otherwise, carry on but warn as a
+        * precaution if the file happens to be mapped.
+        */
+       if (direct && imap.br_startblock == DELAYSTARTBLOCK) {
+               if (!create || offset < i_size_read(VFS_I(ip))) {
+                       WARN_ON_ONCE(1);
+                       error = -EIO;
+                       goto out_unlock;
+               }
+               WARN_ON_ONCE(mapping_mapped(VFS_I(ip)->i_mapping));
+       }
+
        /* for DAX, we convert unwritten extents directly */
        if (create &&
            (!nimaps ||
@@ -1444,8 +1464,6 @@ __xfs_get_blocks(
             (new || ISUNWRITTEN(&imap))))
                set_buffer_new(bh_result);
 
-       BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
-
        return 0;
 
 out_unlock:
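With the BUG_ON gone, hitting a delayed-allocation mapping during direct I/O becomes a policy decision: only a write beyond EOF (into post-EOF speculative preallocation) may proceed, with a one-shot warning if the file also happens to be mapped; reads and writes within EOF fail with EIO. A small sketch of just that decision, where create, offset, isize and file_is_mmapped are plain parameters standing in for the kernel-side state:

#include <stdbool.h>
#include <stdio.h>
#include <errno.h>

/*
 * Decide what to do when direct I/O finds a delayed-allocation mapping.
 * Returns 0 to carry on or -EIO to fail the I/O; *warn is set when the
 * caller should emit a once-only warning (file also mapped into memory).
 */
static int dio_delalloc_policy(bool create, long long offset, long long isize,
                               bool file_is_mmapped, bool *warn)
{
        *warn = false;

        /* Reads, and writes within EOF, must never see delalloc blocks. */
        if (!create || offset < isize)
                return -EIO;

        /* Post-EOF write: allowed, but suspicious if the file is mmapped. */
        if (file_is_mmapped)
                *warn = true;
        return 0;
}

int main(void)
{
        bool warn;
        int ret;

        ret = dio_delalloc_policy(true, 8192, 4096, true, &warn);
        printf("post-EOF write: ret=%d warn=%d\n", ret, warn);  /* ret=0 warn=1 */
        ret = dio_delalloc_policy(false, 0, 4096, false, &warn);
        printf("read:           ret=%d\n", ret);                /* ret=-EIO */
        return 0;
}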
index 552465e011ecb0df23aaeae57b94108dcdd24360..0670a8bd5818aa7c99dbfd69203ce1bc0ebf276d 100644 (file)
@@ -359,9 +359,7 @@ xfs_bmap_count_blocks(
        mp = ip->i_mount;
        ifp = XFS_IFORK_PTR(ip, whichfork);
        if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
-               xfs_bmap_count_leaves(ifp, 0,
-                       ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
-                       count);
+               xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count);
                return 0;
        }
 
@@ -426,7 +424,7 @@ xfs_getbmapx_fix_eof_hole(
                ifp = XFS_IFORK_PTR(ip, whichfork);
                if (!moretocome &&
                    xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
-                  (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
+                  (lastx == xfs_iext_count(ifp) - 1))
                        out->bmv_oflags |= BMV_OF_LAST;
        }
 
@@ -1792,6 +1790,7 @@ xfs_swap_extent_forks(
        struct xfs_ifork        tempifp, *ifp, *tifp;
        int                     aforkblks = 0;
        int                     taforkblks = 0;
+       xfs_extnum_t            nextents;
        __uint64_t              tmp;
        int                     error;
 
@@ -1877,14 +1876,13 @@ xfs_swap_extent_forks(
 
        switch (ip->i_d.di_format) {
        case XFS_DINODE_FMT_EXTENTS:
-               /* If the extents fit in the inode, fix the
-                * pointer.  Otherwise it's already NULL or
-                * pointing to the extent.
+               /*
+                * If the extents fit in the inode, fix the pointer.  Otherwise
+                * it's already NULL or pointing to the extent.
                 */
-               if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
-                       ifp->if_u1.if_extents =
-                               ifp->if_u2.if_inline_ext;
-               }
+               nextents = xfs_iext_count(&ip->i_df);
+               if (nextents <= XFS_INLINE_EXTS)
+                       ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
                (*src_log_flags) |= XFS_ILOG_DEXT;
                break;
        case XFS_DINODE_FMT_BTREE:
@@ -1896,14 +1894,13 @@ xfs_swap_extent_forks(
 
        switch (tip->i_d.di_format) {
        case XFS_DINODE_FMT_EXTENTS:
-               /* If the extents fit in the inode, fix the
-                * pointer.  Otherwise it's already NULL or
-                * pointing to the extent.
+               /*
+                * If the extents fit in the inode, fix the pointer.  Otherwise
+                * it's already NULL or pointing to the extent.
                 */
-               if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
-                       tifp->if_u1.if_extents =
-                               tifp->if_u2.if_inline_ext;
-               }
+               nextents = xfs_iext_count(&tip->i_df);
+               if (nextents <= XFS_INLINE_EXTS)
+                       tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext;
                (*target_log_flags) |= XFS_ILOG_DEXT;
                break;
        case XFS_DINODE_FMT_BTREE:
index 1c2e52b2d926140b27e7a0e75f568165b7a07534..38c9a95bda7d7c42763dbb3a63f565bd67f8a98f 100644 (file)
@@ -71,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t;
        { XBF_READ,             "READ" }, \
        { XBF_WRITE,            "WRITE" }, \
        { XBF_READ_AHEAD,       "READ_AHEAD" }, \
+       { XBF_NO_IOACCT,        "NO_IOACCT" }, \
        { XBF_ASYNC,            "ASYNC" }, \
        { XBF_DONE,             "DONE" }, \
        { XBF_STALE,            "STALE" }, \
index ca2ab738fae24406d9fee1b1fada77e57436be29..d818c160451f50486fd6f72fa88fe6b3d889e1e6 100644 (file)
@@ -249,6 +249,7 @@ xfs_file_dio_aio_read(
        struct xfs_inode        *ip = XFS_I(inode);
        loff_t                  isize = i_size_read(inode);
        size_t                  count = iov_iter_count(to);
+       loff_t                  end = iocb->ki_pos + count - 1;
        struct iov_iter         data;
        struct xfs_buftarg      *target;
        ssize_t                 ret = 0;
@@ -272,49 +273,21 @@ xfs_file_dio_aio_read(
 
        file_accessed(iocb->ki_filp);
 
-       /*
-        * Locking is a bit tricky here. If we take an exclusive lock for direct
-        * IO, we effectively serialise all new concurrent read IO to this file
-        * and block it behind IO that is currently in progress because IO in
-        * progress holds the IO lock shared. We only need to hold the lock
-        * exclusive to blow away the page cache, so only take lock exclusively
-        * if the page cache needs invalidation. This allows the normal direct
-        * IO case of no page cache pages to proceeed concurrently without
-        * serialisation.
-        */
        xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
        if (mapping->nrpages) {
-               xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-               xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+               if (ret)
+                       goto out_unlock;
 
                /*
-                * The generic dio code only flushes the range of the particular
-                * I/O. Because we take an exclusive lock here, this whole
-                * sequence is considerably more expensive for us. This has a
-                * noticeable performance impact for any file with cached pages,
-                * even when outside of the range of the particular I/O.
-                *
-                * Hence, amortize the cost of the lock against a full file
-                * flush and reduce the chances of repeated iolock cycles going
-                * forward.
+                * Invalidate whole pages. This can return an error if we fail
+                * to invalidate a page, but this should never happen on XFS.
+                * Warn if it does fail.
                 */
-               if (mapping->nrpages) {
-                       ret = filemap_write_and_wait(mapping);
-                       if (ret) {
-                               xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
-                               return ret;
-                       }
-
-                       /*
-                        * Invalidate whole pages. This can return an error if
-                        * we fail to invalidate a page, but this should never
-                        * happen on XFS. Warn if it does fail.
-                        */
-                       ret = invalidate_inode_pages2(mapping);
-                       WARN_ON_ONCE(ret);
-                       ret = 0;
-               }
-               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+               ret = invalidate_inode_pages2_range(mapping,
+                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+               WARN_ON_ONCE(ret);
+               ret = 0;
        }
 
        data = *to;
@@ -324,8 +297,9 @@ xfs_file_dio_aio_read(
                iocb->ki_pos += ret;
                iov_iter_advance(to, ret);
        }
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
+out_unlock:
+       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
        return ret;
 }
 
@@ -570,61 +544,49 @@ xfs_file_dio_aio_write(
        if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
                return -EINVAL;
 
-       /* "unaligned" here means not aligned to a filesystem block */
-       if ((iocb->ki_pos & mp->m_blockmask) ||
-           ((iocb->ki_pos + count) & mp->m_blockmask))
-               unaligned_io = 1;
-
        /*
-        * We don't need to take an exclusive lock unless there page cache needs
-        * to be invalidated or unaligned IO is being executed. We don't need to
-        * consider the EOF extension case here because
-        * xfs_file_aio_write_checks() will relock the inode as necessary for
-        * EOF zeroing cases and fill out the new inode size as appropriate.
+        * Don't take the exclusive iolock here unless the I/O is unaligned to
+        * the file system block size.  We don't need to consider the EOF
+        * extension case here because xfs_file_aio_write_checks() will relock
+        * the inode as necessary for EOF zeroing cases and fill out the new
+        * inode size as appropriate.
         */
-       if (unaligned_io || mapping->nrpages)
+       if ((iocb->ki_pos & mp->m_blockmask) ||
+           ((iocb->ki_pos + count) & mp->m_blockmask)) {
+               unaligned_io = 1;
                iolock = XFS_IOLOCK_EXCL;
-       else
+       } else {
                iolock = XFS_IOLOCK_SHARED;
-       xfs_rw_ilock(ip, iolock);
-
-       /*
-        * Recheck if there are cached pages that need invalidate after we got
-        * the iolock to protect against other threads adding new pages while
-        * we were waiting for the iolock.
-        */
-       if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
-               xfs_rw_iunlock(ip, iolock);
-               iolock = XFS_IOLOCK_EXCL;
-               xfs_rw_ilock(ip, iolock);
        }
 
+       xfs_rw_ilock(ip, iolock);
+
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
                goto out;
        count = iov_iter_count(from);
        end = iocb->ki_pos + count - 1;
 
-       /*
-        * See xfs_file_dio_aio_read() for why we do a full-file flush here.
-        */
        if (mapping->nrpages) {
-               ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
                if (ret)
                        goto out;
+
                /*
                 * Invalidate whole pages. This can return an error if we fail
                 * to invalidate a page, but this should never happen on XFS.
                 * Warn if it does fail.
                 */
-               ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
+               ret = invalidate_inode_pages2_range(mapping,
+                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
                WARN_ON_ONCE(ret);
                ret = 0;
        }
 
        /*
         * If we are doing unaligned IO, wait for all other IO to drain,
-        * otherwise demote the lock if we had to flush cached pages
+        * otherwise demote the lock if we had to take the exclusive lock
+        * for other reasons in xfs_file_aio_write_checks.
         */
        if (unaligned_io)
                inode_dio_wait(inode);
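Both direct I/O paths now write back and invalidate only the pages that overlap the I/O, via filemap_write_and_wait_range() and invalidate_inode_pages2_range() over [ki_pos, end], instead of cycling up to the exclusive iolock and flushing the whole file. The sketch below shows how the inclusive page-index range falls out of the byte range; the 4 KiB PAGE_SHIFT is only an assumption for the example:

#include <stdio.h>

#define PAGE_SHIFT 12   /* assume 4 KiB pages for the example */

/*
 * Compute the inclusive page-index range covering a direct I/O byte range,
 * mirroring the pos >> PAGE_SHIFT .. end >> PAGE_SHIFT arguments passed to
 * invalidate_inode_pages2_range() in the patch.
 */
static void dio_page_range(long long pos, long long count,
                           long long *first_pg, long long *last_pg)
{
        long long end = pos + count - 1;        /* inclusive last byte */

        *first_pg = pos >> PAGE_SHIFT;
        *last_pg = end >> PAGE_SHIFT;
}

int main(void)
{
        long long first, last;

        dio_page_range(4096, 8192, &first, &last);
        printf("pages %lld..%lld\n", first, last);      /* pages 1..2 */

        dio_page_range(1000, 100, &first, &last);
        printf("pages %lld..%lld\n", first, last);      /* pages 0..0 */
        return 0;
}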
@@ -947,134 +909,6 @@ out_unlock:
        return error;
 }
 
-/*
- * Flush all file writes out to disk.
- */
-static int
-xfs_file_wait_for_io(
-       struct inode    *inode,
-       loff_t          offset,
-       size_t          len)
-{
-       loff_t          rounding;
-       loff_t          ioffset;
-       loff_t          iendoffset;
-       loff_t          bs;
-       int             ret;
-
-       bs = inode->i_sb->s_blocksize;
-       inode_dio_wait(inode);
-
-       rounding = max_t(xfs_off_t, bs, PAGE_SIZE);
-       ioffset = round_down(offset, rounding);
-       iendoffset = round_up(offset + len, rounding) - 1;
-       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-                                          iendoffset);
-       return ret;
-}
-
-/* Hook up to the VFS reflink function */
-STATIC int
-xfs_file_share_range(
-       struct file     *file_in,
-       loff_t          pos_in,
-       struct file     *file_out,
-       loff_t          pos_out,
-       u64             len,
-       bool            is_dedupe)
-{
-       struct inode    *inode_in;
-       struct inode    *inode_out;
-       ssize_t         ret;
-       loff_t          bs;
-       loff_t          isize;
-       int             same_inode;
-       loff_t          blen;
-       unsigned int    flags = 0;
-
-       inode_in = file_inode(file_in);
-       inode_out = file_inode(file_out);
-       bs = inode_out->i_sb->s_blocksize;
-
-       /* Don't touch certain kinds of inodes */
-       if (IS_IMMUTABLE(inode_out))
-               return -EPERM;
-       if (IS_SWAPFILE(inode_in) ||
-           IS_SWAPFILE(inode_out))
-               return -ETXTBSY;
-
-       /* Reflink only works within this filesystem. */
-       if (inode_in->i_sb != inode_out->i_sb)
-               return -EXDEV;
-       same_inode = (inode_in->i_ino == inode_out->i_ino);
-
-       /* Don't reflink dirs, pipes, sockets... */
-       if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
-               return -EISDIR;
-       if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
-               return -EINVAL;
-       if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
-               return -EINVAL;
-
-       /* Don't share DAX file data for now. */
-       if (IS_DAX(inode_in) || IS_DAX(inode_out))
-               return -EINVAL;
-
-       /* Are we going all the way to the end? */
-       isize = i_size_read(inode_in);
-       if (isize == 0)
-               return 0;
-       if (len == 0)
-               len = isize - pos_in;
-
-       /* Ensure offsets don't wrap and the input is inside i_size */
-       if (pos_in + len < pos_in || pos_out + len < pos_out ||
-           pos_in + len > isize)
-               return -EINVAL;
-
-       /* Don't allow dedupe past EOF in the dest file */
-       if (is_dedupe) {
-               loff_t  disize;
-
-               disize = i_size_read(inode_out);
-               if (pos_out >= disize || pos_out + len > disize)
-                       return -EINVAL;
-       }
-
-       /* If we're linking to EOF, continue to the block boundary. */
-       if (pos_in + len == isize)
-               blen = ALIGN(isize, bs) - pos_in;
-       else
-               blen = len;
-
-       /* Only reflink if we're aligned to block boundaries */
-       if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
-           !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
-               return -EINVAL;
-
-       /* Don't allow overlapped reflink within the same file */
-       if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen)
-               return -EINVAL;
-
-       /* Wait for the completion of any pending IOs on srcfile */
-       ret = xfs_file_wait_for_io(inode_in, pos_in, len);
-       if (ret)
-               goto out;
-       ret = xfs_file_wait_for_io(inode_out, pos_out, len);
-       if (ret)
-               goto out;
-
-       if (is_dedupe)
-               flags |= XFS_REFLINK_DEDUPE;
-       ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out),
-                       pos_out, len, flags);
-       if (ret < 0)
-               goto out;
-
-out:
-       return ret;
-}
-
 STATIC ssize_t
 xfs_file_copy_range(
        struct file     *file_in,
@@ -1086,7 +920,7 @@ xfs_file_copy_range(
 {
        int             error;
 
-       error = xfs_file_share_range(file_in, pos_in, file_out, pos_out,
+       error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
                                     len, false);
        if (error)
                return error;
@@ -1101,7 +935,7 @@ xfs_file_clone_range(
        loff_t          pos_out,
        u64             len)
 {
-       return xfs_file_share_range(file_in, pos_in, file_out, pos_out,
+       return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
                                     len, false);
 }
 
@@ -1124,7 +958,7 @@ xfs_file_dedupe_range(
        if (len > XFS_MAX_DEDUPE_LEN)
                len = XFS_MAX_DEDUPE_LEN;
 
-       error = xfs_file_share_range(src_file, loff, dst_file, dst_loff,
+       error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
                                     len, true);
        if (error)
                return error;
index 14796b744e0a1ebb9f8d428131d2522316262e82..9c3e5c6ddf20d1bf53b9c8bfd7cb028b62265547 100644 (file)
@@ -123,7 +123,6 @@ __xfs_inode_free(
 {
        /* asserts to verify all state is correct here */
        ASSERT(atomic_read(&ip->i_pincount) == 0);
-       ASSERT(!xfs_isiflocked(ip));
        XFS_STATS_DEC(ip->i_mount, vn_active);
 
        call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
@@ -133,6 +132,8 @@ void
 xfs_inode_free(
        struct xfs_inode        *ip)
 {
+       ASSERT(!xfs_isiflocked(ip));
+
        /*
         * Because we use RCU freeing we need to ensure the inode always
         * appears to be reclaimed with an invalid inode number when in the
@@ -981,6 +982,7 @@ restart:
 
        if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
                xfs_iunpin_wait(ip);
+               /* xfs_iflush_abort() drops the flush lock */
                xfs_iflush_abort(ip, false);
                goto reclaim;
        }
@@ -989,10 +991,10 @@ restart:
                        goto out_ifunlock;
                xfs_iunpin_wait(ip);
        }
-       if (xfs_iflags_test(ip, XFS_ISTALE))
-               goto reclaim;
-       if (xfs_inode_clean(ip))
+       if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) {
+               xfs_ifunlock(ip);
                goto reclaim;
+       }
 
        /*
         * Never flush out dirty data during non-blocking reclaim, as it would
@@ -1030,25 +1032,24 @@ restart:
                xfs_buf_relse(bp);
        }
 
-       xfs_iflock(ip);
 reclaim:
+       ASSERT(!xfs_isiflocked(ip));
+
        /*
         * Because we use RCU freeing we need to ensure the inode always appears
         * to be reclaimed with an invalid inode number when in the free state.
-        * We do this as early as possible under the ILOCK and flush lock so
-        * that xfs_iflush_cluster() can be guaranteed to detect races with us
-        * here. By doing this, we guarantee that once xfs_iflush_cluster has
-        * locked both the XFS_ILOCK and the flush lock that it will see either
-        * a valid, flushable inode that will serialise correctly against the
-        * locks below, or it will see a clean (and invalid) inode that it can
-        * skip.
+        * We do this as early as possible under the ILOCK so that
+        * xfs_iflush_cluster() can be guaranteed to detect races with us here.
+        * By doing this, we guarantee that once xfs_iflush_cluster has locked
+        * XFS_ILOCK that it will see either a valid, flushable inode that will
+        * serialise correctly, or it will see a clean (and invalid) inode that
+        * it can skip.
         */
        spin_lock(&ip->i_flags_lock);
        ip->i_flags = XFS_IRECLAIM;
        ip->i_ino = 0;
        spin_unlock(&ip->i_flags_lock);
 
-       xfs_ifunlock(ip);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
        XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
@@ -1580,10 +1581,15 @@ xfs_inode_free_cowblocks(
        struct xfs_eofblocks *eofb = args;
        bool need_iolock = true;
        int match;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 
        ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
 
-       if (!xfs_reflink_has_real_cow_blocks(ip)) {
+       /*
+        * Just clear the tag if we have an empty cow fork or none at all. It's
+        * possible the inode was fully unshared since it was originally tagged.
+        */
+       if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
                trace_xfs_inode_free_cowblocks_invalid(ip);
                xfs_inode_clear_cowblocks_tag(ip);
                return 0;
@@ -1656,9 +1662,9 @@ void
 xfs_inode_set_cowblocks_tag(
        xfs_inode_t     *ip)
 {
-       trace_xfs_inode_set_eofblocks_tag(ip);
+       trace_xfs_inode_set_cowblocks_tag(ip);
        return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
-                       trace_xfs_perag_set_eofblocks,
+                       trace_xfs_perag_set_cowblocks,
                        XFS_ICI_COWBLOCKS_TAG);
 }
 
@@ -1666,7 +1672,7 @@ void
 xfs_inode_clear_cowblocks_tag(
        xfs_inode_t     *ip)
 {
-       trace_xfs_inode_clear_eofblocks_tag(ip);
+       trace_xfs_inode_clear_cowblocks_tag(ip);
        return __xfs_inode_clear_eofblocks_tag(ip,
-                       trace_xfs_perag_clear_eofblocks, XFS_ICI_COWBLOCKS_TAG);
+                       trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
 }
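xfs_inode_free_cowblocks() now decides from the in-core COW fork alone whether the cowblocks tag is stale: if the inode is not a reflink inode, or its COW fork has if_bytes == 0, the tag is cleared and the inode skipped. A tiny sketch of that predicate, with the relevant state modelled as a hypothetical plain structure:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical, simplified view of the state the scanner inspects. */
struct cow_state {
        bool is_reflink;        /* inode has the reflink flag set */
        size_t cow_if_bytes;    /* bytes of in-core COW fork extents */
};

/* True when the COWBLOCKS tag should simply be cleared and the inode skipped. */
static bool cowblocks_tag_stale(const struct cow_state *st)
{
        return !st->is_reflink || st->cow_if_bytes == 0;
}

int main(void)
{
        struct cow_state unshared = { .is_reflink = true, .cow_if_bytes = 0 };
        struct cow_state pending  = { .is_reflink = true, .cow_if_bytes = 64 };

        printf("fully unshared: %d\n", cowblocks_tag_stale(&unshared));  /* 1 */
        printf("pending COW:    %d\n", cowblocks_tag_stale(&pending));   /* 0 */
        return 0;
}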
index d45ca72af6fbcbc6bb0d97a91b6467a930ace4a2..865ad1373e5eb5a464e3ce16ab47d8a5af741f52 100644 (file)
@@ -133,7 +133,7 @@ xfs_icreate_item_committing(
 /*
  * This is the ops vector shared by all buf log items.
  */
-static struct xfs_item_ops xfs_icreate_item_ops = {
+static const struct xfs_item_ops xfs_icreate_item_ops = {
        .iop_size       = xfs_icreate_item_size,
        .iop_format     = xfs_icreate_item_format,
        .iop_pin        = xfs_icreate_item_pin,
index f14c1de2549db758f4bed9641fbfa99769570c71..71e8a81c91a371d659dbad0b57099916ad0b2168 100644 (file)
@@ -246,6 +246,11 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
  * Synchronize processes attempting to flush the in-core inode back to disk.
  */
 
+static inline int xfs_isiflocked(struct xfs_inode *ip)
+{
+       return xfs_iflags_test(ip, XFS_IFLOCK);
+}
+
 extern void __xfs_iflock(struct xfs_inode *ip);
 
 static inline int xfs_iflock_nowait(struct xfs_inode *ip)
@@ -261,16 +266,12 @@ static inline void xfs_iflock(struct xfs_inode *ip)
 
 static inline void xfs_ifunlock(struct xfs_inode *ip)
 {
+       ASSERT(xfs_isiflocked(ip));
        xfs_iflags_clear(ip, XFS_IFLOCK);
        smp_mb();
        wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
 }
 
-static inline int xfs_isiflocked(struct xfs_inode *ip)
-{
-       return xfs_iflags_test(ip, XFS_IFLOCK);
-}
-
 /*
  * Flags for inode locking.
  * Bit ranges: 1<<1  - 1<<16-1 -- iolock/ilock modes (bitfield)
index 9610e9c0095277c7b53feb9ad5e230d6b156327c..d90e7811ccdd9ccec14f9ebb29219bc065d36270 100644 (file)
@@ -164,7 +164,7 @@ xfs_inode_item_format_data_fork(
                        struct xfs_bmbt_rec *p;
 
                        ASSERT(ip->i_df.if_u1.if_extents != NULL);
-                       ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
+                       ASSERT(xfs_iext_count(&ip->i_df) > 0);
 
                        p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
                        data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
@@ -261,7 +261,7 @@ xfs_inode_item_format_attr_fork(
                    ip->i_afp->if_bytes > 0) {
                        struct xfs_bmbt_rec *p;
 
-                       ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
+                       ASSERT(xfs_iext_count(ip->i_afp) ==
                                ip->i_d.di_anextents);
                        ASSERT(ip->i_afp->if_u1.if_extents != NULL);
 
index c245bed3249bf1a192f430c7b7c781e84baf93e3..a39197501a7cd8cb597eb0250233c5802b1d8b9d 100644 (file)
@@ -910,16 +910,14 @@ xfs_ioc_fsgetxattr(
        if (attr) {
                if (ip->i_afp) {
                        if (ip->i_afp->if_flags & XFS_IFEXTENTS)
-                               fa.fsx_nextents = ip->i_afp->if_bytes /
-                                                       sizeof(xfs_bmbt_rec_t);
+                               fa.fsx_nextents = xfs_iext_count(ip->i_afp);
                        else
                                fa.fsx_nextents = ip->i_d.di_anextents;
                } else
                        fa.fsx_nextents = 0;
        } else {
                if (ip->i_df.if_flags & XFS_IFEXTENTS)
-                       fa.fsx_nextents = ip->i_df.if_bytes /
-                                               sizeof(xfs_bmbt_rec_t);
+                       fa.fsx_nextents = xfs_iext_count(&ip->i_df);
                else
                        fa.fsx_nextents = ip->i_d.di_nextents;
        }
index d907eb9f8ef32a079f845c4aa4731bb3413fcb25..15a83813b7085ad281ee713be05262abe107fa3e 100644 (file)
@@ -395,11 +395,12 @@ xfs_iomap_prealloc_size(
        struct xfs_inode        *ip,
        loff_t                  offset,
        loff_t                  count,
-       xfs_extnum_t            idx,
-       struct xfs_bmbt_irec    *prev)
+       xfs_extnum_t            idx)
 {
        struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
        xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       struct xfs_bmbt_irec    prev;
        int                     shift = 0;
        int64_t                 freesp;
        xfs_fsblock_t           qblocks;
@@ -419,8 +420,8 @@ xfs_iomap_prealloc_size(
         */
        if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ||
            XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
-           idx == 0 ||
-           prev->br_startoff + prev->br_blockcount < offset_fsb)
+           !xfs_iext_get_extent(ifp, idx - 1, &prev) ||
+           prev.br_startoff + prev.br_blockcount < offset_fsb)
                return mp->m_writeio_blocks;
 
        /*
@@ -439,8 +440,8 @@ xfs_iomap_prealloc_size(
         * always extends to MAXEXTLEN rather than falling short due to things
         * like stripe unit/width alignment of real extents.
         */
-       if (prev->br_blockcount <= (MAXEXTLEN >> 1))
-               alloc_blocks = prev->br_blockcount << 1;
+       if (prev.br_blockcount <= (MAXEXTLEN >> 1))
+               alloc_blocks = prev.br_blockcount << 1;
        else
                alloc_blocks = XFS_B_TO_FSB(mp, offset);
        if (!alloc_blocks)
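xfs_iomap_prealloc_size() now fetches the previous extent by value with xfs_iext_get_extent(ifp, idx - 1, &prev), but the sizing rule is unchanged: while the last written extent is no larger than half of MAXEXTLEN, the next delalloc reservation doubles it; otherwise it falls back to an offset-derived size. A sketch of that rule, with maxextlen and the fallback passed in directly rather than read from the mount:

#include <stdio.h>

/*
 * Size the next speculative preallocation: double the previous extent while
 * it stays at or below half the maximum extent length, otherwise use the
 * offset-derived fallback (XFS_B_TO_FSB(mp, offset) in the kernel).
 */
static unsigned long long prealloc_size(unsigned long long prev_blockcount,
                                        unsigned long long maxextlen,
                                        unsigned long long fallback_blocks)
{
        if (prev_blockcount <= maxextlen >> 1)
                return prev_blockcount << 1;
        return fallback_blocks;
}

int main(void)
{
        const unsigned long long maxextlen = (1ULL << 21) - 1;  /* 21-bit extent length */

        printf("%llu\n", prealloc_size(64, maxextlen, 4096));           /* 128 */
        printf("%llu\n", prealloc_size(maxextlen, maxextlen, 4096));    /* 4096 */
        return 0;
}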
@@ -535,11 +536,11 @@ xfs_file_iomap_begin_delay(
        xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
        xfs_fileoff_t           maxbytes_fsb =
                XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
-       xfs_fileoff_t           end_fsb, orig_end_fsb;
+       xfs_fileoff_t           end_fsb;
        int                     error = 0, eof = 0;
        struct xfs_bmbt_irec    got;
-       struct xfs_bmbt_irec    prev;
        xfs_extnum_t            idx;
+       xfs_fsblock_t           prealloc_blocks = 0;
 
        ASSERT(!XFS_IS_REALTIME_INODE(ip));
        ASSERT(!xfs_get_extsz_hint(ip));
@@ -563,9 +564,19 @@ xfs_file_iomap_begin_delay(
                        goto out_unlock;
        }
 
-       xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
-                       &got, &prev);
+       eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
        if (!eof && got.br_startoff <= offset_fsb) {
+               if (xfs_is_reflink_inode(ip)) {
+                       bool            shared;
+
+                       end_fsb = min(XFS_B_TO_FSB(mp, offset + count),
+                                       maxbytes_fsb);
+                       xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
+                       error = xfs_reflink_reserve_cow(ip, &got, &shared);
+                       if (error)
+                               goto out_unlock;
+               }
+
                trace_xfs_iomap_found(ip, offset, count, 0, &got);
                goto done;
        }
@@ -584,35 +595,32 @@ xfs_file_iomap_begin_delay(
         * the lower level functions are updated.
         */
        count = min_t(loff_t, count, 1024 * PAGE_SIZE);
-       end_fsb = orig_end_fsb =
-               min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
+       end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
 
        if (eof) {
-               xfs_fsblock_t   prealloc_blocks;
-
-               prealloc_blocks =
-                       xfs_iomap_prealloc_size(ip, offset, count, idx, &prev);
+               prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx);
                if (prealloc_blocks) {
                        xfs_extlen_t    align;
                        xfs_off_t       end_offset;
+                       xfs_fileoff_t   p_end_fsb;
 
                        end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1);
-                       end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
-                               prealloc_blocks;
+                       p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
+                                       prealloc_blocks;
 
                        align = xfs_eof_alignment(ip, 0);
                        if (align)
-                               end_fsb = roundup_64(end_fsb, align);
+                               p_end_fsb = roundup_64(p_end_fsb, align);
 
-                       end_fsb = min(end_fsb, maxbytes_fsb);
-                       ASSERT(end_fsb > offset_fsb);
+                       p_end_fsb = min(p_end_fsb, maxbytes_fsb);
+                       ASSERT(p_end_fsb > offset_fsb);
+                       prealloc_blocks = p_end_fsb - end_fsb;
                }
        }
 
 retry:
        error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
-                       end_fsb - offset_fsb, &got,
-                       &prev, &idx, eof);
+                       end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof);
        switch (error) {
        case 0:
                break;
@@ -620,8 +628,8 @@ retry:
        case -EDQUOT:
                /* retry without any preallocation */
                trace_xfs_delalloc_enospc(ip, offset, count);
-               if (end_fsb != orig_end_fsb) {
-                       end_fsb = orig_end_fsb;
+               if (prealloc_blocks) {
+                       prealloc_blocks = 0;
                        goto retry;
                }
                /*FALLTHRU*/
@@ -629,13 +637,6 @@ retry:
                goto out_unlock;
        }
 
-       /*
-        * Tag the inode as speculatively preallocated so we can reclaim this
-        * space on demand, if necessary.
-        */
-       if (end_fsb != orig_end_fsb)
-               xfs_inode_set_eofblocks_tag(ip);
-
        trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
        if (isnullstartblock(got.br_startblock))
@@ -961,19 +962,13 @@ xfs_file_iomap_begin(
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_bmbt_irec    imap;
        xfs_fileoff_t           offset_fsb, end_fsb;
-       bool                    shared, trimmed;
        int                     nimaps = 1, error = 0;
+       bool                    shared = false, trimmed = false;
        unsigned                lockmode;
 
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
 
-       if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
-               error = xfs_reflink_reserve_cow_range(ip, offset, length);
-               if (error < 0)
-                       return error;
-       }
-
        if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
                   !xfs_get_extsz_hint(ip)) {
                /* Reserve delalloc blocks for regular writeback. */
@@ -981,7 +976,16 @@ xfs_file_iomap_begin(
                                iomap);
        }
 
-       lockmode = xfs_ilock_data_map_shared(ip);
+       /*
+        * COW writes will allocate delalloc space, so we need to make sure
+        * to take the lock exclusively here.
+        */
+       if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+               lockmode = XFS_ILOCK_EXCL;
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+       } else {
+               lockmode = xfs_ilock_data_map_shared(ip);
+       }
 
        ASSERT(offset <= mp->m_super->s_maxbytes);
        if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -991,16 +995,24 @@ xfs_file_iomap_begin(
 
        error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
                               &nimaps, 0);
-       if (error) {
-               xfs_iunlock(ip, lockmode);
-               return error;
+       if (error)
+               goto out_unlock;
+
+       if (flags & IOMAP_REPORT) {
+               /* Trim the mapping to the nearest shared extent boundary. */
+               error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
+                               &trimmed);
+               if (error)
+                       goto out_unlock;
        }
 
-       /* Trim the mapping to the nearest shared extent boundary. */
-       error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
-       if (error) {
-               xfs_iunlock(ip, lockmode);
-               return error;
+       if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+               error = xfs_reflink_reserve_cow(ip, &imap, &shared);
+               if (error)
+                       goto out_unlock;
+
+               end_fsb = imap.br_startoff + imap.br_blockcount;
+               length = XFS_FSB_TO_B(mp, end_fsb) - offset;
        }
 
        if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
@@ -1039,6 +1051,9 @@ xfs_file_iomap_begin(
        if (shared)
                iomap->flags |= IOMAP_F_SHARED;
        return 0;
+out_unlock:
+       xfs_iunlock(ip, lockmode);
+       return error;
 }
 
 static int
index fc7873942bea51866611aee5a7437f3d4a036a67..b341f10cf4810bf3716aec354f33fbdbf9ab5494 100644 (file)
@@ -1009,6 +1009,7 @@ xfs_mountfs(
  out_quota:
        xfs_qm_unmount_quotas(mp);
  out_rtunmount:
+       mp->m_super->s_flags &= ~MS_ACTIVE;
        xfs_rtunmount_inodes(mp);
  out_rele_rip:
        IRELE(rip);
index a60d9e2739d14a2ebcff8ee7dcedae7f5177bf3c..45e50ea90769f15d80e3da57e9a6d1721d12a21a 100644 (file)
@@ -1135,7 +1135,7 @@ xfs_qm_get_rtblks(
                        return error;
        }
        rtblks = 0;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       nextents = xfs_iext_count(ifp);
        for (idx = 0; idx < nextents; idx++)
                rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
        *O_rtblks = (xfs_qcnt_t)rtblks;
index 5965e9455d91e03621680a08493610085d5a926c..becf2465dd2312d01b6a01accdf8e5bd9b6de163 100644 (file)
@@ -182,7 +182,8 @@ xfs_reflink_trim_around_shared(
        if (!xfs_is_reflink_inode(ip) ||
            ISUNWRITTEN(irec) ||
            irec->br_startblock == HOLESTARTBLOCK ||
-           irec->br_startblock == DELAYSTARTBLOCK) {
+           irec->br_startblock == DELAYSTARTBLOCK ||
+           isnullstartblock(irec->br_startblock)) {
                *shared = false;
                return 0;
        }
@@ -227,50 +228,54 @@ xfs_reflink_trim_around_shared(
        }
 }
 
-/* Create a CoW reservation for a range of blocks within a file. */
-static int
-__xfs_reflink_reserve_cow(
+/*
+ * Trim the passed in imap to the next shared/unshared extent boundary, and
+ * if imap->br_startoff points to a shared extent reserve space for it in the
+ * COW fork.  In this case *shared is set to true, else to false.
+ *
+ * Note that imap will always contain the block numbers for the existing blocks
+ * in the data fork, as the upper layers need them for read-modify-write
+ * operations.
+ */
+int
+xfs_reflink_reserve_cow(
        struct xfs_inode        *ip,
-       xfs_fileoff_t           *offset_fsb,
-       xfs_fileoff_t           end_fsb,
-       bool                    *skipped)
+       struct xfs_bmbt_irec    *imap,
+       bool                    *shared)
 {
-       struct xfs_bmbt_irec    got, prev, imap;
-       xfs_fileoff_t           orig_end_fsb;
-       int                     nimaps, eof = 0, error = 0;
-       bool                    shared = false, trimmed = false;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_bmbt_irec    got;
+       int                     error = 0;
+       bool                    eof = false, trimmed;
        xfs_extnum_t            idx;
-       xfs_extlen_t            align;
-
-       /* Already reserved?  Skip the refcount btree access. */
-       xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx,
-                       &got, &prev);
-       if (!eof && got.br_startoff <= *offset_fsb) {
-               end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount;
-               trace_xfs_reflink_cow_found(ip, &got);
-               goto done;
-       }
 
-       /* Read extent from the source file. */
-       nimaps = 1;
-       error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
-                       &imap, &nimaps, 0);
-       if (error)
-               goto out_unlock;
-       ASSERT(nimaps == 1);
+       /*
+        * Search the COW fork extent list first.  This serves two purposes:
+        * first, it implements the speculative preallocation using cowextsize,
+        * so that we also unshare blocks adjacent to shared blocks instead
+        * of just the shared blocks themselves.  Second, the lookup in the
+        * extent list is generally faster than going out to the shared extent
+        * tree.
+        */
+
+       if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got))
+               eof = true;
+       if (!eof && got.br_startoff <= imap->br_startoff) {
+               trace_xfs_reflink_cow_found(ip, imap);
+               xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
+
+               *shared = true;
+               return 0;
+       }
 
        /* Trim the mapping to the nearest shared extent boundary. */
-       error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
+       error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
        if (error)
-               goto out_unlock;
-
-       end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;
+               return error;
 
        /* Not shared?  Just report the (potentially capped) extent. */
-       if (!shared) {
-               *skipped = true;
-               goto done;
-       }
+       if (!*shared)
+               return 0;
 
        /*
         * Fork all the shared blocks from our write offset until the end of
@@ -278,72 +283,17 @@ __xfs_reflink_reserve_cow(
         */
        error = xfs_qm_dqattach_locked(ip, 0);
        if (error)
-               goto out_unlock;
-
-       align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
-       if (align)
-               end_fsb = roundup_64(end_fsb, align);
-
-retry:
-       error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb,
-                       end_fsb - *offset_fsb, &got,
-                       &prev, &idx, eof);
-       switch (error) {
-       case 0:
-               break;
-       case -ENOSPC:
-       case -EDQUOT:
-               /* retry without any preallocation */
-               trace_xfs_reflink_cow_enospc(ip, &imap);
-               if (end_fsb != orig_end_fsb) {
-                       end_fsb = orig_end_fsb;
-                       goto retry;
-               }
-               /*FALLTHRU*/
-       default:
-               goto out_unlock;
-       }
+               return error;
 
-       if (end_fsb != orig_end_fsb)
-               xfs_inode_set_cowblocks_tag(ip);
+       error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
+                       imap->br_blockcount, 0, &got, &idx, eof);
+       if (error == -ENOSPC || error == -EDQUOT)
+               trace_xfs_reflink_cow_enospc(ip, imap);
+       if (error)
+               return error;
 
        trace_xfs_reflink_cow_alloc(ip, &got);
-done:
-       *offset_fsb = end_fsb;
-out_unlock:
-       return error;
-}
-
-/* Create a CoW reservation for part of a file. */
-int
-xfs_reflink_reserve_cow_range(
-       struct xfs_inode        *ip,
-       xfs_off_t               offset,
-       xfs_off_t               count)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       xfs_fileoff_t           offset_fsb, end_fsb;
-       bool                    skipped = false;
-       int                     error;
-
-       trace_xfs_reflink_reserve_cow_range(ip, offset, count);
-
-       offset_fsb = XFS_B_TO_FSBT(mp, offset);
-       end_fsb = XFS_B_TO_FSB(mp, offset + count);
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       while (offset_fsb < end_fsb) {
-               error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
-                               &skipped);
-               if (error) {
-                       trace_xfs_reflink_reserve_cow_range_error(ip, error,
-                               _RET_IP_);
-                       break;
-               }
-       }
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       return error;
+       return 0;
 }
 
 /* Allocate all CoW reservations covering a range of blocks in a file. */
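Several of the reworked paths (the delalloc iomap lookup, xfs_reflink_reserve_cow() above, and xfs_reflink_cancel_cow_blocks() below) clamp a mapping to a file-offset window with xfs_trim_extent(). A minimal sketch of what such a trim does to a (startoff, blockcount) pair, assuming a simplified extent structure and that the mapping overlaps the window:

#include <stdio.h>

struct irec {
        unsigned long long startoff;    /* first file block of the mapping */
        unsigned long long blockcount;  /* length of the mapping in blocks */
};

/*
 * Clamp *irec to the window [start, start + len), in the spirit of
 * xfs_trim_extent().  The mapping is assumed to overlap the window.
 */
static void trim_extent(struct irec *irec, unsigned long long start,
                        unsigned long long len)
{
        unsigned long long end = irec->startoff + irec->blockcount;
        unsigned long long wend = start + len;

        if (irec->startoff < start) {
                irec->blockcount -= start - irec->startoff;
                irec->startoff = start;
        }
        if (end > wend)
                irec->blockcount -= end - wend;
}

int main(void)
{
        struct irec got = { 8, 32 };            /* blocks 8..39 */

        trim_extent(&got, 16, 8);               /* window: blocks 16..23 */
        printf("startoff=%llu blockcount=%llu\n", got.startoff, got.blockcount);
        /* startoff=16 blockcount=8 */
        return 0;
}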
@@ -358,9 +308,8 @@ __xfs_reflink_allocate_cow(
        struct xfs_defer_ops    dfops;
        struct xfs_trans        *tp;
        xfs_fsblock_t           first_block;
-       xfs_fileoff_t           next_fsb;
        int                     nimaps = 1, error;
-       bool                    skipped = false;
+       bool                    shared;
 
        xfs_defer_init(&dfops, &first_block);
 
@@ -371,33 +320,38 @@ __xfs_reflink_allocate_cow(
 
        xfs_ilock(ip, XFS_ILOCK_EXCL);
 
-       next_fsb = *offset_fsb;
-       error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped);
+       /* Read extent from the source file. */
+       nimaps = 1;
+       error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
+                       &imap, &nimaps, 0);
+       if (error)
+               goto out_unlock;
+       ASSERT(nimaps == 1);
+
+       error = xfs_reflink_reserve_cow(ip, &imap, &shared);
        if (error)
                goto out_trans_cancel;
 
-       if (skipped) {
-               *offset_fsb = next_fsb;
+       if (!shared) {
+               *offset_fsb = imap.br_startoff + imap.br_blockcount;
                goto out_trans_cancel;
        }
 
        xfs_trans_ijoin(tp, ip, 0);
-       error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb,
+       error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
                        XFS_BMAPI_COWFORK, &first_block,
                        XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
                        &imap, &nimaps, &dfops);
        if (error)
                goto out_trans_cancel;
 
-       /* We might not have been able to map the whole delalloc extent */
-       *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);
-
        error = xfs_defer_finish(&tp, &dfops, NULL);
        if (error)
                goto out_trans_cancel;
 
        error = xfs_trans_commit(tp);
 
+       *offset_fsb = imap.br_startoff + imap.br_blockcount;
 out_unlock:
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
@@ -443,87 +397,65 @@ xfs_reflink_allocate_cow_range(
 }
 
 /*
- * Find the CoW reservation (and whether or not it needs block allocation)
- * for a given byte offset of a file.
+ * Find the CoW reservation for a given byte offset of a file.
  */
 bool
 xfs_reflink_find_cow_mapping(
        struct xfs_inode                *ip,
        xfs_off_t                       offset,
-       struct xfs_bmbt_irec            *imap,
-       bool                            *need_alloc)
+       struct xfs_bmbt_irec            *imap)
 {
-       struct xfs_bmbt_irec            irec;
-       struct xfs_ifork                *ifp;
-       struct xfs_bmbt_rec_host        *gotp;
-       xfs_fileoff_t                   bno;
+       struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       xfs_fileoff_t                   offset_fsb;
+       struct xfs_bmbt_irec            got;
        xfs_extnum_t                    idx;
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
        ASSERT(xfs_is_reflink_inode(ip));
 
-       /* Find the extent in the CoW fork. */
-       ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
-       bno = XFS_B_TO_FSBT(ip->i_mount, offset);
-       gotp = xfs_iext_bno_to_ext(ifp, bno, &idx);
-       if (!gotp)
+       offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+       if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
                return false;
-
-       xfs_bmbt_get_all(gotp, &irec);
-       if (bno >= irec.br_startoff + irec.br_blockcount ||
-           bno < irec.br_startoff)
+       if (got.br_startoff > offset_fsb)
                return false;
 
        trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE,
-                       &irec);
-
-       /* If it's still delalloc, we must allocate later. */
-       *imap = irec;
-       *need_alloc = !!(isnullstartblock(irec.br_startblock));
-
+                       &got);
+       *imap = got;
        return true;
 }
 
 /*
  * Trim an extent to end at the next CoW reservation past offset_fsb.
  */
-int
+void
 xfs_reflink_trim_irec_to_next_cow(
        struct xfs_inode                *ip,
        xfs_fileoff_t                   offset_fsb,
        struct xfs_bmbt_irec            *imap)
 {
-       struct xfs_bmbt_irec            irec;
-       struct xfs_ifork                *ifp;
-       struct xfs_bmbt_rec_host        *gotp;
+       struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_bmbt_irec            got;
        xfs_extnum_t                    idx;
 
        if (!xfs_is_reflink_inode(ip))
-               return 0;
+               return;
 
        /* Find the extent in the CoW fork. */
-       ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
-       gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &idx);
-       if (!gotp)
-               return 0;
-       xfs_bmbt_get_all(gotp, &irec);
+       if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+               return;
 
        /* This is the extent before; try sliding up one. */
-       if (irec.br_startoff < offset_fsb) {
-               idx++;
-               if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
-                       return 0;
-               gotp = xfs_iext_get_ext(ifp, idx);
-               xfs_bmbt_get_all(gotp, &irec);
+       if (got.br_startoff < offset_fsb) {
+               if (!xfs_iext_get_extent(ifp, idx + 1, &got))
+                       return;
        }
 
-       if (irec.br_startoff >= imap->br_startoff + imap->br_blockcount)
-               return 0;
+       if (got.br_startoff >= imap->br_startoff + imap->br_blockcount)
+               return;
 
-       imap->br_blockcount = irec.br_startoff - imap->br_startoff;
+       imap->br_blockcount = got.br_startoff - imap->br_startoff;
        trace_xfs_reflink_trim_irec(ip, imap);
-
-       return 0;
 }
 
 /*
@@ -536,58 +468,46 @@ xfs_reflink_cancel_cow_blocks(
        xfs_fileoff_t                   offset_fsb,
        xfs_fileoff_t                   end_fsb)
 {
-       struct xfs_bmbt_irec            irec;
-       xfs_filblks_t                   count_fsb;
+       struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_bmbt_irec            got, del;
+       xfs_extnum_t                    idx;
        xfs_fsblock_t                   firstfsb;
        struct xfs_defer_ops            dfops;
        int                             error = 0;
-       int                             nimaps;
 
        if (!xfs_is_reflink_inode(ip))
                return 0;
+       if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+               return 0;
 
-       /* Go find the old extent in the CoW fork. */
-       while (offset_fsb < end_fsb) {
-               nimaps = 1;
-               count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
-               error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
-                               &nimaps, XFS_BMAPI_COWFORK);
-               if (error)
-                       break;
-               ASSERT(nimaps == 1);
-
-               trace_xfs_reflink_cancel_cow(ip, &irec);
-
-               if (irec.br_startblock == DELAYSTARTBLOCK) {
-                       /* Free a delayed allocation. */
-                       xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount,
-                                       false);
-                       ip->i_delayed_blks -= irec.br_blockcount;
+       while (got.br_startoff < end_fsb) {
+               del = got;
+               xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
+               trace_xfs_reflink_cancel_cow(ip, &del);
 
-                       /* Remove the mapping from the CoW fork. */
-                       error = xfs_bunmapi_cow(ip, &irec);
+               if (isnullstartblock(del.br_startblock)) {
+                       error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
+                                       &idx, &got, &del);
                        if (error)
                                break;
-               } else if (irec.br_startblock == HOLESTARTBLOCK) {
-                       /* empty */
                } else {
                        xfs_trans_ijoin(*tpp, ip, 0);
                        xfs_defer_init(&dfops, &firstfsb);
 
                        /* Free the CoW orphan record. */
                        error = xfs_refcount_free_cow_extent(ip->i_mount,
-                                       &dfops, irec.br_startblock,
-                                       irec.br_blockcount);
+                                       &dfops, del.br_startblock,
+                                       del.br_blockcount);
                        if (error)
                                break;
 
                        xfs_bmap_add_free(ip->i_mount, &dfops,
-                                       irec.br_startblock, irec.br_blockcount,
+                                       del.br_startblock, del.br_blockcount,
                                        NULL);
 
                        /* Update quota accounting */
                        xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
-                                       -(long)irec.br_blockcount);
+                                       -(long)del.br_blockcount);
 
                        /* Roll the transaction */
                        error = xfs_defer_finish(tpp, &dfops, ip);
@@ -597,15 +517,17 @@ xfs_reflink_cancel_cow_blocks(
                        }
 
                        /* Remove the mapping from the CoW fork. */
-                       error = xfs_bunmapi_cow(ip, &irec);
-                       if (error)
-                               break;
+                       xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
                }
 
-               /* Roll on... */
-               offset_fsb = irec.br_startoff + irec.br_blockcount;
+               if (!xfs_iext_get_extent(ifp, ++idx, &got))
+                       break;
        }
 
+       /* clear tag if cow fork is emptied */
+       if (!ifp->if_bytes)
+               xfs_inode_clear_cowblocks_tag(ip);
+
        return error;
 }
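
The rewritten CoW helpers above (find, trim, cancel) share one pattern: look up the first in-core extent at the starting offset, trim a private copy to the caller's range, process it, and then step an extent-index cursor forward instead of re-reading the fork with xfs_bmapi_read() on every pass of the loop. Below is a minimal plain-C sketch of that walk; struct ext, ext_lookup() and ext_trim() are simplified stand-ins for xfs_bmbt_irec, xfs_iext_lookup_extent()/xfs_iext_get_extent() and xfs_trim_extent(), not the kernel implementations.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct ext { uint64_t startoff, blockcount; };

/* Stand-in for xfs_iext_lookup_extent(): first extent ending past bno. */
static bool ext_lookup(const struct ext *fork, int nr, uint64_t bno,
		       int *idx, struct ext *got)
{
	for (*idx = 0; *idx < nr; (*idx)++) {
		if (fork[*idx].startoff + fork[*idx].blockcount > bno) {
			*got = fork[*idx];
			return true;
		}
	}
	return false;
}

/* Stand-in for xfs_trim_extent(): clamp *e to [start, start + len). */
static void ext_trim(struct ext *e, uint64_t start, uint64_t len)
{
	uint64_t end = start + len, eend = e->startoff + e->blockcount;

	if (e->startoff < start) {
		e->blockcount -= start - e->startoff;
		e->startoff = start;
	}
	if (eend > end)
		e->blockcount -= eend - end;
}

int main(void)
{
	struct ext fork[] = { { 0, 8 }, { 16, 4 }, { 32, 8 } };
	uint64_t start = 4, end = 34;
	struct ext got, del;
	int idx;

	if (!ext_lookup(fork, 3, start, &idx, &got))
		return 0;
	while (got.startoff < end) {
		del = got;
		ext_trim(&del, start, end - start);
		printf("cancel [%llu, %llu)\n",
		       (unsigned long long)del.startoff,
		       (unsigned long long)(del.startoff + del.blockcount));
		if (++idx >= 3)		/* kernel: xfs_iext_get_extent() */
			break;
		got = fork[idx];
	}
	return 0;
}
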
 
@@ -668,25 +590,26 @@ xfs_reflink_end_cow(
        xfs_off_t                       offset,
        xfs_off_t                       count)
 {
-       struct xfs_bmbt_irec            irec;
-       struct xfs_bmbt_irec            uirec;
+       struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_bmbt_irec            got, del;
        struct xfs_trans                *tp;
        xfs_fileoff_t                   offset_fsb;
        xfs_fileoff_t                   end_fsb;
-       xfs_filblks_t                   count_fsb;
        xfs_fsblock_t                   firstfsb;
        struct xfs_defer_ops            dfops;
        int                             error;
        unsigned int                    resblks;
-       xfs_filblks_t                   ilen;
        xfs_filblks_t                   rlen;
-       int                             nimaps;
+       xfs_extnum_t                    idx;
 
        trace_xfs_reflink_end_cow(ip, offset, count);
 
+       /* No COW extents?  That's easy! */
+       if (ifp->if_bytes == 0)
+               return 0;
+
        offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
        end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
-       count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
 
        /* Start a rolling transaction to switch the mappings */
        resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
@@ -698,72 +621,61 @@ xfs_reflink_end_cow(
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, ip, 0);
 
-       /* Go find the old extent in the CoW fork. */
-       while (offset_fsb < end_fsb) {
-               /* Read extent from the source file */
-               nimaps = 1;
-               count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
-               error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
-                               &nimaps, XFS_BMAPI_COWFORK);
-               if (error)
-                       goto out_cancel;
-               ASSERT(nimaps == 1);
+       /* If there is a hole at end_fsb - 1 go to the previous extent */
+       if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) ||
+           got.br_startoff > end_fsb) {
+               ASSERT(idx > 0);
+               xfs_iext_get_extent(ifp, --idx, &got);
+       }
 
-               ASSERT(irec.br_startblock != DELAYSTARTBLOCK);
-               trace_xfs_reflink_cow_remap(ip, &irec);
+       /* Walk backwards until we're out of the I/O range... */
+       while (got.br_startoff + got.br_blockcount > offset_fsb) {
+               del = got;
+               xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
 
-               /*
-                * We can have a hole in the CoW fork if part of a directio
-                * write is CoW but part of it isn't.
-                */
-               rlen = ilen = irec.br_blockcount;
-               if (irec.br_startblock == HOLESTARTBLOCK)
+               /* Extent delete may have bumped idx forward */
+               if (!del.br_blockcount) {
+                       idx--;
                        goto next_extent;
+               }
+
+               ASSERT(!isnullstartblock(got.br_startblock));
 
                /* Unmap the old blocks in the data fork. */
-               while (rlen) {
-                       xfs_defer_init(&dfops, &firstfsb);
-                       error = __xfs_bunmapi(tp, ip, irec.br_startoff,
-                                       &rlen, 0, 1, &firstfsb, &dfops);
-                       if (error)
-                               goto out_defer;
-
-                       /*
-                        * Trim the extent to whatever got unmapped.
-                        * Remember, bunmapi works backwards.
-                        */
-                       uirec.br_startblock = irec.br_startblock + rlen;
-                       uirec.br_startoff = irec.br_startoff + rlen;
-                       uirec.br_blockcount = irec.br_blockcount - rlen;
-                       irec.br_blockcount = rlen;
-                       trace_xfs_reflink_cow_remap_piece(ip, &uirec);
+               xfs_defer_init(&dfops, &firstfsb);
+               rlen = del.br_blockcount;
+               error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
+                               &firstfsb, &dfops);
+               if (error)
+                       goto out_defer;
 
-                       /* Free the CoW orphan record. */
-                       error = xfs_refcount_free_cow_extent(tp->t_mountp,
-                                       &dfops, uirec.br_startblock,
-                                       uirec.br_blockcount);
-                       if (error)
-                               goto out_defer;
+               /* Trim the extent to whatever got unmapped. */
+               if (rlen) {
+                       xfs_trim_extent(&del, del.br_startoff + rlen,
+                               del.br_blockcount - rlen);
+               }
+               trace_xfs_reflink_cow_remap(ip, &del);
 
-                       /* Map the new blocks into the data fork. */
-                       error = xfs_bmap_map_extent(tp->t_mountp, &dfops,
-                                       ip, &uirec);
-                       if (error)
-                               goto out_defer;
+               /* Free the CoW orphan record. */
+               error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops,
+                               del.br_startblock, del.br_blockcount);
+               if (error)
+                       goto out_defer;
 
-                       /* Remove the mapping from the CoW fork. */
-                       error = xfs_bunmapi_cow(ip, &uirec);
-                       if (error)
-                               goto out_defer;
+               /* Map the new blocks into the data fork. */
+               error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del);
+               if (error)
+                       goto out_defer;
 
-                       error = xfs_defer_finish(&tp, &dfops, ip);
-                       if (error)
-                               goto out_defer;
-               }
+               /* Remove the mapping from the CoW fork. */
+               xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
 
+               error = xfs_defer_finish(&tp, &dfops, ip);
+               if (error)
+                       goto out_defer;
 next_extent:
-               /* Roll on... */
-               offset_fsb = irec.br_startoff + ilen;
+               if (!xfs_iext_get_extent(ifp, idx, &got))
+                       break;
        }
 
        error = xfs_trans_commit(tp);
@@ -774,7 +686,6 @@ next_extent:
 
 out_defer:
        xfs_defer_cancel(&dfops);
-out_cancel:
        xfs_trans_cancel(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
 out:
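
One subtlety in the loop above: __xfs_bunmapi() unmaps from the end of the range backwards, and on return rlen holds the length it did not get to, so only the tail of del was actually unmapped and is ready to be remapped. A small arithmetic sketch (plain C, not kernel code) of that trim, equivalent to the xfs_trim_extent() call in the diff:

#include <stdint.h>
#include <stdio.h>

struct ext { uint64_t startoff, blockcount; };

int main(void)
{
	struct ext del = { .startoff = 100, .blockcount = 50 };
	uint64_t rlen = 20;	/* blocks bunmapi did not reach this pass */

	if (rlen) {
		/* Keep only the tail that was actually unmapped. */
		del.startoff += rlen;
		del.blockcount -= rlen;
	}
	/* Prints "remap [120, 150)": the head [100, 120) is still mapped. */
	printf("remap [%llu, %llu)\n",
	       (unsigned long long)del.startoff,
	       (unsigned long long)(del.startoff + del.blockcount));
	return 0;
}
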
@@ -1312,19 +1223,26 @@ out_error:
  */
 int
 xfs_reflink_remap_range(
-       struct xfs_inode        *src,
-       xfs_off_t               srcoff,
-       struct xfs_inode        *dest,
-       xfs_off_t               destoff,
-       xfs_off_t               len,
-       unsigned int            flags)
+       struct file             *file_in,
+       loff_t                  pos_in,
+       struct file             *file_out,
+       loff_t                  pos_out,
+       u64                     len,
+       bool                    is_dedupe)
 {
+       struct inode            *inode_in = file_inode(file_in);
+       struct xfs_inode        *src = XFS_I(inode_in);
+       struct inode            *inode_out = file_inode(file_out);
+       struct xfs_inode        *dest = XFS_I(inode_out);
        struct xfs_mount        *mp = src->i_mount;
+       loff_t                  bs = inode_out->i_sb->s_blocksize;
+       bool                    same_inode = (inode_in == inode_out);
        xfs_fileoff_t           sfsbno, dfsbno;
        xfs_filblks_t           fsblen;
-       int                     error;
        xfs_extlen_t            cowextsize;
-       bool                    is_same;
+       loff_t                  isize;
+       ssize_t                 ret;
+       loff_t                  blen;
 
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return -EOPNOTSUPP;
@@ -1332,17 +1250,8 @@ xfs_reflink_remap_range(
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
 
-       /* Don't reflink realtime inodes */
-       if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
-               return -EINVAL;
-
-       if (flags & ~XFS_REFLINK_ALL)
-               return -EINVAL;
-
-       trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
-
        /* Lock both files against IO */
-       if (src->i_ino == dest->i_ino) {
+       if (same_inode) {
                xfs_ilock(src, XFS_IOLOCK_EXCL);
                xfs_ilock(src, XFS_MMAPLOCK_EXCL);
        } else {
@@ -1350,39 +1259,132 @@ xfs_reflink_remap_range(
                xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
        }
 
+       /* Don't touch certain kinds of inodes */
+       ret = -EPERM;
+       if (IS_IMMUTABLE(inode_out))
+               goto out_unlock;
+
+       ret = -ETXTBSY;
+       if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+               goto out_unlock;
+
+       /* Don't reflink dirs, pipes, sockets... */
+       ret = -EISDIR;
+       if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+               goto out_unlock;
+       ret = -EINVAL;
+       if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
+               goto out_unlock;
+       if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+               goto out_unlock;
+
+       /* Don't reflink realtime inodes */
+       if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
+               goto out_unlock;
+
+       /* Don't share DAX file data for now. */
+       if (IS_DAX(inode_in) || IS_DAX(inode_out))
+               goto out_unlock;
+
+       /* Are we going all the way to the end? */
+       isize = i_size_read(inode_in);
+       if (isize == 0) {
+               ret = 0;
+               goto out_unlock;
+       }
+
+       /* Zero length dedupe exits immediately; reflink goes to EOF. */
+       if (len == 0) {
+               if (is_dedupe) {
+                       ret = 0;
+                       goto out_unlock;
+               }
+               len = isize - pos_in;
+       }
+
+       /* Ensure offsets don't wrap and the input is inside i_size */
+       if (pos_in + len < pos_in || pos_out + len < pos_out ||
+           pos_in + len > isize)
+               goto out_unlock;
+
+       /* Don't allow dedupe past EOF in the dest file */
+       if (is_dedupe) {
+               loff_t  disize;
+
+               disize = i_size_read(inode_out);
+               if (pos_out >= disize || pos_out + len > disize)
+                       goto out_unlock;
+       }
+
+       /* If we're linking to EOF, continue to the block boundary. */
+       if (pos_in + len == isize)
+               blen = ALIGN(isize, bs) - pos_in;
+       else
+               blen = len;
+
+       /* Only reflink if we're aligned to block boundaries */
+       if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
+           !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
+               goto out_unlock;
+
+       /* Don't allow overlapped reflink within the same file */
+       if (same_inode) {
+               if (pos_out + blen > pos_in && pos_out < pos_in + blen)
+                       goto out_unlock;
+       }
+
+       /* Wait for the completion of any pending IOs on both files */
+       inode_dio_wait(inode_in);
+       if (!same_inode)
+               inode_dio_wait(inode_out);
+
+       ret = filemap_write_and_wait_range(inode_in->i_mapping,
+                       pos_in, pos_in + len - 1);
+       if (ret)
+               goto out_unlock;
+
+       ret = filemap_write_and_wait_range(inode_out->i_mapping,
+                       pos_out, pos_out + len - 1);
+       if (ret)
+               goto out_unlock;
+
+       trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+
        /*
         * Check that the extents are the same.
         */
-       if (flags & XFS_REFLINK_DEDUPE) {
-               is_same = false;
-               error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest),
-                               destoff, len, &is_same);
-               if (error)
-                       goto out_error;
+       if (is_dedupe) {
+               bool            is_same = false;
+
+               ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out,
+                               len, &is_same);
+               if (ret)
+                       goto out_unlock;
                if (!is_same) {
-                       error = -EBADE;
-                       goto out_error;
+                       ret = -EBADE;
+                       goto out_unlock;
                }
        }
 
-       error = xfs_reflink_set_inode_flag(src, dest);
-       if (error)
-               goto out_error;
+       ret = xfs_reflink_set_inode_flag(src, dest);
+       if (ret)
+               goto out_unlock;
 
        /*
         * Invalidate the page cache so that we can clear any CoW mappings
         * in the destination file.
         */
-       truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff,
-                                  PAGE_ALIGN(destoff + len) - 1);
+       truncate_inode_pages_range(&inode_out->i_data, pos_out,
+                                  PAGE_ALIGN(pos_out + len) - 1);
 
-       dfsbno = XFS_B_TO_FSBT(mp, destoff);
-       sfsbno = XFS_B_TO_FSBT(mp, srcoff);
+       dfsbno = XFS_B_TO_FSBT(mp, pos_out);
+       sfsbno = XFS_B_TO_FSBT(mp, pos_in);
        fsblen = XFS_B_TO_FSB(mp, len);
-       error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
-                       destoff + len);
-       if (error)
-               goto out_error;
+       ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
+                       pos_out + len);
+       if (ret)
+               goto out_unlock;
 
        /*
         * Carry the cowextsize hint from src to dest if we're sharing the
@@ -1390,26 +1392,24 @@ xfs_reflink_remap_range(
         * has a cowextsize hint, and the destination file does not.
         */
        cowextsize = 0;
-       if (srcoff == 0 && len == i_size_read(VFS_I(src)) &&
+       if (pos_in == 0 && len == i_size_read(inode_in) &&
            (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
-           destoff == 0 && len >= i_size_read(VFS_I(dest)) &&
+           pos_out == 0 && len >= i_size_read(inode_out) &&
            !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
                cowextsize = src->i_d.di_cowextsize;
 
-       error = xfs_reflink_update_dest(dest, destoff + len, cowextsize);
-       if (error)
-               goto out_error;
+       ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
 
-out_error:
+out_unlock:
        xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
        xfs_iunlock(src, XFS_IOLOCK_EXCL);
        if (src->i_ino != dest->i_ino) {
                xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
                xfs_iunlock(dest, XFS_IOLOCK_EXCL);
        }
-       if (error)
-               trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_);
-       return error;
+       if (ret)
+               trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
+       return ret;
 }
 
 /*
@@ -1652,37 +1652,3 @@ out:
        trace_xfs_reflink_unshare_error(ip, error, _RET_IP_);
        return error;
 }
-
-/*
- * Does this inode have any real CoW reservations?
- */
-bool
-xfs_reflink_has_real_cow_blocks(
-       struct xfs_inode                *ip)
-{
-       struct xfs_bmbt_irec            irec;
-       struct xfs_ifork                *ifp;
-       struct xfs_bmbt_rec_host        *gotp;
-       xfs_extnum_t                    idx;
-
-       if (!xfs_is_reflink_inode(ip))
-               return false;
-
-       /* Go find the old extent in the CoW fork. */
-       ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
-       gotp = xfs_iext_bno_to_ext(ifp, 0, &idx);
-       while (gotp) {
-               xfs_bmbt_get_all(gotp, &irec);
-
-               if (!isnullstartblock(irec.br_startblock))
-                       return true;
-
-               /* Roll on... */
-               idx++;
-               if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
-                       break;
-               gotp = xfs_iext_get_ext(ifp, idx);
-       }
-
-       return false;
-}
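
The long run of checks added above validates the remap request under the XFS inode locks: reject wrapping offsets and source ranges past EOF, round a request that ends exactly at EOF out to the block size, require block alignment everywhere else, and refuse overlapping ranges within one file. Below is a minimal userspace sketch of just that arithmetic; EX_ALIGN and EX_IS_ALIGNED are local re-implementations for the example, and the function is illustrative, not the kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EX_ALIGN(x, a)      (((x) + (a) - 1) & ~((uint64_t)(a) - 1))
#define EX_IS_ALIGNED(x, a) (((x) & ((uint64_t)(a) - 1)) == 0)

static bool remap_args_valid(uint64_t pos_in, uint64_t pos_out, uint64_t len,
			     uint64_t isize, uint64_t bs, bool same_inode)
{
	uint64_t blen;

	/* Offsets must not wrap and the source range must fit in i_size. */
	if (pos_in + len < pos_in || pos_out + len < pos_out ||
	    pos_in + len > isize)
		return false;

	/* Linking up to EOF rounds the length out to the block size. */
	if (pos_in + len == isize)
		blen = EX_ALIGN(isize, bs) - pos_in;
	else
		blen = len;

	/* Only whole blocks can share extents. */
	if (!EX_IS_ALIGNED(pos_in, bs) || !EX_IS_ALIGNED(pos_in + blen, bs) ||
	    !EX_IS_ALIGNED(pos_out, bs) || !EX_IS_ALIGNED(pos_out + blen, bs))
		return false;

	/* A same-file remap must not overlap itself. */
	if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen)
		return false;

	return true;
}

int main(void)
{
	/* 4096-byte blocks: an unaligned 6000-byte remap is rejected... */
	printf("%d\n", remap_args_valid(0, 8192, 6000, 16384, 4096, false));
	/* ...but the same length is fine when it runs exactly to EOF. */
	printf("%d\n", remap_args_valid(0, 8192, 6000, 6000, 4096, false));
	return 0;
}
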
index 5dc3c8ac12aa5bef547904ca5dbe48275d63bc34..aa6a4d64bd35d94c2d564ff712f18ad240c34745 100644 (file)
@@ -26,13 +26,13 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
 extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
                struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
 
-extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip,
-               xfs_off_t offset, xfs_off_t count);
+extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
+               struct xfs_bmbt_irec *imap, bool *shared);
 extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
                xfs_off_t offset, xfs_off_t count);
 extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
-               struct xfs_bmbt_irec *imap, bool *need_alloc);
-extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
+               struct xfs_bmbt_irec *imap);
+extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
                xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap);
 
 extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
@@ -43,16 +43,11 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
 extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
                xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
-#define XFS_REFLINK_DEDUPE     1       /* only reflink if contents match */
-#define XFS_REFLINK_ALL                (XFS_REFLINK_DEDUPE)
-extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
-               struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
-               unsigned int flags);
+extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
+               struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
 extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
                struct xfs_trans **tpp);
 extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
                xfs_off_t len);
 
-extern bool xfs_reflink_has_real_cow_blocks(struct xfs_inode *ip);
-
 #endif /* __XFS_REFLINK_H */
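
The new prototype takes the two struct file pointers plus an is_dedupe flag, i.e. the shape of the VFS clone/dedupe entry points, so the XFS clone and dedupe file operations can both funnel into this one helper. A userspace sketch of exercising the clone side via the FICLONERANGE ioctl follows; "src.img" and "dst.img" are placeholder paths and error handling is abbreviated. A source length of 0 means "clone through to EOF", matching the len == 0 handling in the diff above.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
	int src = open("src.img", O_RDONLY);
	int dst = open("dst.img", O_RDWR | O_CREAT, 0644);
	struct file_clone_range fcr = {
		.src_fd      = src,
		.src_offset  = 0,
		.src_length  = 0,	/* 0: clone through to source EOF */
		.dest_offset = 0,
	};

	if (src < 0 || dst < 0)
		return 1;
	if (ioctl(dst, FICLONERANGE, &fcr) < 0)
		perror("FICLONERANGE");
	return 0;
}
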
index 5f8d55d29a11cc4a4db2e30f55766acbeed78b33..276d3023d60f8201b635ae1f0c2ccbf26aac74fd 100644 (file)
@@ -512,13 +512,13 @@ static struct attribute *xfs_error_attrs[] = {
 };
 
 
-struct kobj_type xfs_error_cfg_ktype = {
+static struct kobj_type xfs_error_cfg_ktype = {
        .release = xfs_sysfs_release,
        .sysfs_ops = &xfs_sysfs_ops,
        .default_attrs = xfs_error_attrs,
 };
 
-struct kobj_type xfs_error_ktype = {
+static struct kobj_type xfs_error_ktype = {
        .release = xfs_sysfs_release,
        .sysfs_ops = &xfs_sysfs_ops,
 };
index ad188d3a83f3739db19ed8af577fe4259c9267e8..0907752be62d3de9e385890550a8e92f0402b398 100644 (file)
@@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
 
-DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range);
+DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
 DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
 
 DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
@@ -3356,9 +3356,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
-DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece);
 
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
index b9e7b8ec8c1dbb13e67900a3633d0cc596d4dc86..f185156de74d8270bf931cda18c46f7dd918f294 100644 (file)
@@ -19,11 +19,15 @@ struct vm_fault;
 #define IOMAP_UNWRITTEN        0x04    /* blocks allocated @blkno in unwritten state */
 
 /*
- * Flags for iomap mappings:
+ * Flags for all iomap mappings:
  */
-#define IOMAP_F_MERGED 0x01    /* contains multiple blocks/extents */
-#define IOMAP_F_SHARED 0x02    /* block shared with another file */
-#define IOMAP_F_NEW    0x04    /* blocks have been newly allocated */
+#define IOMAP_F_NEW    0x01    /* blocks have been newly allocated */
+
+/*
+ * Flags that only need to be reported for IOMAP_REPORT requests:
+ */
+#define IOMAP_F_MERGED 0x10    /* contains multiple blocks/extents */
+#define IOMAP_F_SHARED 0x20    /* block shared with another file */
 
 /*
  * Magic value for blkno:
@@ -42,8 +46,9 @@ struct iomap {
 /*
  * Flags for iomap_begin / iomap_end.  No flag implies a read.
  */
-#define IOMAP_WRITE            (1 << 0)
-#define IOMAP_ZERO             (1 << 1)
+#define IOMAP_WRITE            (1 << 0) /* writing, must allocate blocks */
+#define IOMAP_ZERO             (1 << 1) /* zeroing operation, may skip holes */
+#define IOMAP_REPORT           (1 << 2) /* report extent status, e.g. FIEMAP */
 #define IOMAP_FAULT            (1 << 3) /* mapping for page fault */
 
 struct iomap_ops {
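
After this split, IOMAP_F_NEW applies to any mapping, while IOMAP_F_MERGED and IOMAP_F_SHARED only need to be filled in for IOMAP_REPORT requests such as the FIEMAP path, which translates them into FIEMAP_EXTENT_MERGED and FIEMAP_EXTENT_SHARED. A userspace sketch that reads those flags back; "test.img" and the 32-extent buffer size are arbitrary choices for the example.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

#define NEXTENTS 32

int main(void)
{
	struct fiemap *fm = calloc(1, sizeof(*fm) +
				   NEXTENTS * sizeof(struct fiemap_extent));
	int fd = open("test.img", O_RDONLY);
	unsigned int i;

	if (fd < 0 || !fm)
		return 1;
	fm->fm_start = 0;
	fm->fm_length = ~0ULL;			/* map the whole file */
	fm->fm_extent_count = NEXTENTS;
	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}
	for (i = 0; i < fm->fm_mapped_extents; i++) {
		struct fiemap_extent *fe = &fm->fm_extents[i];

		printf("%llu..%llu%s%s\n",
		       (unsigned long long)fe->fe_logical,
		       (unsigned long long)(fe->fe_logical + fe->fe_length),
		       (fe->fe_flags & FIEMAP_EXTENT_MERGED) ? " merged" : "",
		       (fe->fe_flags & FIEMAP_EXTENT_SHARED) ? " shared" : "");
	}
	free(fm);
	return 0;
}
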