Merge tag 'xfs-rmap-for-linus-4.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 6 Aug 2016 13:50:36 +0000 (09:50 -0400)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 6 Aug 2016 13:50:36 +0000 (09:50 -0400)
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile

index 52c288514be1ff729a38f7b6d9a8e7232ea60b67..fc593c8694936e91af919e5dc59c2e69750c22fb 100644 (file)
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -39,6 +39,7 @@ xfs-y                         += $(addprefix libxfs/, \
                                    xfs_btree.o \
                                    xfs_da_btree.o \
                                    xfs_da_format.o \
+                                  xfs_defer.o \
                                    xfs_dir2.o \
                                    xfs_dir2_block.o \
                                    xfs_dir2_data.o \
@@ -51,6 +52,8 @@ xfs-y                         += $(addprefix libxfs/, \
                                    xfs_inode_fork.o \
                                    xfs_inode_buf.o \
                                    xfs_log_rlimit.o \
+                                  xfs_rmap.o \
+                                  xfs_rmap_btree.o \
                                    xfs_sb.o \
                                    xfs_symlink_remote.o \
                                    xfs_trans_resv.o \
@@ -100,11 +103,13 @@ xfs-y                             += xfs_log.o \
                                    xfs_extfree_item.o \
                                    xfs_icreate_item.o \
                                    xfs_inode_item.o \
+                                  xfs_rmap_item.o \
                                    xfs_log_recover.o \
                                    xfs_trans_ail.o \
                                    xfs_trans_buf.o \
                                    xfs_trans_extfree.o \
                                    xfs_trans_inode.o \
+                                  xfs_trans_rmap.o \
  
  # optional features
  xfs-$(CONFIG_XFS_QUOTA)                += xfs_dquot.o \
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c

index 88c26b827a2dd0819464a82222d822bb3f8e9202..776ae2f325d1e4f534540206a30a138b01c1ea46 100644 (file)
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -24,8 +24,10 @@
  #include "xfs_bit.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_btree.h"
+#include "xfs_rmap.h"
  #include "xfs_alloc_btree.h"
  #include "xfs_alloc.h"
  #include "xfs_extent_busy.h"
@@ -49,6 +51,81 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
  STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
                 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  
+xfs_extlen_t
+xfs_prealloc_blocks(
+       struct xfs_mount        *mp)
+{
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               return XFS_RMAP_BLOCK(mp) + 1;
+       if (xfs_sb_version_hasfinobt(&mp->m_sb))
+               return XFS_FIBT_BLOCK(mp) + 1;
+       return XFS_IBT_BLOCK(mp) + 1;
+}
+
+/*
+ * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
+ * AGF buffer (PV 947395), we place constraints on the relationship among
+ * actual allocations for data blocks, freelist blocks, and potential file data
+ * bmap btree blocks. However, these restrictions may result in no actual space
+ * allocated for a delayed extent, for example, a data block in a certain AG is
+ * allocated but there is no additional block for the additional bmap btree
+ * block due to a split of the bmap btree of the file. The result of this may
+ * lead to an infinite loop when the file gets flushed to disk and all delayed
+ * extents need to be actually allocated. To get around this, we explicitly set
+ * aside a few blocks which will not be reserved in delayed allocation.
+ *
+ * When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist
+ * and 4 more to handle a potential split of the file's bmap btree.
+ *
+ * When rmap is enabled, we must also be able to handle two rmap btree inserts
+ * to record both the file data extent and a new bmbt block.  The bmbt block
+ * might not be in the same AG as the file data extent.  In the worst case
+ * the bmap btree splits multiple levels and all the new blocks come from
+ * different AGs, so set aside enough to handle rmap btree splits in all AGs.
+ */
+unsigned int
+xfs_alloc_set_aside(
+       struct xfs_mount        *mp)
+{
+       unsigned int            blocks;
+
+       blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               blocks += mp->m_sb.sb_agcount * mp->m_rmap_maxlevels;
+       return blocks;
+}
+
+/*
+ * When deciding how much space to allocate out of an AG, we limit the
+ * allocation maximum size to the size of the AG. However, we cannot use all the
+ * blocks in the AG - some are permanently used by metadata. These
+ * blocks are generally:
+ *     - the AG superblock, AGF, AGI and AGFL
+ *     - the AGF (bno and cnt) and AGI btree root blocks
+ *     - optionally, the AGI free inode btree (finobt) root block
+ *     - blocks on the AGFL according to xfs_alloc_set_aside() limits
+ *     - optionally, the rmap btree (rmapbt) root block
+ *
+ * The AG headers are sector sized, so the amount of space they take up is
+ * dependent on filesystem geometry. The others are all single blocks.
+ */
+unsigned int
+xfs_alloc_ag_max_usable(
+       struct xfs_mount        *mp)
+{
+       unsigned int            blocks;
+
+       blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
+       blocks += XFS_ALLOC_AGFL_RESERVE;
+       blocks += 3;                    /* AGF, AGI btree root blocks */
+       if (xfs_sb_version_hasfinobt(&mp->m_sb))
+               blocks++;               /* finobt root block */
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               blocks++;               /* rmap root block */
+
+       return mp->m_sb.sb_agblocks - blocks;
+}
+
  /*
   * Lookup the record equal to [bno, len] in the btree given by cur.
   */
@@ -636,6 +713,14 @@ xfs_alloc_ag_vextent(
         ASSERT(!args->wasfromfl || !args->isfl);
         ASSERT(args->agbno % args->alignment == 0);
  
+       /* if not file data, insert new block into the reverse map btree */
+       if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+               error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
+                                      args->agbno, args->len, &args->oinfo);
+               if (error)
+                       return error;
+       }
+
         if (!args->wasfromfl) {
                 error = xfs_alloc_update_counters(args->tp, args->pag,
                                                   args->agbp,
@@ -1577,14 +1662,15 @@ error0:
  /*
   * Free the extent starting at agno/bno for length.
   */
-STATIC int                     /* error */
+STATIC int
  xfs_free_ag_extent(
-       xfs_trans_t     *tp,    /* transaction pointer */
-       xfs_buf_t       *agbp,  /* buffer for a.g. freelist header */
-       xfs_agnumber_t  agno,   /* allocation group number */
-       xfs_agblock_t   bno,    /* starting block number */
-       xfs_extlen_t    len,    /* length of extent */
-       int             isfl)   /* set if is freelist blocks - no sb acctg */
+       xfs_trans_t             *tp,
+       xfs_buf_t               *agbp,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       struct xfs_owner_info   *oinfo,
+       int                     isfl)
  {
         xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
         xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
@@ -1601,12 +1687,19 @@ xfs_free_ag_extent(
         xfs_extlen_t    nlen;           /* new length of freespace */
         xfs_perag_t     *pag;           /* per allocation group data */
  
+       bno_cur = cnt_cur = NULL;
         mp = tp->t_mountp;
+
+       if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+               error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
+               if (error)
+                       goto error0;
+       }
+
         /*
          * Allocate and initialize a cursor for the by-block btree.
          */
         bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
-       cnt_cur = NULL;
         /*
          * Look for a neighboring block on the left (lower block numbers)
          * that is contiguous with this space.
@@ -1875,6 +1968,11 @@ xfs_alloc_min_freelist(
         /* space needed by-size freespace btree */
         min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
                                        mp->m_ag_maxlevels);
+       /* space needed by the reverse mapping btree */
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               min_free += min_t(unsigned int,
+                                 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
+                                 mp->m_rmap_maxlevels);
  
         return min_free;
  }
@@ -1992,21 +2090,34 @@ xfs_alloc_fix_freelist(
          * anything other than extra overhead when we need to put more blocks
          * back on the free list? Maybe we should only do this when space is
          * getting low or the AGFL is more than half full?
+        *
+        * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
+        * big; the NORMAP flag prevents AGFL expand/shrink operations from
+        * updating the rmapbt.  Both flags are used in xfs_repair while we're
+        * rebuilding the rmapbt, and neither is used by the kernel.  They're
+        * both required to ensure that rmaps are correctly recorded for the
+        * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
+        * repair/rmap.c in xfsprogs for details.
          */
-       while (pag->pagf_flcount > need) {
+       memset(&targs, 0, sizeof(targs));
+       if (flags & XFS_ALLOC_FLAG_NORMAP)
+               xfs_rmap_skip_owner_update(&targs.oinfo);
+       else
+               xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
+       while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
                 struct xfs_buf  *bp;
  
                 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
                 if (error)
                         goto out_agbp_relse;
-               error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
+               error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
+                                          &targs.oinfo, 1);
                 if (error)
                         goto out_agbp_relse;
                 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
                 xfs_trans_binval(tp, bp);
         }
  
-       memset(&targs, 0, sizeof(targs));
         targs.tp = tp;
         targs.mp = mp;
         targs.agbp = agbp;
@@ -2271,6 +2382,10 @@ xfs_agf_verify(
             be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
                 return false;
  
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+           be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)
+               return false;
+
         /*
          * during growfs operations, the perag is not fully initialised,
          * so we can't use it for any useful checking. growfs ensures we can't
@@ -2402,6 +2517,8 @@ xfs_alloc_read_agf(
                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
                 pag->pagf_levels[XFS_BTNUM_CNTi] =
                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
+               pag->pagf_levels[XFS_BTNUM_RMAPi] =
+                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
                 spin_lock_init(&pag->pagb_lock);
                 pag->pagb_count = 0;
                 pag->pagb_tree = RB_ROOT;
@@ -2691,7 +2808,8 @@ int                               /* error */
  xfs_free_extent(
         struct xfs_trans        *tp,    /* transaction pointer */
         xfs_fsblock_t           bno,    /* starting block number of extent */
-       xfs_extlen_t            len)    /* length of extent */
+       xfs_extlen_t            len,    /* length of extent */
+       struct xfs_owner_info   *oinfo) /* extent owner */
  {
         struct xfs_mount        *mp = tp->t_mountp;
         struct xfs_buf          *agbp;
@@ -2701,6 +2819,11 @@ xfs_free_extent(
  
         ASSERT(len != 0);
  
+       if (XFS_TEST_ERROR(false, mp,
+                       XFS_ERRTAG_FREE_EXTENT,
+                       XFS_RANDOM_FREE_EXTENT))
+               return -EIO;
+
         error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
         if (error)
                 return error;
@@ -2712,7 +2835,7 @@ xfs_free_extent(
                 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
                                 err);
  
-       error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, 0);
+       error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, 0);
         if (error)
                 goto err;
  
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h

index cf268b2d0b6c2035aaad4b781d43e2cebeedcd54..6fe2d6b7cfe93e6ed87f999438877e28ebad3ef6 100644 (file)
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -54,41 +54,8 @@ typedef unsigned int xfs_alloctype_t;
   */
  #define        XFS_ALLOC_FLAG_TRYLOCK  0x00000001  /* use trylock for buffer locking */
  #define        XFS_ALLOC_FLAG_FREEING  0x00000002  /* indicate caller is freeing extents*/
-
-/*
- * In order to avoid ENOSPC-related deadlock caused by
- * out-of-order locking of AGF buffer (PV 947395), we place
- * constraints on the relationship among actual allocations for
- * data blocks, freelist blocks, and potential file data bmap
- * btree blocks. However, these restrictions may result in no
- * actual space allocated for a delayed extent, for example, a data
- * block in a certain AG is allocated but there is no additional
- * block for the additional bmap btree block due to a split of the
- * bmap btree of the file. The result of this may lead to an
- * infinite loop in xfssyncd when the file gets flushed to disk and
- * all delayed extents need to be actually allocated. To get around
- * this, we explicitly set aside a few blocks which will not be
- * reserved in delayed allocation. Considering the minimum number of
- * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
- * btree requires 1 fsb, so we set the number of set-aside blocks
- * to 4 + 4*agcount.
- */
-#define XFS_ALLOC_SET_ASIDE(mp)  (4 + ((mp)->m_sb.sb_agcount * 4))
-
-/*
- * When deciding how much space to allocate out of an AG, we limit the
- * allocation maximum size to the size the AG. However, we cannot use all the
- * blocks in the AG - some are permanently used by metadata. These
- * blocks are generally:
- *     - the AG superblock, AGF, AGI and AGFL
- *     - the AGF (bno and cnt) and AGI btree root blocks
- *     - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
- *
- * The AG headers are sector sized, so the amount of space they take up is
- * dependent on filesystem geometry. The others are all single blocks.
- */
-#define XFS_ALLOC_AG_MAX_USABLE(mp)    \
-       ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
+#define        XFS_ALLOC_FLAG_NORMAP   0x00000004  /* don't modify the rmapbt */
+#define        XFS_ALLOC_FLAG_NOSHRINK 0x00000008  /* don't shrink the freelist */
  
  
  /*
@@ -123,6 +90,7 @@ typedef struct xfs_alloc_arg {
         char            isfl;           /* set if is freelist blocks - !acctg */
         char            userdata;       /* mask defining userdata treatment */
         xfs_fsblock_t   firstblock;     /* io first block allocated */
+       struct xfs_owner_info   oinfo;  /* owner of blocks being allocated */
  } xfs_alloc_arg_t;
  
  /*
@@ -132,6 +100,11 @@ typedef struct xfs_alloc_arg {
  #define XFS_ALLOC_INITIAL_USER_DATA    (1 << 1)/* special case start of file */
  #define XFS_ALLOC_USERDATA_ZERO                (1 << 2)/* zero extent on allocation */
  
+/* freespace limit calculations */
+#define XFS_ALLOC_AGFL_RESERVE 4
+unsigned int xfs_alloc_set_aside(struct xfs_mount *mp);
+unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp);
+
  xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
                 struct xfs_perag *pag, xfs_extlen_t need);
  unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
@@ -208,9 +181,10 @@ xfs_alloc_vextent(
   */
  int                            /* error */
  xfs_free_extent(
-       struct xfs_trans *tp,   /* transaction pointer */
-       xfs_fsblock_t   bno,    /* starting block number of extent */
-       xfs_extlen_t    len);   /* length of extent */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_fsblock_t           bno,    /* starting block number of extent */
+       xfs_extlen_t            len,    /* length of extent */
+       struct xfs_owner_info   *oinfo);/* extent owner */
  
  int                            /* error */
  xfs_alloc_lookup_ge(
@@ -232,4 +206,6 @@ int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
  int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno,
                 struct xfs_buf **agbp);
  
+xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
+
  #endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c

index d9b42425291e37c6a4845c21dd0e1f61d8a76e86..5ba2dac5e67c492a1a9fe5047995899290e25220 100644 (file)
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -211,17 +211,6 @@ xfs_allocbt_init_key_from_rec(
         key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
  }
  
-STATIC void
-xfs_allocbt_init_rec_from_key(
-       union xfs_btree_key     *key,
-       union xfs_btree_rec     *rec)
-{
-       ASSERT(key->alloc.ar_startblock != 0);
-
-       rec->alloc.ar_startblock = key->alloc.ar_startblock;
-       rec->alloc.ar_blockcount = key->alloc.ar_blockcount;
-}
-
  STATIC void
  xfs_allocbt_init_rec_from_cur(
         struct xfs_btree_cur    *cur,
@@ -406,7 +395,6 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
         .get_minrecs            = xfs_allocbt_get_minrecs,
         .get_maxrecs            = xfs_allocbt_get_maxrecs,
         .init_key_from_rec      = xfs_allocbt_init_key_from_rec,
-       .init_rec_from_key      = xfs_allocbt_init_rec_from_key,
         .init_rec_from_cur      = xfs_allocbt_init_rec_from_cur,
         .init_ptr_from_cur      = xfs_allocbt_init_ptr_from_cur,
         .key_diff               = xfs_allocbt_key_diff,
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c

index 4e126f41a0aa97d1f73773ea8efb89ef7a52746c..af1ecb19121e9e8569c0ee907652405575d882c8 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -23,6 +23,7 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
  #include "xfs_attr_sf.h"
@@ -203,7 +204,7 @@ xfs_attr_set(
  {
         struct xfs_mount        *mp = dp->i_mount;
         struct xfs_da_args      args;
-       struct xfs_bmap_free    flist;
+       struct xfs_defer_ops    dfops;
         struct xfs_trans_res    tres;
         xfs_fsblock_t           firstblock;
         int                     rsvd = (flags & ATTR_ROOT) != 0;
@@ -221,7 +222,7 @@ xfs_attr_set(
         args.value = value;
         args.valuelen = valuelen;
         args.firstblock = &firstblock;
-       args.flist = &flist;
+       args.dfops = &dfops;
         args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
         args.total = xfs_attr_calc_size(&args, &local);
  
@@ -316,13 +317,13 @@ xfs_attr_set(
                  * It won't fit in the shortform, transform to a leaf block.
                  * GROT: another possible req'mt for a double-split btree op.
                  */
-               xfs_bmap_init(args.flist, args.firstblock);
+               xfs_defer_init(args.dfops, args.firstblock);
                 error = xfs_attr_shortform_to_leaf(&args);
                 if (!error)
-                       error = xfs_bmap_finish(&args.trans, args.flist, dp);
+                       error = xfs_defer_finish(&args.trans, args.dfops, dp);
                 if (error) {
                         args.trans = NULL;
-                       xfs_bmap_cancel(&flist);
+                       xfs_defer_cancel(&dfops);
                         goto out;
                 }
  
@@ -382,7 +383,7 @@ xfs_attr_remove(
  {
         struct xfs_mount        *mp = dp->i_mount;
         struct xfs_da_args      args;
-       struct xfs_bmap_free    flist;
+       struct xfs_defer_ops    dfops;
         xfs_fsblock_t           firstblock;
         int                     error;
  
@@ -399,7 +400,7 @@ xfs_attr_remove(
                 return error;
  
         args.firstblock = &firstblock;
-       args.flist = &flist;
+       args.dfops = &dfops;
  
         /*
          * we have no control over the attribute names that userspace passes us
@@ -584,13 +585,13 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
                  * Commit that transaction so that the node_addname() call
                  * can manage its own transactions.
                  */
-               xfs_bmap_init(args->flist, args->firstblock);
+               xfs_defer_init(args->dfops, args->firstblock);
                 error = xfs_attr3_leaf_to_node(args);
                 if (!error)
-                       error = xfs_bmap_finish(&args->trans, args->flist, dp);
+                       error = xfs_defer_finish(&args->trans, args->dfops, dp);
                 if (error) {
                         args->trans = NULL;
-                       xfs_bmap_cancel(args->flist);
+                       xfs_defer_cancel(args->dfops);
                         return error;
                 }
  
@@ -674,15 +675,15 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
                  * If the result is small enough, shrink it all into the inode.
                  */
                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-                       xfs_bmap_init(args->flist, args->firstblock);
+                       xfs_defer_init(args->dfops, args->firstblock);
                         error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
                         /* bp is gone due to xfs_da_shrink_inode */
                         if (!error)
-                               error = xfs_bmap_finish(&args->trans,
-                                                       args->flist, dp);
+                               error = xfs_defer_finish(&args->trans,
+                                                       args->dfops, dp);
                         if (error) {
                                 args->trans = NULL;
-                               xfs_bmap_cancel(args->flist);
+                               xfs_defer_cancel(args->dfops);
                                 return error;
                         }
                 }
@@ -737,14 +738,14 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
          * If the result is small enough, shrink it all into the inode.
          */
         if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-               xfs_bmap_init(args->flist, args->firstblock);
+               xfs_defer_init(args->dfops, args->firstblock);
                 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
                 /* bp is gone due to xfs_da_shrink_inode */
                 if (!error)
-                       error = xfs_bmap_finish(&args->trans, args->flist, dp);
+                       error = xfs_defer_finish(&args->trans, args->dfops, dp);
                 if (error) {
                         args->trans = NULL;
-                       xfs_bmap_cancel(args->flist);
+                       xfs_defer_cancel(args->dfops);
                         return error;
                 }
         }
@@ -863,14 +864,14 @@ restart:
                          */
                         xfs_da_state_free(state);
                         state = NULL;
-                       xfs_bmap_init(args->flist, args->firstblock);
+                       xfs_defer_init(args->dfops, args->firstblock);
                         error = xfs_attr3_leaf_to_node(args);
                         if (!error)
-                               error = xfs_bmap_finish(&args->trans,
-                                                       args->flist, dp);
+                               error = xfs_defer_finish(&args->trans,
+                                                       args->dfops, dp);
                         if (error) {
                                 args->trans = NULL;
-                               xfs_bmap_cancel(args->flist);
+                               xfs_defer_cancel(args->dfops);
                                 goto out;
                         }
  
@@ -891,13 +892,13 @@ restart:
                  * in the index/blkno/rmtblkno/rmtblkcnt fields and
                  * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
                  */
-               xfs_bmap_init(args->flist, args->firstblock);
+               xfs_defer_init(args->dfops, args->firstblock);
                 error = xfs_da3_split(state);
                 if (!error)
-                       error = xfs_bmap_finish(&args->trans, args->flist, dp);
+                       error = xfs_defer_finish(&args->trans, args->dfops, dp);
                 if (error) {
                         args->trans = NULL;
-                       xfs_bmap_cancel(args->flist);
+                       xfs_defer_cancel(args->dfops);
                         goto out;
                 }
         } else {
@@ -990,14 +991,14 @@ restart:
                  * Check to see if the tree needs to be collapsed.
                  */
                 if (retval && (state->path.active > 1)) {
-                       xfs_bmap_init(args->flist, args->firstblock);
+                       xfs_defer_init(args->dfops, args->firstblock);
                         error = xfs_da3_join(state);
                         if (!error)
-                               error = xfs_bmap_finish(&args->trans,
-                                                       args->flist, dp);
+                               error = xfs_defer_finish(&args->trans,
+                                                       args->dfops, dp);
                         if (error) {
                                 args->trans = NULL;
-                               xfs_bmap_cancel(args->flist);
+                               xfs_defer_cancel(args->dfops);
                                 goto out;
                         }
                 }
@@ -1113,13 +1114,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
          * Check to see if the tree needs to be collapsed.
          */
         if (retval && (state->path.active > 1)) {
-               xfs_bmap_init(args->flist, args->firstblock);
+               xfs_defer_init(args->dfops, args->firstblock);
                 error = xfs_da3_join(state);
                 if (!error)
-                       error = xfs_bmap_finish(&args->trans, args->flist, dp);
+                       error = xfs_defer_finish(&args->trans, args->dfops, dp);
                 if (error) {
                         args->trans = NULL;
-                       xfs_bmap_cancel(args->flist);
+                       xfs_defer_cancel(args->dfops);
                         goto out;
                 }
                 /*
@@ -1146,15 +1147,15 @@ xfs_attr_node_removename(xfs_da_args_t *args)
                         goto out;
  
                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-                       xfs_bmap_init(args->flist, args->firstblock);
+                       xfs_defer_init(args->dfops, args->firstblock);
                         error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
                         /* bp is gone due to xfs_da_shrink_inode */
                         if (!error)
-                               error = xfs_bmap_finish(&args->trans,
-                                                       args->flist, dp);
+                               error = xfs_defer_finish(&args->trans,
+                                                       args->dfops, dp);
                         if (error) {
                                 args->trans = NULL;
-                               xfs_bmap_cancel(args->flist);
+                               xfs_defer_cancel(args->dfops);
                                 goto out;
                         }
                 } else
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c

index 01a5ecfedfcf162cc155d1214cd2746eec5a46b7..8ea91f3630938a63523602e5d14c2553a472c015 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -792,7 +792,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
         nargs.dp = dp;
         nargs.geo = args->geo;
         nargs.firstblock = args->firstblock;
-       nargs.flist = args->flist;
+       nargs.dfops = args->dfops;
         nargs.total = args->total;
         nargs.whichfork = XFS_ATTR_FORK;
         nargs.trans = args->trans;
@@ -922,7 +922,7 @@ xfs_attr3_leaf_to_shortform(
         nargs.geo = args->geo;
         nargs.dp = dp;
         nargs.firstblock = args->firstblock;
-       nargs.flist = args->flist;
+       nargs.dfops = args->dfops;
         nargs.total = args->total;
         nargs.whichfork = XFS_ATTR_FORK;
         nargs.trans = args->trans;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c

index a572532a55cdc3bcd6f971ef3b84efd2f950183a..d52f525f5b2dffe74be16ccf1bae2494a02787d7 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -24,6 +24,7 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
  #include "xfs_inode.h"
@@ -460,16 +461,16 @@ xfs_attr_rmtval_set(
                  * extent and then crash then the block may not contain the
                  * correct metadata after log recovery occurs.
                  */
-               xfs_bmap_init(args->flist, args->firstblock);
+               xfs_defer_init(args->dfops, args->firstblock);
                 nmap = 1;
                 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
                                   blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
-                                 args->total, &map, &nmap, args->flist);
+                                 args->total, &map, &nmap, args->dfops);
                 if (!error)
-                       error = xfs_bmap_finish(&args->trans, args->flist, dp);
+                       error = xfs_defer_finish(&args->trans, args->dfops, dp);
                 if (error) {
                         args->trans = NULL;
-                       xfs_bmap_cancel(args->flist);
+                       xfs_defer_cancel(args->dfops);
                         return error;
                 }
  
@@ -503,7 +504,7 @@ xfs_attr_rmtval_set(
  
                 ASSERT(blkcnt > 0);
  
-               xfs_bmap_init(args->flist, args->firstblock);
+               xfs_defer_init(args->dfops, args->firstblock);
                 nmap = 1;
                 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
                                        blkcnt, &map, &nmap,
@@ -603,16 +604,16 @@ xfs_attr_rmtval_remove(
         blkcnt = args->rmtblkcnt;
         done = 0;
         while (!done) {
-               xfs_bmap_init(args->flist, args->firstblock);
+               xfs_defer_init(args->dfops, args->firstblock);
                 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
                                     XFS_BMAPI_ATTRFORK, 1, args->firstblock,
-                                   args->flist, &done);
+                                   args->dfops, &done);
                 if (!error)
-                       error = xfs_bmap_finish(&args->trans, args->flist,
+                       error = xfs_defer_finish(&args->trans, args->dfops,
                                                 args->dp);
                 if (error) {
                         args->trans = NULL;
-                       xfs_bmap_cancel(args->flist);
+                       xfs_defer_cancel(args->dfops);
                         return error;
                 }
  
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c

index 2f2c85cc81173dec19952d2a5ae4efe2f9377983..b060bca93402710fcea1fc2da4544e4d856329bf 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -24,6 +24,7 @@
  #include "xfs_bit.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
  #include "xfs_dir2.h"
@@ -45,6 +46,7 @@
  #include "xfs_symlink.h"
  #include "xfs_attr_leaf.h"
  #include "xfs_filestream.h"
+#include "xfs_rmap.h"
  
  
  kmem_zone_t            *xfs_bmap_free_item_zone;
@@ -570,12 +572,13 @@ xfs_bmap_validate_ret(
   */
  void
  xfs_bmap_add_free(
-       struct xfs_mount        *mp,            /* mount point structure */
-       struct xfs_bmap_free    *flist,         /* list of extents */
-       xfs_fsblock_t           bno,            /* fs block number of extent */
-       xfs_filblks_t           len)            /* length of extent */
+       struct xfs_mount                *mp,
+       struct xfs_defer_ops            *dfops,
+       xfs_fsblock_t                   bno,
+       xfs_filblks_t                   len,
+       struct xfs_owner_info           *oinfo)
  {
-       struct xfs_bmap_free_item       *new;           /* new element */
+       struct xfs_extent_free_item     *new;           /* new element */
  #ifdef DEBUG
         xfs_agnumber_t          agno;
         xfs_agblock_t           agbno;
@@ -592,44 +595,17 @@ xfs_bmap_add_free(
         ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
  #endif
         ASSERT(xfs_bmap_free_item_zone != NULL);
-       new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
-       new->xbfi_startblock = bno;
-       new->xbfi_blockcount = (xfs_extlen_t)len;
-       list_add(&new->xbfi_list, &flist->xbf_flist);
-       flist->xbf_count++;
-}
-
-/*
- * Remove the entry "free" from the free item list.  Prev points to the
- * previous entry, unless "free" is the head of the list.
- */
-void
-xfs_bmap_del_free(
-       struct xfs_bmap_free            *flist, /* free item list header */
-       struct xfs_bmap_free_item       *free)  /* list item to be freed */
-{
-       list_del(&free->xbfi_list);
-       flist->xbf_count--;
-       kmem_zone_free(xfs_bmap_free_item_zone, free);
-}
-
-/*
- * Free up any items left in the list.
- */
-void
-xfs_bmap_cancel(
-       struct xfs_bmap_free            *flist) /* list of bmap_free_items */
-{
-       struct xfs_bmap_free_item       *free;  /* free list item */
  
-       if (flist->xbf_count == 0)
-               return;
-       while (!list_empty(&flist->xbf_flist)) {
-               free = list_first_entry(&flist->xbf_flist,
-                               struct xfs_bmap_free_item, xbfi_list);
-               xfs_bmap_del_free(flist, free);
-       }
-       ASSERT(flist->xbf_count == 0);
+       new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
+       new->xefi_startblock = bno;
+       new->xefi_blockcount = (xfs_extlen_t)len;
+       if (oinfo)
+               new->xefi_oinfo = *oinfo;
+       else
+               xfs_rmap_skip_owner_update(&new->xefi_oinfo);
+       trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0,
+                       XFS_FSB_TO_AGBNO(mp, bno), len);
+       xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
  }
  
  /*
@@ -659,6 +635,7 @@ xfs_bmap_btree_to_extents(
         xfs_mount_t             *mp;    /* mount point structure */
         __be64                  *pp;    /* ptr to block address */
         struct xfs_btree_block  *rblock;/* root btree block */
+       struct xfs_owner_info   oinfo;
  
         mp = ip->i_mount;
         ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -682,7 +659,8 @@ xfs_bmap_btree_to_extents(
         cblock = XFS_BUF_TO_BLOCK(cbp);
         if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
                 return error;
-       xfs_bmap_add_free(mp, cur->bc_private.b.flist, cbno, 1);
+       xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
+       xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo);
         ip->i_d.di_nblocks--;
         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
         xfs_trans_binval(tp, cbp);
@@ -705,7 +683,7 @@ xfs_bmap_extents_to_btree(
         xfs_trans_t             *tp,            /* transaction pointer */
         xfs_inode_t             *ip,            /* incore inode pointer */
         xfs_fsblock_t           *firstblock,    /* first-block-allocated */
-       xfs_bmap_free_t         *flist,         /* blocks freed in xaction */
+       struct xfs_defer_ops    *dfops,         /* blocks freed in xaction */
         xfs_btree_cur_t         **curp,         /* cursor returned to caller */
         int                     wasdel,         /* converting a delayed alloc */
         int                     *logflagsp,     /* inode logging flags */
@@ -754,7 +732,7 @@ xfs_bmap_extents_to_btree(
          */
         cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
         cur->bc_private.b.firstblock = *firstblock;
-       cur->bc_private.b.flist = flist;
+       cur->bc_private.b.dfops = dfops;
         cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
         /*
          * Convert to a btree with two levels, one record in root.
@@ -763,11 +741,12 @@ xfs_bmap_extents_to_btree(
         memset(&args, 0, sizeof(args));
         args.tp = tp;
         args.mp = mp;
+       xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
         args.firstblock = *firstblock;
         if (*firstblock == NULLFSBLOCK) {
                 args.type = XFS_ALLOCTYPE_START_BNO;
                 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
-       } else if (flist->xbf_low) {
+       } else if (dfops->dop_low) {
                 args.type = XFS_ALLOCTYPE_START_BNO;
                 args.fsbno = *firstblock;
         } else {
@@ -788,7 +767,7 @@ xfs_bmap_extents_to_btree(
         ASSERT(args.fsbno != NULLFSBLOCK);
         ASSERT(*firstblock == NULLFSBLOCK ||
                args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
-              (flist->xbf_low &&
+              (dfops->dop_low &&
                 args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
         *firstblock = cur->bc_private.b.firstblock = args.fsbno;
         cur->bc_private.b.allocated++;
@@ -909,6 +888,7 @@ xfs_bmap_local_to_extents(
         memset(&args, 0, sizeof(args));
         args.tp = tp;
         args.mp = ip->i_mount;
+       xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
         args.firstblock = *firstblock;
         /*
          * Allocate a block.  We know we need only one, since the
@@ -973,7 +953,7 @@ xfs_bmap_add_attrfork_btree(
         xfs_trans_t             *tp,            /* transaction pointer */
         xfs_inode_t             *ip,            /* incore inode pointer */
         xfs_fsblock_t           *firstblock,    /* first block allocated */
-       xfs_bmap_free_t         *flist,         /* blocks to free at commit */
+       struct xfs_defer_ops    *dfops,         /* blocks to free at commit */
         int                     *flags)         /* inode logging flags */
  {
         xfs_btree_cur_t         *cur;           /* btree cursor */
@@ -986,7 +966,7 @@ xfs_bmap_add_attrfork_btree(
                 *flags |= XFS_ILOG_DBROOT;
         else {
                 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
-               cur->bc_private.b.flist = flist;
+               cur->bc_private.b.dfops = dfops;
                 cur->bc_private.b.firstblock = *firstblock;
                 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
                         goto error0;
@@ -1016,7 +996,7 @@ xfs_bmap_add_attrfork_extents(
         xfs_trans_t             *tp,            /* transaction pointer */
         xfs_inode_t             *ip,            /* incore inode pointer */
         xfs_fsblock_t           *firstblock,    /* first block allocated */
-       xfs_bmap_free_t         *flist,         /* blocks to free at commit */
+       struct xfs_defer_ops    *dfops,         /* blocks to free at commit */
         int                     *flags)         /* inode logging flags */
  {
         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
@@ -1025,7 +1005,7 @@ xfs_bmap_add_attrfork_extents(
         if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
                 return 0;
         cur = NULL;
-       error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0,
+       error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, &cur, 0,
                 flags, XFS_DATA_FORK);
         if (cur) {
                 cur->bc_private.b.allocated = 0;
@@ -1051,7 +1031,7 @@ xfs_bmap_add_attrfork_local(
         xfs_trans_t             *tp,            /* transaction pointer */
         xfs_inode_t             *ip,            /* incore inode pointer */
         xfs_fsblock_t           *firstblock,    /* first block allocated */
-       xfs_bmap_free_t         *flist,         /* blocks to free at commit */
+       struct xfs_defer_ops    *dfops,         /* blocks to free at commit */
         int                     *flags)         /* inode logging flags */
  {
         xfs_da_args_t           dargs;          /* args for dir/attr code */
@@ -1064,7 +1044,7 @@ xfs_bmap_add_attrfork_local(
                 dargs.geo = ip->i_mount->m_dir_geo;
                 dargs.dp = ip;
                 dargs.firstblock = firstblock;
-               dargs.flist = flist;
+               dargs.dfops = dfops;
                 dargs.total = dargs.geo->fsbcount;
                 dargs.whichfork = XFS_DATA_FORK;
                 dargs.trans = tp;
@@ -1092,7 +1072,7 @@ xfs_bmap_add_attrfork(
         int                     rsvd)           /* xact may use reserved blks */
  {
         xfs_fsblock_t           firstblock;     /* 1st block/ag allocated */
-       xfs_bmap_free_t         flist;          /* freed extent records */
+       struct xfs_defer_ops    dfops;          /* freed extent records */
         xfs_mount_t             *mp;            /* mount structure */
         xfs_trans_t             *tp;            /* transaction pointer */
         int                     blks;           /* space reservation */
@@ -1158,18 +1138,18 @@ xfs_bmap_add_attrfork(
         ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
         ip->i_afp->if_flags = XFS_IFEXTENTS;
         logflags = 0;
-       xfs_bmap_init(&flist, &firstblock);
+       xfs_defer_init(&dfops, &firstblock);
         switch (ip->i_d.di_format) {
         case XFS_DINODE_FMT_LOCAL:
-               error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
+               error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops,
                         &logflags);
                 break;
         case XFS_DINODE_FMT_EXTENTS:
                 error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
-                       &flist, &logflags);
+                       &dfops, &logflags);
                 break;
         case XFS_DINODE_FMT_BTREE:
-               error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
+               error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &dfops,
                         &logflags);
                 break;
         default:
@@ -1198,7 +1178,7 @@ xfs_bmap_add_attrfork(
                         xfs_log_sb(tp);
         }
  
-       error = xfs_bmap_finish(&tp, &flist, NULL);
+       error = xfs_defer_finish(&tp, &dfops, NULL);
         if (error)
                 goto bmap_cancel;
         error = xfs_trans_commit(tp);
@@ -1206,7 +1186,7 @@ xfs_bmap_add_attrfork(
         return error;
  
  bmap_cancel:
-       xfs_bmap_cancel(&flist);
+       xfs_defer_cancel(&dfops);
  trans_cancel:
         xfs_trans_cancel(tp);
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -2003,7 +1983,7 @@ xfs_bmap_add_extent_delay_real(
  
                 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-                                       bma->firstblock, bma->flist,
+                                       bma->firstblock, bma->dfops,
                                         &bma->cur, 1, &tmp_rval, whichfork);
                         rval |= tmp_rval;
                         if (error)
@@ -2087,7 +2067,7 @@ xfs_bmap_add_extent_delay_real(
  
                 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-                               bma->firstblock, bma->flist, &bma->cur, 1,
+                               bma->firstblock, bma->dfops, &bma->cur, 1,
                                 &tmp_rval, whichfork);
                         rval |= tmp_rval;
                         if (error)
@@ -2156,7 +2136,7 @@ xfs_bmap_add_extent_delay_real(
  
                 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-                                       bma->firstblock, bma->flist, &bma->cur,
+                                       bma->firstblock, bma->dfops, &bma->cur,
                                         1, &tmp_rval, whichfork);
                         rval |= tmp_rval;
                         if (error)
@@ -2199,13 +2179,18 @@ xfs_bmap_add_extent_delay_real(
                 ASSERT(0);
         }
  
+       /* add reverse mapping */
+       error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
+       if (error)
+               goto done;
+
         /* convert to a btree if necessary */
         if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
                 int     tmp_logflags;   /* partial log flag return val */
  
                 ASSERT(bma->cur == NULL);
                 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-                               bma->firstblock, bma->flist, &bma->cur,
+                               bma->firstblock, bma->dfops, &bma->cur,
                                 da_old > 0, &tmp_logflags, whichfork);
                 bma->logflags |= tmp_logflags;
                 if (error)
@@ -2247,7 +2232,7 @@ xfs_bmap_add_extent_unwritten_real(
         xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
         xfs_fsblock_t           *first, /* pointer to firstblock variable */
-       xfs_bmap_free_t         *flist, /* list of extents to be freed */
+       struct xfs_defer_ops    *dfops, /* list of extents to be freed */
         int                     *logflagsp) /* inode logging flags */
  {
         xfs_btree_cur_t         *cur;   /* btree cursor */
@@ -2735,12 +2720,17 @@ xfs_bmap_add_extent_unwritten_real(
                 ASSERT(0);
         }
  
+       /* update reverse mappings */
+       error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
+       if (error)
+               goto done;
+
         /* convert to a btree if necessary */
         if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
                 int     tmp_logflags;   /* partial log flag return val */
  
                 ASSERT(cur == NULL);
-               error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur,
+               error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
                                 0, &tmp_logflags, XFS_DATA_FORK);
                 *logflagsp |= tmp_logflags;
                 if (error)
@@ -3127,13 +3117,18 @@ xfs_bmap_add_extent_hole_real(
                 break;
         }
  
+       /* add reverse mapping */
+       error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
+       if (error)
+               goto done;
+
         /* convert to a btree if necessary */
         if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
                 int     tmp_logflags;   /* partial log flag return val */
  
                 ASSERT(bma->cur == NULL);
                 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-                               bma->firstblock, bma->flist, &bma->cur,
+                               bma->firstblock, bma->dfops, &bma->cur,
                                 0, &tmp_logflags, whichfork);
                 bma->logflags |= tmp_logflags;
                 if (error)
@@ -3691,9 +3686,10 @@ xfs_bmap_btalloc(
         args.tp = ap->tp;
         args.mp = mp;
         args.fsbno = ap->blkno;
+       xfs_rmap_skip_owner_update(&args.oinfo);
  
         /* Trim the allocation back to the maximum an AG can fit. */
-       args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
+       args.maxlen = MIN(ap->length, mp->m_ag_max_usable);
         args.firstblock = *ap->firstblock;
         blen = 0;
         if (nullfb) {
@@ -3708,7 +3704,7 @@ xfs_bmap_btalloc(
                         error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
                 if (error)
                         return error;
-       } else if (ap->flist->xbf_low) {
+       } else if (ap->dfops->dop_low) {
                 if (xfs_inode_is_filestream(ap->ip))
                         args.type = XFS_ALLOCTYPE_FIRST_AG;
                 else
@@ -3741,7 +3737,7 @@ xfs_bmap_btalloc(
          * is >= the stripe unit and the allocation offset is
          * at the end of file.
          */
-       if (!ap->flist->xbf_low && ap->aeof) {
+       if (!ap->dfops->dop_low && ap->aeof) {
                 if (!ap->offset) {
                         args.alignment = stripe_align;
                         atype = args.type;
@@ -3834,7 +3830,7 @@ xfs_bmap_btalloc(
                 args.minleft = 0;
                 if ((error = xfs_alloc_vextent(&args)))
                         return error;
-               ap->flist->xbf_low = 1;
+               ap->dfops->dop_low = true;
         }
         if (args.fsbno != NULLFSBLOCK) {
                 /*
@@ -3844,7 +3840,7 @@ xfs_bmap_btalloc(
                 ASSERT(*ap->firstblock == NULLFSBLOCK ||
                        XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
                        XFS_FSB_TO_AGNO(mp, args.fsbno) ||
-                      (ap->flist->xbf_low &&
+                      (ap->dfops->dop_low &&
                         XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
                         XFS_FSB_TO_AGNO(mp, args.fsbno)));
  
@@ -3852,7 +3848,7 @@ xfs_bmap_btalloc(
                 if (*ap->firstblock == NULLFSBLOCK)
                         *ap->firstblock = args.fsbno;
                 ASSERT(nullfb || fb_agno == args.agno ||
-                      (ap->flist->xbf_low && fb_agno < args.agno));
+                      (ap->dfops->dop_low && fb_agno < args.agno));
                 ap->length = args.len;
                 ap->ip->i_d.di_nblocks += args.len;
                 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
@@ -4319,7 +4315,7 @@ xfs_bmapi_allocate(
         if (error)
                 return error;
  
-       if (bma->flist->xbf_low)
+       if (bma->dfops->dop_low)
                 bma->minleft = 0;
         if (bma->cur)
                 bma->cur->bc_private.b.firstblock = *bma->firstblock;
@@ -4328,7 +4324,7 @@ xfs_bmapi_allocate(
         if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
                 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
                 bma->cur->bc_private.b.firstblock = *bma->firstblock;
-               bma->cur->bc_private.b.flist = bma->flist;
+               bma->cur->bc_private.b.dfops = bma->dfops;
         }
         /*
          * Bump the number of extents we've allocated
@@ -4409,7 +4405,7 @@ xfs_bmapi_convert_unwritten(
                 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
                                         bma->ip, whichfork);
                 bma->cur->bc_private.b.firstblock = *bma->firstblock;
-               bma->cur->bc_private.b.flist = bma->flist;
+               bma->cur->bc_private.b.dfops = bma->dfops;
         }
         mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
                                 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
@@ -4426,7 +4422,7 @@ xfs_bmapi_convert_unwritten(
         }
  
         error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
-                       &bma->cur, mval, bma->firstblock, bma->flist,
+                       &bma->cur, mval, bma->firstblock, bma->dfops,
                         &tmp_logflags);
         /*
          * Log the inode core unconditionally in the unwritten extent conversion
@@ -4480,7 +4476,7 @@ xfs_bmapi_write(
         xfs_extlen_t            total,          /* total blocks needed */
         struct xfs_bmbt_irec    *mval,          /* output: map values */
         int                     *nmap,          /* i/o: mval size/count */
-       struct xfs_bmap_free    *flist)         /* i/o: list extents to free */
+       struct xfs_defer_ops    *dfops)         /* i/o: list extents to free */
  {
         struct xfs_mount        *mp = ip->i_mount;
         struct xfs_ifork        *ifp;
@@ -4570,7 +4566,7 @@ xfs_bmapi_write(
         bma.ip = ip;
         bma.total = total;
         bma.userdata = 0;
-       bma.flist = flist;
+       bma.dfops = dfops;
         bma.firstblock = firstblock;
  
         while (bno < end && n < *nmap) {
@@ -4684,7 +4680,7 @@ error0:
                                XFS_FSB_TO_AGNO(mp, *firstblock) ==
                                XFS_FSB_TO_AGNO(mp,
                                        bma.cur->bc_private.b.firstblock) ||
-                              (flist->xbf_low &&
+                              (dfops->dop_low &&
                                 XFS_FSB_TO_AGNO(mp, *firstblock) <
                                 XFS_FSB_TO_AGNO(mp,
                                         bma.cur->bc_private.b.firstblock)));
@@ -4768,7 +4764,7 @@ xfs_bmap_del_extent(
         xfs_inode_t             *ip,    /* incore inode pointer */
         xfs_trans_t             *tp,    /* current transaction pointer */
         xfs_extnum_t            *idx,   /* extent number to update/delete */
-       xfs_bmap_free_t         *flist, /* list of extents to be freed */
+       struct xfs_defer_ops    *dfops, /* list of extents to be freed */
         xfs_btree_cur_t         *cur,   /* if null, not a btree */
         xfs_bmbt_irec_t         *del,   /* data to remove from extents */
         int                     *logflagsp, /* inode logging flags */
@@ -4870,6 +4866,7 @@ xfs_bmap_del_extent(
                 nblks = 0;
                 do_fx = 0;
         }
+
         /*
          * Set flag value to use in switch statement.
          * Left-contig is 2, right-contig is 1.
@@ -5052,12 +5049,20 @@ xfs_bmap_del_extent(
                 ++*idx;
                 break;
         }
+
+       /* remove reverse mapping */
+       if (!delay) {
+               error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
+               if (error)
+                       goto done;
+       }
+
         /*
          * If we need to, add to list of extents to delete.
          */
         if (do_fx)
-               xfs_bmap_add_free(mp, flist, del->br_startblock,
-                       del->br_blockcount);
+               xfs_bmap_add_free(mp, dfops, del->br_startblock,
+                               del->br_blockcount, NULL);
         /*
          * Adjust inode # blocks in the file.
          */
@@ -5097,7 +5102,7 @@ xfs_bunmapi(
         xfs_extnum_t            nexts,          /* number of extents max */
         xfs_fsblock_t           *firstblock,    /* first allocated block
                                                    controls a.g. for allocs */
-       xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
+       struct xfs_defer_ops    *dfops,         /* i/o: list extents to free */
         int                     *done)          /* set if not done yet */
  {
         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
@@ -5170,7 +5175,7 @@ xfs_bunmapi(
                 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
                 cur->bc_private.b.firstblock = *firstblock;
-               cur->bc_private.b.flist = flist;
+               cur->bc_private.b.dfops = dfops;
                 cur->bc_private.b.flags = 0;
         } else
                 cur = NULL;
@@ -5179,8 +5184,10 @@ xfs_bunmapi(
                 /*
                  * Synchronize by locking the bitmap inode.
                  */
-               xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
+               xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
                 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+               xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
+               xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
         }
  
         extno = 0;
@@ -5262,7 +5269,7 @@ xfs_bunmapi(
                         }
                         del.br_state = XFS_EXT_UNWRITTEN;
                         error = xfs_bmap_add_extent_unwritten_real(tp, ip,
-                                       &lastx, &cur, &del, firstblock, flist,
+                                       &lastx, &cur, &del, firstblock, dfops,
                                         &logflags);
                         if (error)
                                 goto error0;
@@ -5321,7 +5328,7 @@ xfs_bunmapi(
                                 lastx--;
                                 error = xfs_bmap_add_extent_unwritten_real(tp,
                                                 ip, &lastx, &cur, &prev,
-                                               firstblock, flist, &logflags);
+                                               firstblock, dfops, &logflags);
                                 if (error)
                                         goto error0;
                                 goto nodelete;
@@ -5330,7 +5337,7 @@ xfs_bunmapi(
                                 del.br_state = XFS_EXT_UNWRITTEN;
                                 error = xfs_bmap_add_extent_unwritten_real(tp,
                                                 ip, &lastx, &cur, &del,
-                                               firstblock, flist, &logflags);
+                                               firstblock, dfops, &logflags);
                                 if (error)
                                         goto error0;
                                 goto nodelete;
@@ -5388,7 +5395,7 @@ xfs_bunmapi(
                 } else if (cur)
                         cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
  
-               error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
+               error = xfs_bmap_del_extent(ip, tp, &lastx, dfops, cur, &del,
                                 &tmp_logflags, whichfork);
                 logflags |= tmp_logflags;
                 if (error)
@@ -5422,7 +5429,7 @@ nodelete:
          */
         if (xfs_bmap_needs_btree(ip, whichfork)) {
                 ASSERT(cur == NULL);
-               error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
+               error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops,
                         &cur, 0, &tmp_logflags, whichfork);
                 logflags |= tmp_logflags;
                 if (error)
@@ -5589,7 +5596,8 @@ xfs_bmse_shift_one(
         struct xfs_bmbt_rec_host        *gotp,
         struct xfs_btree_cur            *cur,
         int                             *logflags,
-       enum shift_direction            direction)
+       enum shift_direction            direction,
+       struct xfs_defer_ops            *dfops)
  {
         struct xfs_ifork                *ifp;
         struct xfs_mount                *mp;
@@ -5637,9 +5645,13 @@ xfs_bmse_shift_one(
                 /* check whether to merge the extent or shift it down */
                 if (xfs_bmse_can_merge(&adj_irec, &got,
                                        offset_shift_fsb)) {
-                       return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
-                                             *current_ext, gotp, adj_irecp,
-                                             cur, logflags);
+                       error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
+                                              *current_ext, gotp, adj_irecp,
+                                              cur, logflags);
+                       if (error)
+                               return error;
+                       adj_irec = got;
+                       goto update_rmap;
                 }
         } else {
                 startoff = got.br_startoff + offset_shift_fsb;
@@ -5676,9 +5688,10 @@ update_current_ext:
                 (*current_ext)--;
         xfs_bmbt_set_startoff(gotp, startoff);
         *logflags |= XFS_ILOG_CORE;
+       adj_irec = got;
         if (!cur) {
                 *logflags |= XFS_ILOG_DEXT;
-               return 0;
+               goto update_rmap;
         }
  
         error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
@@ -5688,8 +5701,18 @@ update_current_ext:
         XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
  
         got.br_startoff = startoff;
-       return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
-                              got.br_blockcount, got.br_state);
+       error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
+                       got.br_blockcount, got.br_state);
+       if (error)
+               return error;
+
+update_rmap:
+       /* update reverse mapping */
+       error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &adj_irec);
+       if (error)
+               return error;
+       adj_irec.br_startoff = startoff;
+       return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &adj_irec);
  }
  
  /*
@@ -5711,7 +5734,7 @@ xfs_bmap_shift_extents(
         int                     *done,
         xfs_fileoff_t           stop_fsb,
         xfs_fsblock_t           *firstblock,
-       struct xfs_bmap_free    *flist,
+       struct xfs_defer_ops    *dfops,
         enum shift_direction    direction,
         int                     num_exts)
  {
@@ -5756,7 +5779,7 @@ xfs_bmap_shift_extents(
         if (ifp->if_flags & XFS_IFBROOT) {
                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
                 cur->bc_private.b.firstblock = *firstblock;
-               cur->bc_private.b.flist = flist;
+               cur->bc_private.b.dfops = dfops;
                 cur->bc_private.b.flags = 0;
         }
  
@@ -5817,7 +5840,7 @@ xfs_bmap_shift_extents(
         while (nexts++ < num_exts) {
                 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
                                            &current_ext, gotp, cur, &logflags,
-                                          direction);
+                                          direction, dfops);
                 if (error)
                         goto del_cursor;
                 /*
@@ -5865,7 +5888,7 @@ xfs_bmap_split_extent_at(
         struct xfs_inode        *ip,
         xfs_fileoff_t           split_fsb,
         xfs_fsblock_t           *firstfsb,
-       struct xfs_bmap_free    *free_list)
+       struct xfs_defer_ops    *dfops)
  {
         int                             whichfork = XFS_DATA_FORK;
         struct xfs_btree_cur            *cur = NULL;
@@ -5927,7 +5950,7 @@ xfs_bmap_split_extent_at(
         if (ifp->if_flags & XFS_IFBROOT) {
                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
                 cur->bc_private.b.firstblock = *firstfsb;
-               cur->bc_private.b.flist = free_list;
+               cur->bc_private.b.dfops = dfops;
                 cur->bc_private.b.flags = 0;
                 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
                                 got.br_startblock,
@@ -5980,7 +6003,7 @@ xfs_bmap_split_extent_at(
                 int tmp_logflags; /* partial log flag return val */
  
                 ASSERT(cur == NULL);
-               error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list,
+               error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, dfops,
                                 &cur, 0, &tmp_logflags, whichfork);
                 logflags |= tmp_logflags;
         }
@@ -6004,7 +6027,7 @@ xfs_bmap_split_extent(
  {
         struct xfs_mount        *mp = ip->i_mount;
         struct xfs_trans        *tp;
-       struct xfs_bmap_free    free_list;
+       struct xfs_defer_ops    dfops;
         xfs_fsblock_t           firstfsb;
         int                     error;
  
@@ -6016,21 +6039,21 @@ xfs_bmap_split_extent(
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
  
-       xfs_bmap_init(&free_list, &firstfsb);
+       xfs_defer_init(&dfops, &firstfsb);
  
         error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
-                       &firstfsb, &free_list);
+                       &firstfsb, &dfops);
         if (error)
                 goto out;
  
-       error = xfs_bmap_finish(&tp, &free_list, NULL);
+       error = xfs_defer_finish(&tp, &dfops, NULL);
         if (error)
                 goto out;
  
         return xfs_trans_commit(tp);
  
  out:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
         xfs_trans_cancel(tp);
         return error;
  }
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h

index f1f3ae6c0a3f5d9b507002819afb4632eb1f95ba..254034f9694135c94ad5827c21608f51f183fe3e 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -32,7 +32,7 @@ extern kmem_zone_t    *xfs_bmap_free_item_zone;
   */
  struct xfs_bmalloca {
         xfs_fsblock_t           *firstblock; /* i/o first block allocated */
-       struct xfs_bmap_free    *flist; /* bmap freelist */
+       struct xfs_defer_ops    *dfops; /* deferred operations */
         struct xfs_trans        *tp;    /* transaction pointer */
         struct xfs_inode        *ip;    /* incore inode pointer */
         struct xfs_bmbt_irec    prev;   /* extent before the new one */
@@ -62,34 +62,14 @@ struct xfs_bmalloca {
   * List of extents to be free "later".
   * The list is kept sorted on xbf_startblock.
   */
-struct xfs_bmap_free_item
+struct xfs_extent_free_item
  {
-       xfs_fsblock_t           xbfi_startblock;/* starting fs block number */
-       xfs_extlen_t            xbfi_blockcount;/* number of blocks in extent */
-       struct list_head        xbfi_list;
+       xfs_fsblock_t           xefi_startblock;/* starting fs block number */
+       xfs_extlen_t            xefi_blockcount;/* number of blocks in extent */
+       struct list_head        xefi_list;
+       struct xfs_owner_info   xefi_oinfo;     /* extent owner */
  };
  
-/*
- * Header for free extent list.
- *
- * xbf_low is used by the allocator to activate the lowspace algorithm -
- * when free space is running low the extent allocator may choose to
- * allocate an extent from an AG without leaving sufficient space for
- * a btree split when inserting the new extent.  In this case the allocator
- * will enable the lowspace algorithm which is supposed to allow further
- * allocations (such as btree splits and newroots) to allocate from
- * sequential AGs.  In order to avoid locking AGs out of order the lowspace
- * algorithm will start searching for free space from AG 0.  If the correct
- * transaction reservations have been made then this algorithm will eventually
- * find all the space it needs.
- */
-typedef        struct xfs_bmap_free
-{
-       struct list_head        xbf_flist;      /* list of to-be-free extents */
-       int                     xbf_count;      /* count of items on list */
-       int                     xbf_low;        /* alloc in low mode */
-} xfs_bmap_free_t;
-
  #define        XFS_BMAP_MAX_NMAP       4
  
  /*
@@ -139,14 +119,6 @@ static inline int xfs_bmapi_aflag(int w)
  #define        DELAYSTARTBLOCK         ((xfs_fsblock_t)-1LL)
  #define        HOLESTARTBLOCK          ((xfs_fsblock_t)-2LL)
  
-static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
-{
-       INIT_LIST_HEAD(&flp->xbf_flist);
-       flp->xbf_count = 0;
-       flp->xbf_low = 0;
-       *fbp = NULLFSBLOCK;
-}
-
  /*
   * Flags for xfs_bmap_add_extent*.
   */
@@ -193,11 +165,9 @@ void       xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
  
  int    xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
  void   xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
-void   xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_bmap_free *flist,
-                         xfs_fsblock_t bno, xfs_filblks_t len);
-void   xfs_bmap_cancel(struct xfs_bmap_free *flist);
-int    xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
-                       struct xfs_inode *ip);
+void   xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+                         xfs_fsblock_t bno, xfs_filblks_t len,
+                         struct xfs_owner_info *oinfo);
  void   xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
  int    xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
                 xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
@@ -218,18 +188,18 @@ int       xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
                 xfs_fileoff_t bno, xfs_filblks_t len, int flags,
                 xfs_fsblock_t *firstblock, xfs_extlen_t total,
                 struct xfs_bmbt_irec *mval, int *nmap,
-               struct xfs_bmap_free *flist);
+               struct xfs_defer_ops *dfops);
  int    xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
                 xfs_fileoff_t bno, xfs_filblks_t len, int flags,
                 xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
-               struct xfs_bmap_free *flist, int *done);
+               struct xfs_defer_ops *dfops, int *done);
  int    xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
                 xfs_extnum_t num);
  uint   xfs_default_attroffset(struct xfs_inode *ip);
  int    xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
                 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
                 int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
-               struct xfs_bmap_free *flist, enum shift_direction direction,
+               struct xfs_defer_ops *dfops, enum shift_direction direction,
                 int num_exts);
  int    xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
  
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c

index db0c71e470c9575d54d5fd2a268eda6c49f7ff3a..cd85274e810cd1457dd62dfa7abfb725138a35fc 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -23,6 +23,7 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
  #include "xfs_inode_item.h"
@@ -34,6 +35,7 @@
  #include "xfs_quota.h"
  #include "xfs_trace.h"
  #include "xfs_cksum.h"
+#include "xfs_rmap.h"
  
  /*
   * Determine the extent state.
@@ -406,11 +408,11 @@ xfs_bmbt_dup_cursor(
                         cur->bc_private.b.ip, cur->bc_private.b.whichfork);
  
         /*
-        * Copy the firstblock, flist, and flags values,
+        * Copy the firstblock, dfops, and flags values,
          * since init cursor doesn't get them.
          */
         new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
-       new->bc_private.b.flist = cur->bc_private.b.flist;
+       new->bc_private.b.dfops = cur->bc_private.b.dfops;
         new->bc_private.b.flags = cur->bc_private.b.flags;
  
         return new;
@@ -423,7 +425,7 @@ xfs_bmbt_update_cursor(
  {
         ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
                (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
-       ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist);
+       ASSERT(dst->bc_private.b.dfops == src->bc_private.b.dfops);
  
         dst->bc_private.b.allocated += src->bc_private.b.allocated;
         dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
@@ -446,6 +448,8 @@ xfs_bmbt_alloc_block(
         args.mp = cur->bc_mp;
         args.fsbno = cur->bc_private.b.firstblock;
         args.firstblock = args.fsbno;
+       xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino,
+                       cur->bc_private.b.whichfork);
  
         if (args.fsbno == NULLFSBLOCK) {
                 args.fsbno = be64_to_cpu(start->l);
@@ -462,7 +466,7 @@ xfs_bmbt_alloc_block(
                  * block allocation here and corrupt the filesystem.
                  */
                 args.minleft = args.tp->t_blk_res;
-       } else if (cur->bc_private.b.flist->xbf_low) {
+       } else if (cur->bc_private.b.dfops->dop_low) {
                 args.type = XFS_ALLOCTYPE_START_BNO;
         } else {
                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -490,7 +494,7 @@ xfs_bmbt_alloc_block(
                 error = xfs_alloc_vextent(&args);
                 if (error)
                         goto error0;
-               cur->bc_private.b.flist->xbf_low = 1;
+               cur->bc_private.b.dfops->dop_low = true;
         }
         if (args.fsbno == NULLFSBLOCK) {
                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
@@ -525,8 +529,10 @@ xfs_bmbt_free_block(
         struct xfs_inode        *ip = cur->bc_private.b.ip;
         struct xfs_trans        *tp = cur->bc_tp;
         xfs_fsblock_t           fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
+       struct xfs_owner_info   oinfo;
  
-       xfs_bmap_add_free(mp, cur->bc_private.b.flist, fsbno, 1);
+       xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork);
+       xfs_bmap_add_free(mp, cur->bc_private.b.dfops, fsbno, 1, &oinfo);
         ip->i_d.di_nblocks--;
  
         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -599,17 +605,6 @@ xfs_bmbt_init_key_from_rec(
                 cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
  }
  
-STATIC void
-xfs_bmbt_init_rec_from_key(
-       union xfs_btree_key     *key,
-       union xfs_btree_rec     *rec)
-{
-       ASSERT(key->bmbt.br_startoff != 0);
-
-       xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff),
-                              0, 0, XFS_EXT_NORM);
-}
-
  STATIC void
  xfs_bmbt_init_rec_from_cur(
         struct xfs_btree_cur    *cur,
@@ -760,7 +755,6 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
         .get_minrecs            = xfs_bmbt_get_minrecs,
         .get_dmaxrecs           = xfs_bmbt_get_dmaxrecs,
         .init_key_from_rec      = xfs_bmbt_init_key_from_rec,
-       .init_rec_from_key      = xfs_bmbt_init_rec_from_key,
         .init_rec_from_cur      = xfs_bmbt_init_rec_from_cur,
         .init_ptr_from_cur      = xfs_bmbt_init_ptr_from_cur,
         .key_diff               = xfs_bmbt_key_diff,
@@ -800,7 +794,7 @@ xfs_bmbt_init_cursor(
         cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
         cur->bc_private.b.ip = ip;
         cur->bc_private.b.firstblock = NULLFSBLOCK;
-       cur->bc_private.b.flist = NULL;
+       cur->bc_private.b.dfops = NULL;
         cur->bc_private.b.allocated = 0;
         cur->bc_private.b.flags = 0;
         cur->bc_private.b.whichfork = whichfork;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c

index 07eeb0b4ca74f1253eccbc3a2d0434278f0165ee..b5c213a051cde3f703227f10b73e367470a4df43 100644 (file)
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -23,6 +23,7 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
  #include "xfs_inode_item.h"
@@ -43,15 +44,14 @@ kmem_zone_t *xfs_btree_cur_zone;
   * Btree magic numbers.
   */
  static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
-       { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
+       { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
           XFS_FIBT_MAGIC },
-       { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
+       { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
           XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
  };
  #define xfs_btree_magic(cur) \
         xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
  
-
  STATIC int                             /* error (0 or EFSCORRUPTED) */
  xfs_btree_check_lblock(
         struct xfs_btree_cur    *cur,   /* btree cursor */
@@ -428,6 +428,50 @@ xfs_btree_dup_cursor(
   * into a btree block (xfs_btree_*_offset) or return a pointer to the given
   * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
   * inside the btree block is done using indices starting at one, not zero!
+ *
+ * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
+ * overlapping intervals.  In such a tree, records are still sorted lowest to
+ * highest and indexed by the smallest key value that refers to the record.
+ * However, nodes are different: each pointer has two associated keys -- one
+ * indexing the lowest key available in the block(s) below (the same behavior
+ * as the key in a regular btree) and another indexing the highest key
+ * available in the block(s) below.  Because records are /not/ sorted by the
+ * highest key, all leaf block updates require us to compute the highest key
+ * that matches any record in the leaf and to recursively update the high keys
+ * in the nodes going further up in the tree, if necessary.  Nodes look like
+ * this:
+ *
+ *             +--------+-----+-----+-----+-----+-----+-------+-------+-----+
+ * Non-Leaf:   | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
+ *             +--------+-----+-----+-----+-----+-----+-------+-------+-----+
+ *
+ * To perform an interval query on an overlapped tree, perform the usual
+ * depth-first search and use the low and high keys to decide if we can skip
+ * that particular node.  If a leaf node is reached, return the records that
+ * intersect the interval.  Note that an interval query may return numerous
+ * entries.  For a non-overlapped tree, simply search for the record associated
+ * with the lowest key and iterate forward until a non-matching record is
+ * found.  Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
+ * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discusses this in
+ * more detail.
+ *
+ * Why do we care about overlapping intervals?  Let's say you have a bunch of
+ * reverse mapping records on a reflink filesystem:
+ *
+ * 1: +- file A startblock B offset C length D -----------+
+ * 2:      +- file E startblock F offset G length H --------------+
+ * 3:      +- file I startblock F offset J length K --+
+ * 4:                                                        +- file L... --+
+ *
+ * Now say we want to map block (B+D) into file A at offset (C+D).  Ideally,
+ * we'd simply increment the length of record 1.  But how do we find the record
+ * that ends at (B+D-1) (i.e. record 1)?  A LE lookup of (B+D-1) would return
+ * record 3 because the keys are ordered first by startblock.  An interval
+ * query would return records 1 and 2 because they both overlap (B+D-1), and
+ * from that we can pick out record 1 as the appropriate left neighbor.
+ *
+ * In the non-overlapped case you can do a LE lookup and decrement the cursor
+ * because a record's interval must end before the next record.
   */
  
  /*
@@ -478,6 +522,18 @@ xfs_btree_key_offset(
                 (n - 1) * cur->bc_ops->key_len;
  }
  
+/*
+ * Calculate offset of the n-th high key in a btree block.
+ */
+STATIC size_t
+xfs_btree_high_key_offset(
+       struct xfs_btree_cur    *cur,
+       int                     n)
+{
+       return xfs_btree_block_len(cur) +
+               (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
+}
+
  /*
   * Calculate offset of the n-th block pointer in a btree block.
   */
@@ -518,6 +574,19 @@ xfs_btree_key_addr(
                 ((char *)block + xfs_btree_key_offset(cur, n));
  }
  
+/*
+ * Return a pointer to the n-th high key in the btree block.
+ */
+STATIC union xfs_btree_key *
+xfs_btree_high_key_addr(
+       struct xfs_btree_cur    *cur,
+       int                     n,
+       struct xfs_btree_block  *block)
+{
+       return (union xfs_btree_key *)
+               ((char *)block + xfs_btree_high_key_offset(cur, n));
+}
+
  /*
   * Return a pointer to the n-th block pointer in the btree block.
   */
@@ -1144,6 +1213,9 @@ xfs_btree_set_refs(
         case XFS_BTNUM_BMAP:
                 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
                 break;
+       case XFS_BTNUM_RMAP:
+               xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
+               break;
         default:
                 ASSERT(0);
         }
@@ -1879,32 +1951,214 @@ error0:
         return error;
  }
  
+/* Find the high key storage area from a regular key. */
+STATIC union xfs_btree_key *
+xfs_btree_high_key_from_key(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *key)
+{
+       ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
+       return (union xfs_btree_key *)((char *)key +
+                       (cur->bc_ops->key_len / 2));
+}
+
+/* Determine the low (and high if overlapped) keys of a leaf block */
+STATIC void
+xfs_btree_get_leaf_keys(
+       struct xfs_btree_cur    *cur,
+       struct xfs_btree_block  *block,
+       union xfs_btree_key     *key)
+{
+       union xfs_btree_key     max_hkey;
+       union xfs_btree_key     hkey;
+       union xfs_btree_rec     *rec;
+       union xfs_btree_key     *high;
+       int                     n;
+
+       rec = xfs_btree_rec_addr(cur, 1, block);
+       cur->bc_ops->init_key_from_rec(key, rec);
+
+       if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+
+               cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
+               for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
+                       rec = xfs_btree_rec_addr(cur, n, block);
+                       cur->bc_ops->init_high_key_from_rec(&hkey, rec);
+                       if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
+                                       > 0)
+                               max_hkey = hkey;
+               }
+
+               high = xfs_btree_high_key_from_key(cur, key);
+               memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
+       }
+}
+
+/* Determine the low (and high if overlapped) keys of a node block */
+STATIC void
+xfs_btree_get_node_keys(
+       struct xfs_btree_cur    *cur,
+       struct xfs_btree_block  *block,
+       union xfs_btree_key     *key)
+{
+       union xfs_btree_key     *hkey;
+       union xfs_btree_key     *max_hkey;
+       union xfs_btree_key     *high;
+       int                     n;
+
+       if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+               memcpy(key, xfs_btree_key_addr(cur, 1, block),
+                               cur->bc_ops->key_len / 2);
+
+               max_hkey = xfs_btree_high_key_addr(cur, 1, block);
+               for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
+                       hkey = xfs_btree_high_key_addr(cur, n, block);
+                       if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
+                               max_hkey = hkey;
+               }
+
+               high = xfs_btree_high_key_from_key(cur, key);
+               memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
+       } else {
+               memcpy(key, xfs_btree_key_addr(cur, 1, block),
+                               cur->bc_ops->key_len);
+       }
+}
+
+/* Derive the keys for any btree block. */
+STATIC void
+xfs_btree_get_keys(
+       struct xfs_btree_cur    *cur,
+       struct xfs_btree_block  *block,
+       union xfs_btree_key     *key)
+{
+       if (be16_to_cpu(block->bb_level) == 0)
+               xfs_btree_get_leaf_keys(cur, block, key);
+       else
+               xfs_btree_get_node_keys(cur, block, key);
+}
+
  /*
- * Update keys at all levels from here to the root along the cursor's path.
+ * Decide if we need to update the parent keys of a btree block.  For
+ * a standard btree this is only necessary if we're updating the first
+ * record/key.  For an overlapping btree, we must always update the
+ * keys because the highest key can be in any of the records or keys
+ * in the block.
+ */
+static inline bool
+xfs_btree_needs_key_update(
+       struct xfs_btree_cur    *cur,
+       int                     ptr)
+{
+       return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
+}
+
+/*
+ * Update the low and high parent keys of the given level, progressing
+ * towards the root.  If force_all is false, stop if the keys for a given
+ * level do not need updating.
   */
  STATIC int
-xfs_btree_updkey(
+__xfs_btree_updkeys(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       struct xfs_btree_block  *block,
+       struct xfs_buf          *bp0,
+       bool                    force_all)
+{
+       union xfs_btree_bigkey  key;    /* keys from current level */
+       union xfs_btree_key     *lkey;  /* keys from the current level */
+       union xfs_btree_key     *hkey;
+       union xfs_btree_key     *nlkey; /* keys from the next level up */
+       union xfs_btree_key     *nhkey;
+       struct xfs_buf          *bp;
+       int                     ptr;
+
+       ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
+
+       /* Exit if there aren't any parent levels to update. */
+       if (level + 1 >= cur->bc_nlevels)
+               return 0;
+
+       trace_xfs_btree_updkeys(cur, level, bp0);
+
+       lkey = (union xfs_btree_key *)&key;
+       hkey = xfs_btree_high_key_from_key(cur, lkey);
+       xfs_btree_get_keys(cur, block, lkey);
+       for (level++; level < cur->bc_nlevels; level++) {
+#ifdef DEBUG
+               int             error;
+#endif
+               block = xfs_btree_get_block(cur, level, &bp);
+               trace_xfs_btree_updkeys(cur, level, bp);
+#ifdef DEBUG
+               error = xfs_btree_check_block(cur, block, level, bp);
+               if (error) {
+                       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+                       return error;
+               }
+#endif
+               ptr = cur->bc_ptrs[level];
+               nlkey = xfs_btree_key_addr(cur, ptr, block);
+               nhkey = xfs_btree_high_key_addr(cur, ptr, block);
+               if (!force_all &&
+                   !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
+                     cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
+                       break;
+               xfs_btree_copy_keys(cur, nlkey, lkey, 1);
+               xfs_btree_log_keys(cur, bp, ptr, ptr);
+               if (level + 1 >= cur->bc_nlevels)
+                       break;
+               xfs_btree_get_node_keys(cur, block, lkey);
+       }
+
+       return 0;
+}
+
+/* Update all the keys from some level in cursor back to the root. */
+STATIC int
+xfs_btree_updkeys_force(
+       struct xfs_btree_cur    *cur,
+       int                     level)
+{
+       struct xfs_buf          *bp;
+       struct xfs_btree_block  *block;
+
+       block = xfs_btree_get_block(cur, level, &bp);
+       return __xfs_btree_updkeys(cur, level, block, bp, true);
+}
+
+/*
+ * Update the parent keys of the given level, progressing towards the root.
+ */
+STATIC int
+xfs_btree_update_keys(
         struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *keyp,
         int                     level)
  {
         struct xfs_btree_block  *block;
         struct xfs_buf          *bp;
         union xfs_btree_key     *kp;
+       union xfs_btree_key     key;
         int                     ptr;
  
+       ASSERT(level >= 0);
+
+       block = xfs_btree_get_block(cur, level, &bp);
+       if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
+               return __xfs_btree_updkeys(cur, level, block, bp, false);
+
         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
         XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
  
-       ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
-
         /*
          * Go up the tree from this level toward the root.
          * At each level, update the key value to the value input.
          * Stop when we reach a level where the cursor isn't pointing
          * at the first entry in the block.
          */
-       for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
+       xfs_btree_get_keys(cur, block, &key);
+       for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
  #ifdef DEBUG
                 int             error;
  #endif
@@ -1918,7 +2172,7 @@ xfs_btree_updkey(
  #endif
                 ptr = cur->bc_ptrs[level];
                 kp = xfs_btree_key_addr(cur, ptr, block);
-               xfs_btree_copy_keys(cur, kp, keyp, 1);
+               xfs_btree_copy_keys(cur, kp, &key, 1);
                 xfs_btree_log_keys(cur, bp, ptr, ptr);
         }
  
@@ -1970,12 +2224,9 @@ xfs_btree_update(
                                             ptr, LASTREC_UPDATE);
         }
  
-       /* Updating first rec in leaf. Pass new key value up to our parent. */
-       if (ptr == 1) {
-               union xfs_btree_key     key;
-
-               cur->bc_ops->init_key_from_rec(&key, rec);
-               error = xfs_btree_updkey(cur, &key, 1);
+       /* Pass new key value up to our parent. */
+       if (xfs_btree_needs_key_update(cur, ptr)) {
+               error = xfs_btree_update_keys(cur, 0);
                 if (error)
                         goto error0;
         }
@@ -1998,18 +2249,19 @@ xfs_btree_lshift(
         int                     level,
         int                     *stat)          /* success/failure */
  {
-       union xfs_btree_key     key;            /* btree key */
         struct xfs_buf          *lbp;           /* left buffer pointer */
         struct xfs_btree_block  *left;          /* left btree block */
         int                     lrecs;          /* left record count */
         struct xfs_buf          *rbp;           /* right buffer pointer */
         struct xfs_btree_block  *right;         /* right btree block */
+       struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
         int                     rrecs;          /* right record count */
         union xfs_btree_ptr     lptr;           /* left btree pointer */
         union xfs_btree_key     *rkp = NULL;    /* right btree key */
         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
         int                     error;          /* error return value */
+       int                     i;
  
         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
         XFS_BTREE_TRACE_ARGI(cur, level);
@@ -2139,18 +2391,33 @@ xfs_btree_lshift(
                         xfs_btree_rec_addr(cur, 2, right),
                         -1, rrecs);
                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
+       }
  
-               /*
-                * If it's the first record in the block, we'll need a key
-                * structure to pass up to the next level (updkey).
-                */
-               cur->bc_ops->init_key_from_rec(&key,
-                       xfs_btree_rec_addr(cur, 1, right));
-               rkp = &key;
+       /*
+        * Using a temporary cursor, update the parent key values of the
+        * block on the left.
+        */
+       if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+               error = xfs_btree_dup_cursor(cur, &tcur);
+               if (error)
+                       goto error0;
+               i = xfs_btree_firstrec(tcur, level);
+               XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
+
+               error = xfs_btree_decrement(tcur, level, &i);
+               if (error)
+                       goto error1;
+
+               /* Update the parent high keys of the left block, if needed. */
+               error = xfs_btree_update_keys(tcur, level);
+               if (error)
+                       goto error1;
+
+               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
         }
  
-       /* Update the parent key values of right. */
-       error = xfs_btree_updkey(cur, rkp, level + 1);
+       /* Update the parent keys of the right block. */
+       error = xfs_btree_update_keys(cur, level);
         if (error)
                 goto error0;
  
@@ -2169,6 +2436,11 @@ out0:
  error0:
         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
         return error;
+
+error1:
+       XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
+       xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+       return error;
  }
  
  /*
@@ -2181,7 +2453,6 @@ xfs_btree_rshift(
         int                     level,
         int                     *stat)          /* success/failure */
  {
-       union xfs_btree_key     key;            /* btree key */
         struct xfs_buf          *lbp;           /* left buffer pointer */
         struct xfs_btree_block  *left;          /* left btree block */
         struct xfs_buf          *rbp;           /* right buffer pointer */
@@ -2290,12 +2561,6 @@ xfs_btree_rshift(
                 /* Now put the new data in, and log it. */
                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
-
-               cur->bc_ops->init_key_from_rec(&key, rrp);
-               rkp = &key;
-
-               ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
-                       xfs_btree_rec_addr(cur, 2, right)));
         }
  
         /*
@@ -2315,13 +2580,21 @@ xfs_btree_rshift(
         if (error)
                 goto error0;
         i = xfs_btree_lastrec(tcur, level);
-       XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
  
         error = xfs_btree_increment(tcur, level, &i);
         if (error)
                 goto error1;
  
-       error = xfs_btree_updkey(tcur, rkp, level + 1);
+       /* Update the parent high keys of the left block, if needed. */
+       if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+               error = xfs_btree_update_keys(cur, level);
+               if (error)
+                       goto error1;
+       }
+
+       /* Update the parent keys of the right block. */
+       error = xfs_btree_update_keys(tcur, level);
         if (error)
                 goto error1;
  
@@ -2422,6 +2695,11 @@ __xfs_btree_split(
  
         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
  
+       /* Adjust numrecs for the later get_*_keys() calls. */
+       lrecs -= rrecs;
+       xfs_btree_set_numrecs(left, lrecs);
+       xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
+
         /*
          * Copy btree block entries from the left block over to the
          * new block, the right. Update the right block and log the
@@ -2447,14 +2725,15 @@ __xfs_btree_split(
                 }
  #endif
  
+               /* Copy the keys & pointers to the new block. */
                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
  
                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
  
-               /* Grab the keys to the entries moved to the right block */
-               xfs_btree_copy_keys(cur, key, rkp, 1);
+               /* Stash the keys of the new block for later insertion. */
+               xfs_btree_get_node_keys(cur, right, key);
         } else {
                 /* It's a leaf.  Move records.  */
                 union xfs_btree_rec     *lrp;   /* left record pointer */
@@ -2463,27 +2742,23 @@ __xfs_btree_split(
                 lrp = xfs_btree_rec_addr(cur, src_index, left);
                 rrp = xfs_btree_rec_addr(cur, 1, right);
  
+               /* Copy records to the new block. */
                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
  
-               cur->bc_ops->init_key_from_rec(key,
-                       xfs_btree_rec_addr(cur, 1, right));
+               /* Stash the keys of the new block for later insertion. */
+               xfs_btree_get_leaf_keys(cur, right, key);
         }
  
-
         /*
          * Find the left block number by looking in the buffer.
-        * Adjust numrecs, sibling pointers.
+        * Adjust sibling pointers.
          */
         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
  
-       lrecs -= rrecs;
-       xfs_btree_set_numrecs(left, lrecs);
-       xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
-
         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
  
@@ -2499,6 +2774,14 @@ __xfs_btree_split(
                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
         }
+
+       /* Update the parent high keys of the left block, if needed. */
+       if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+               error = xfs_btree_update_keys(cur, level);
+               if (error)
+                       goto error0;
+       }
+
         /*
          * If the cursor is really in the right block, move it there.
          * If it's just pointing past the last entry in left, then we'll
@@ -2802,6 +3085,7 @@ xfs_btree_new_root(
                 bp = lbp;
                 nptr = 2;
         }
+
         /* Fill in the new block's btree header and log it. */
         xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
@@ -2810,19 +3094,24 @@ xfs_btree_new_root(
  
         /* Fill in the key data in the new root. */
         if (xfs_btree_get_level(left) > 0) {
-               xfs_btree_copy_keys(cur,
-                               xfs_btree_key_addr(cur, 1, new),
-                               xfs_btree_key_addr(cur, 1, left), 1);
-               xfs_btree_copy_keys(cur,
-                               xfs_btree_key_addr(cur, 2, new),
-                               xfs_btree_key_addr(cur, 1, right), 1);
+               /*
+                * Get the keys for the left block's keys and put them directly
+                * in the parent block.  Do the same for the right block.
+                */
+               xfs_btree_get_node_keys(cur, left,
+                               xfs_btree_key_addr(cur, 1, new));
+               xfs_btree_get_node_keys(cur, right,
+                               xfs_btree_key_addr(cur, 2, new));
         } else {
-               cur->bc_ops->init_key_from_rec(
-                               xfs_btree_key_addr(cur, 1, new),
-                               xfs_btree_rec_addr(cur, 1, left));
-               cur->bc_ops->init_key_from_rec(
-                               xfs_btree_key_addr(cur, 2, new),
-                               xfs_btree_rec_addr(cur, 1, right));
+               /*
+                * Get the keys for the left block's records and put them
+                * directly in the parent block.  Do the same for the right
+                * block.
+                */
+               xfs_btree_get_leaf_keys(cur, left,
+                       xfs_btree_key_addr(cur, 1, new));
+               xfs_btree_get_leaf_keys(cur, right,
+                       xfs_btree_key_addr(cur, 2, new));
         }
         xfs_btree_log_keys(cur, nbp, 1, 2);
  
@@ -2858,10 +3147,9 @@ xfs_btree_make_block_unfull(
         int                     *index, /* new tree index */
         union xfs_btree_ptr     *nptr,  /* new btree ptr */
         struct xfs_btree_cur    **ncur, /* new btree cursor */
-       union xfs_btree_rec     *nrec,  /* new record */
+       union xfs_btree_key     *key,   /* key of new block */
         int                     *stat)
  {
-       union xfs_btree_key     key;    /* new btree key value */
         int                     error = 0;
  
         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
@@ -2871,6 +3159,7 @@ xfs_btree_make_block_unfull(
                 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
                         /* A root block that can be made bigger. */
                         xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
+                       *stat = 1;
                 } else {
                         /* A root block that needs replacing */
                         int     logflags = 0;
@@ -2906,13 +3195,12 @@ xfs_btree_make_block_unfull(
          * If this works we have to re-set our variables because we
          * could be in a different block now.
          */
-       error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
+       error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
         if (error || *stat == 0)
                 return error;
  
  
         *index = cur->bc_ptrs[level];
-       cur->bc_ops->init_rec_from_key(&key, nrec);
         return 0;
  }
  
@@ -2925,16 +3213,17 @@ xfs_btree_insrec(
         struct xfs_btree_cur    *cur,   /* btree cursor */
         int                     level,  /* level to insert record at */
         union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
-       union xfs_btree_rec     *recp,  /* i/o: record data inserted */
+       union xfs_btree_rec     *rec,   /* record to insert */
+       union xfs_btree_key     *key,   /* i/o: block key for ptrp */
         struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
         int                     *stat)  /* success/failure */
  {
         struct xfs_btree_block  *block; /* btree block */
         struct xfs_buf          *bp;    /* buffer for block */
-       union xfs_btree_key     key;    /* btree key */
         union xfs_btree_ptr     nptr;   /* new block ptr */
         struct xfs_btree_cur    *ncur;  /* new btree cursor */
-       union xfs_btree_rec     nrec;   /* new record count */
+       union xfs_btree_bigkey  nkey;   /* new block key */
+       union xfs_btree_key     *lkey;
         int                     optr;   /* old key/record index */
         int                     ptr;    /* key/record index */
         int                     numrecs;/* number of records */
@@ -2942,11 +3231,13 @@ xfs_btree_insrec(
  #ifdef DEBUG
         int                     i;
  #endif
+       xfs_daddr_t             old_bn;
  
         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
+       XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
  
         ncur = NULL;
+       lkey = (union xfs_btree_key *)&nkey;
  
         /*
          * If we have an external root pointer, and we've made it to the
@@ -2969,15 +3260,13 @@ xfs_btree_insrec(
                 return 0;
         }
  
-       /* Make a key out of the record data to be inserted, and save it. */
-       cur->bc_ops->init_key_from_rec(&key, recp);
-
         optr = ptr;
  
         XFS_BTREE_STATS_INC(cur, insrec);
  
         /* Get pointers to the btree buffer and block. */
         block = xfs_btree_get_block(cur, level, &bp);
+       old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
         numrecs = xfs_btree_get_numrecs(block);
  
  #ifdef DEBUG
@@ -2988,10 +3277,10 @@ xfs_btree_insrec(
         /* Check that the new entry is being inserted in the right place. */
         if (ptr <= numrecs) {
                 if (level == 0) {
-                       ASSERT(cur->bc_ops->recs_inorder(cur, recp,
+                       ASSERT(cur->bc_ops->recs_inorder(cur, rec,
                                 xfs_btree_rec_addr(cur, ptr, block)));
                 } else {
-                       ASSERT(cur->bc_ops->keys_inorder(cur, &key,
+                       ASSERT(cur->bc_ops->keys_inorder(cur, key,
                                 xfs_btree_key_addr(cur, ptr, block)));
                 }
         }
@@ -3004,7 +3293,7 @@ xfs_btree_insrec(
         xfs_btree_set_ptr_null(cur, &nptr);
         if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
                 error = xfs_btree_make_block_unfull(cur, level, numrecs,
-                                       &optr, &ptr, &nptr, &ncur, &nrec, stat);
+                                       &optr, &ptr, &nptr, &ncur, lkey, stat);
                 if (error || *stat == 0)
                         goto error0;
         }
@@ -3054,7 +3343,7 @@ xfs_btree_insrec(
  #endif
  
                 /* Now put the new data in, bump numrecs and log it. */
-               xfs_btree_copy_keys(cur, kp, &key, 1);
+               xfs_btree_copy_keys(cur, kp, key, 1);
                 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
                 numrecs++;
                 xfs_btree_set_numrecs(block, numrecs);
@@ -3075,7 +3364,7 @@ xfs_btree_insrec(
                 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
  
                 /* Now put the new data in, bump numrecs and log it. */
-               xfs_btree_copy_recs(cur, rp, recp, 1);
+               xfs_btree_copy_recs(cur, rp, rec, 1);
                 xfs_btree_set_numrecs(block, ++numrecs);
                 xfs_btree_log_recs(cur, bp, ptr, numrecs);
  #ifdef DEBUG
@@ -3089,9 +3378,18 @@ xfs_btree_insrec(
         /* Log the new number of records in the btree header. */
         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
  
-       /* If we inserted at the start of a block, update the parents' keys. */
-       if (optr == 1) {
-               error = xfs_btree_updkey(cur, &key, level + 1);
+       /*
+        * If we just inserted into a new tree block, we have to
+        * recalculate nkey here because nkey is out of date.
+        *
+        * Otherwise we're just updating an existing block (having shoved
+        * some records into the new tree block), so use the regular key
+        * update mechanism.
+        */
+       if (bp && bp->b_bn != old_bn) {
+               xfs_btree_get_keys(cur, block, lkey);
+       } else if (xfs_btree_needs_key_update(cur, optr)) {
+               error = xfs_btree_update_keys(cur, level);
                 if (error)
                         goto error0;
         }
@@ -3101,7 +3399,7 @@ xfs_btree_insrec(
          * we are at the far right edge of the tree, update it.
          */
         if (xfs_btree_is_lastrec(cur, block, level)) {
-               cur->bc_ops->update_lastrec(cur, block, recp,
+               cur->bc_ops->update_lastrec(cur, block, rec,
                                             ptr, LASTREC_INSREC);
         }
  
@@ -3111,7 +3409,7 @@ xfs_btree_insrec(
          */
         *ptrp = nptr;
         if (!xfs_btree_ptr_is_null(cur, &nptr)) {
-               *recp = nrec;
+               xfs_btree_copy_keys(cur, key, lkey, 1);
                 *curp = ncur;
         }
  
@@ -3142,14 +3440,20 @@ xfs_btree_insert(
         union xfs_btree_ptr     nptr;   /* new block number (split result) */
         struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
         struct xfs_btree_cur    *pcur;  /* previous level's cursor */
+       union xfs_btree_bigkey  bkey;   /* key of block to insert */
+       union xfs_btree_key     *key;
         union xfs_btree_rec     rec;    /* record to insert */
  
         level = 0;
         ncur = NULL;
         pcur = cur;
+       key = (union xfs_btree_key *)&bkey;
  
         xfs_btree_set_ptr_null(cur, &nptr);
+
+       /* Make a key out of the record data to be inserted, and save it. */
         cur->bc_ops->init_rec_from_cur(cur, &rec);
+       cur->bc_ops->init_key_from_rec(key, &rec);
  
         /*
          * Loop going up the tree, starting at the leaf level.
@@ -3161,7 +3465,8 @@ xfs_btree_insert(
                  * Insert nrec/nptr into this level of the tree.
                  * Note if we fail, nptr will be null.
                  */
-               error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
+               error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
+                               &ncur, &i);
                 if (error) {
                         if (pcur != cur)
                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
@@ -3385,8 +3690,6 @@ xfs_btree_delrec(
         struct xfs_buf          *bp;            /* buffer for block */
         int                     error;          /* error return value */
         int                     i;              /* loop counter */
-       union xfs_btree_key     key;            /* storage for keyp */
-       union xfs_btree_key     *keyp = &key;   /* passed to the next level */
         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
         struct xfs_buf          *lbp;           /* left buffer pointer */
         struct xfs_btree_block  *left;          /* left btree block */
@@ -3457,13 +3760,6 @@ xfs_btree_delrec(
                         xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
                         xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
                 }
-
-               /*
-                * If it's the first record in the block, we'll need to pass a
-                * key up to the next level (updkey).
-                */
-               if (ptr == 1)
-                       keyp = xfs_btree_key_addr(cur, 1, block);
         } else {
                 /* It's a leaf. operate on records */
                 if (ptr < numrecs) {
@@ -3472,16 +3768,6 @@ xfs_btree_delrec(
                                 -1, numrecs - ptr);
                         xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
                 }
-
-               /*
-                * If it's the first record in the block, we'll need a key
-                * structure to pass up to the next level (updkey).
-                */
-               if (ptr == 1) {
-                       cur->bc_ops->init_key_from_rec(&key,
-                                       xfs_btree_rec_addr(cur, 1, block));
-                       keyp = &key;
-               }
         }
  
         /*
@@ -3548,8 +3834,8 @@ xfs_btree_delrec(
          * If we deleted the leftmost entry in the block, update the
          * key values above us in the tree.
          */
-       if (ptr == 1) {
-               error = xfs_btree_updkey(cur, keyp, level + 1);
+       if (xfs_btree_needs_key_update(cur, ptr)) {
+               error = xfs_btree_update_keys(cur, level);
                 if (error)
                         goto error0;
         }
@@ -3878,6 +4164,16 @@ xfs_btree_delrec(
         if (level > 0)
                 cur->bc_ptrs[level]--;
  
+       /*
+        * We combined blocks, so we have to update the parent keys if the
+        * btree supports overlapped intervals.  However, bc_ptrs[level + 1]
+        * points to the old block so that the caller knows which record to
+        * delete.  Therefore, the caller must be savvy enough to call updkeys
+        * for us if we return stat == 2.  The other exit points from this
+        * function don't require deletions further up the tree, so they can
+        * call updkeys directly.
+        */
+
         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
         /* Return value means the next level up has something to do. */
         *stat = 2;
@@ -3903,6 +4199,7 @@ xfs_btree_delete(
         int                     error;  /* error return value */
         int                     level;
         int                     i;
+       bool                    joined = false;
  
         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
  
@@ -3916,6 +4213,18 @@ xfs_btree_delete(
                 error = xfs_btree_delrec(cur, level, &i);
                 if (error)
                         goto error0;
+               if (i == 2)
+                       joined = true;
+       }
+
+       /*
+        * If we combined blocks as part of deleting the record, delrec won't
+        * have updated the parent high keys so we have to do that here.
+        */
+       if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
+               error = xfs_btree_updkeys_force(cur, 0);
+               if (error)
+                       goto error0;
         }
  
         if (i == 0) {
@@ -3978,6 +4287,81 @@ xfs_btree_get_rec(
         return 0;
  }
  
+/*
+ * Run @fn on the block the cursor currently holds at @level, then read in
+ * that block's right sibling for the next call.  Any nonzero status from
+ * @fn is returned as-is; -ENOENT is returned once a block with no right
+ * sibling has been processed.
+ */
+STATIC int
+xfs_btree_visit_block(
+       struct xfs_btree_cur            *cur,
+       int                             level,
+       xfs_btree_visit_blocks_fn       fn,
+       void                            *data)
+{
+       struct xfs_btree_block          *blk;
+       struct xfs_buf                  *buf;
+       union xfs_btree_ptr             sibling;
+       int                             rc;
+
+       /* Issue right sibling readahead before touching the block. */
+       xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+       blk = xfs_btree_get_block(cur, level, &buf);
+
+       /* Let the caller's function process this block. */
+       rc = fn(cur, level, data);
+       if (rc != 0)
+               return rc;
+
+       /* A null right sibling pointer marks the end of this level. */
+       xfs_btree_get_sibling(cur, blk, &sibling, XFS_BB_RIGHTSIB);
+       if (xfs_btree_ptr_is_null(cur, &sibling))
+               return -ENOENT;
+
+       /* Read the right sibling so the next iteration visits it. */
+       return xfs_btree_lookup_get_block(cur, level, &sibling, &blk);
+}
+
+
+/*
+ * Call @fn on every block in a btree.  Levels are walked from
+ * bc_nlevels - 1 down to 0; within a level the walk starts at the left
+ * hand block and follows right sibling pointers.  Returns zero on success,
+ * or the first status other than -ENOENT reported along the way.
+ */
+int
+xfs_btree_visit_blocks(
+       struct xfs_btree_cur            *cur,
+       xfs_btree_visit_blocks_fn       fn,
+       void                            *data)
+{
+       struct xfs_btree_block          *blk = NULL;
+       union xfs_btree_ptr             leftmost;
+       int                             lvl;
+       int                             rc = 0;
+
+       cur->bc_ops->init_ptr_from_cur(cur, &leftmost);
+
+       lvl = cur->bc_nlevels - 1;
+       while (lvl >= 0) {
+               /* Load the left hand block of this level. */
+               rc = xfs_btree_lookup_get_block(cur, lvl, &leftmost, &blk);
+               if (rc)
+                       return rc;
+
+               if (lvl > 0) {
+                       union xfs_btree_ptr     *child;
+
+                       /* Start readahead of the next level's leftmost block. */
+                       child = xfs_btree_ptr_addr(cur, 1, blk);
+                       xfs_btree_readahead_ptr(cur, child, 1);
+
+                       /* Remember it; the next pass of the loop starts there. */
+                       leftmost = *child;
+               }
+
+               /* Visit buffers in this level until one reports a status. */
+               for (;;) {
+                       rc = xfs_btree_visit_block(cur, lvl, fn, data);
+                       if (rc)
+                               break;
+               }
+
+               /* -ENOENT only means the level ran out of siblings. */
+               if (rc != -ENOENT)
+                       return rc;
+
+               lvl--;
+       }
+
+       return 0;
+}
+
  /*
   * Change the owner of a btree.
   *
@@ -4002,26 +4386,27 @@ xfs_btree_get_rec(
   * just queue the modified buffer as delayed write buffer so the transaction
   * recovery completion writes the changes to disk.
   */
+struct xfs_btree_block_change_owner_info {
+       __uint64_t              new_owner;      /* value written to bb_owner */
+       struct list_head        *buffer_list;   /* passed to xfs_buf_delwri_queue() */
+};
+
  static int
  xfs_btree_block_change_owner(
         struct xfs_btree_cur    *cur,
         int                     level,
-       __uint64_t              new_owner,
-       struct list_head        *buffer_list)
+       void                    *data)
  {
+       struct xfs_btree_block_change_owner_info        *bbcoi = data;
         struct xfs_btree_block  *block;
         struct xfs_buf          *bp;
-       union xfs_btree_ptr     rptr;
-
-       /* do right sibling readahead */
-       xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
  
         /* modify the owner */
         block = xfs_btree_get_block(cur, level, &bp);
         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-               block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
+               block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
         else
-               block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
+               block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
  
         /*
          * If the block is a root block hosted in an inode, we might not have a
@@ -4035,19 +4420,14 @@ xfs_btree_block_change_owner(
                         xfs_trans_ordered_buf(cur->bc_tp, bp);
                         xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
                 } else {
-                       xfs_buf_delwri_queue(bp, buffer_list);
+                       xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
                 }
         } else {
                 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
                 ASSERT(level == cur->bc_nlevels - 1);
         }
  
-       /* now read rh sibling block for next iteration */
-       xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
-       if (xfs_btree_ptr_is_null(cur, &rptr))
-               return -ENOENT;
-
-       return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
+       return 0;
  }
  
  int
@@ -4056,43 +4436,13 @@ xfs_btree_change_owner(
         __uint64_t              new_owner,
         struct list_head        *buffer_list)
  {
-       union xfs_btree_ptr     lptr;
-       int                     level;
-       struct xfs_btree_block  *block = NULL;
-       int                     error = 0;
-
-       cur->bc_ops->init_ptr_from_cur(cur, &lptr);
-
-       /* for each level */
-       for (level = cur->bc_nlevels - 1; level >= 0; level--) {
-               /* grab the left hand block */
-               error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
-               if (error)
-                       return error;
-
-               /* readahead the left most block for the next level down */
-               if (level > 0) {
-                       union xfs_btree_ptr     *ptr;
-
-                       ptr = xfs_btree_ptr_addr(cur, 1, block);
-                       xfs_btree_readahead_ptr(cur, ptr, 1);
-
-                       /* save for the next iteration of the loop */
-                       lptr = *ptr;
-               }
-
-               /* for each buffer in the level */
-               do {
-                       error = xfs_btree_block_change_owner(cur, level,
-                                                            new_owner,
-                                                            buffer_list);
-               } while (!error);
+       struct xfs_btree_block_change_owner_info        bbcoi;
  
-               if (error != -ENOENT)
-                       return error;
-       }
+       bbcoi.new_owner = new_owner;
+       bbcoi.buffer_list = buffer_list;
  
-       return 0;
+       return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
+                       &bbcoi);
  }
  
  /**
@@ -4171,3 +4521,267 @@ xfs_btree_compute_maxlevels(
                 maxblocks = (maxblocks + limits[1] - 1) / limits[1];
         return level;
  }
+
+/*
+ * Query a regular btree for all records overlapping a given interval.
+ * Start with a LE lookup of the key of low_rec (stepping right once if
+ * that finds nothing) and return all records until we find a record with
+ * a key greater than the key of high_rec.
+ *
+ * The btree cursor must be set to the low record used to generate
+ * low_key.  @fn is called with @priv for each matching record; a negative
+ * return value or XFS_BTREE_QUERY_RANGE_ABORT stops the walk and is
+ * returned to the caller, as is any error from the cursor operations.
+ */
+STATIC int
+xfs_btree_simple_query_range(
+       struct xfs_btree_cur            *cur,
+       union xfs_btree_key             *low_key,
+       union xfs_btree_key             *high_key,
+       xfs_btree_query_range_fn        fn,
+       void                            *priv)
+{
+       union xfs_btree_rec             *recp;
+       union xfs_btree_key             rec_key;
+       __int64_t                       diff;
+       int                             stat;
+       bool                            firstrec = true;
+       int                             error;
+
+       ASSERT(cur->bc_ops->init_high_key_from_rec);
+       ASSERT(cur->bc_ops->diff_two_keys);
+
+       /*
+        * Find the leftmost record.  The btree cursor must be set
+        * to the low record used to generate low_key.
+        */
+       stat = 0;
+       error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
+       if (error)
+               goto out;
+
+       /* Nothing?  See if there's anything to the right. */
+       if (!stat) {
+               error = xfs_btree_increment(cur, 0, &stat);
+               if (error)
+                       goto out;
+       }
+
+       while (stat) {
+               /* Find the record. */
+               error = xfs_btree_get_rec(cur, &recp, &stat);
+               if (error || !stat)
+                       break;
+
+               /*
+                * Skip if high_key(rec) < low_key.  Only the first record
+                * can end before the query range starts, so this is the
+                * only place the record's high key is needed.
+                */
+               if (firstrec) {
+                       cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
+                       firstrec = false;
+                       diff = cur->bc_ops->diff_two_keys(cur, low_key,
+                                       &rec_key);
+                       if (diff > 0)
+                               goto advloop;
+               }
+
+               /*
+                * Stop if high_key < low_key(rec).  This must use the
+                * record's regular (low) key; comparing its high key would
+                * drop a record that starts inside the range but extends
+                * past the end of it.
+                */
+               cur->bc_ops->init_key_from_rec(&rec_key, recp);
+               diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
+               if (diff > 0)
+                       break;
+
+               /* Callback */
+               error = fn(cur, recp, priv);
+               if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
+                       break;
+
+advloop:
+               /* Move on to the next record. */
+               error = xfs_btree_increment(cur, 0, &stat);
+               if (error)
+                       break;
+       }
+
+out:
+       return error;
+}
+
+/*
+ * Query an overlapped interval btree for all records overlapping a given
+ * interval.  This function roughly follows the algorithm given in
+ * "Interval Trees" of _Introduction to Algorithms_, which is section
+ * 14.3 in the 2nd and 3rd editions.
+ *
+ * The caller passes in the keys for the low and high ends of the query
+ * interval as low_key and high_key.
+ *
+ * For any leaf node, generate the high and low keys for the record.
+ * If the record keys overlap with the query low/high keys, pass the
+ * record to the function iterator.
+ *
+ * For any internal node, compare the low and high keys of each
+ * pointer against the query low/high keys.  If there's an overlap,
+ * follow the pointer.
+ *
+ * As an optimization, we stop scanning a block when we find a low key
+ * that is greater than the query's high key.
+ *
+ * A negative error code or XFS_BTREE_QUERY_RANGE_ABORT returned by fn
+ * stops the walk and is returned to the caller; errors from loading a
+ * block are returned as well.
+ */
+STATIC int
+xfs_btree_overlapped_query_range(
+       struct xfs_btree_cur            *cur,
+       union xfs_btree_key             *low_key,
+       union xfs_btree_key             *high_key,
+       xfs_btree_query_range_fn        fn,
+       void                            *priv)
+{
+       union xfs_btree_ptr             ptr;
+       union xfs_btree_ptr             *pp;
+       union xfs_btree_key             rec_key;
+       union xfs_btree_key             rec_hkey;
+       union xfs_btree_key             *lkp;
+       union xfs_btree_key             *hkp;
+       union xfs_btree_rec             *recp;
+       struct xfs_btree_block          *block;
+       __int64_t                       ldiff;
+       __int64_t                       hdiff;
+       int                             level;
+       struct xfs_buf                  *bp;
+       int                             i;
+       int                             error;
+
+       /*
+        * Load the root of the btree, i.e. the block at the topmost level
+        * that the cursor's initial pointer refers to, and start scanning
+        * it from its first entry.
+        */
+       level = cur->bc_nlevels - 1;
+       cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+       error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
+       if (error)
+               return error;
+       xfs_btree_get_block(cur, level, &bp);
+       trace_xfs_btree_overlapped_query_range(cur, level, bp);
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, block, level, bp);
+       if (error)
+               goto out;
+#endif
+       cur->bc_ptrs[level] = 1;
+
+       while (level < cur->bc_nlevels) {
+               block = xfs_btree_get_block(cur, level, &bp);
+
+               /*
+                * End of node, pop back towards the root.  The parent's
+                * pointer is advanced so the scan resumes with its next
+                * entry; popping off the topmost level ends the loop.
+                */
+               if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
+pop_up:
+                       if (level < cur->bc_nlevels - 1)
+                               cur->bc_ptrs[level + 1]++;
+                       level++;
+                       continue;
+               }
+
+               if (level == 0) {
+                       /* Handle a leaf node. */
+                       recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
+
+                       cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
+                       ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
+                                       low_key);
+
+                       cur->bc_ops->init_key_from_rec(&rec_key, recp);
+                       hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
+                                       &rec_key);
+
+                       /*
+                        * If (record's high key >= query's low key) and
+                        *    (query's high key >= record's low key), then
+                        * this record overlaps the query range; callback.
+                        */
+                       if (ldiff >= 0 && hdiff >= 0) {
+                               error = fn(cur, recp, priv);
+                               if (error < 0 ||
+                                   error == XFS_BTREE_QUERY_RANGE_ABORT)
+                                       break;
+                       } else if (hdiff < 0) {
+                               /*
+                                * Record's low key is larger than the
+                                * query's high key; stop scanning this
+                                * block and pop.
+                                */
+                               goto pop_up;
+                       }
+                       cur->bc_ptrs[level]++;
+                       continue;
+               }
+
+               /* Handle an internal node. */
+               lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
+               hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
+               pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
+
+               ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
+               hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);
+
+               /*
+                * If (pointer's high key >= query's low key) and
+                *    (query's high key >= pointer's low key), then
+                * the block behind this pointer may hold records that
+                * overlap the query range; follow pointer and start
+                * scanning the child block from its first entry.
+                */
+               if (ldiff >= 0 && hdiff >= 0) {
+                       level--;
+                       error = xfs_btree_lookup_get_block(cur, level, pp,
+                                       &block);
+                       if (error)
+                               goto out;
+                       xfs_btree_get_block(cur, level, &bp);
+                       trace_xfs_btree_overlapped_query_range(cur, level, bp);
+#ifdef DEBUG
+                       error = xfs_btree_check_block(cur, block, level, bp);
+                       if (error)
+                               goto out;
+#endif
+                       cur->bc_ptrs[level] = 1;
+                       continue;
+               } else if (hdiff < 0) {
+                       /* The low key is larger than the upper range; pop. */
+                       goto pop_up;
+               }
+               cur->bc_ptrs[level]++;
+       }
+
+out:
+       /*
+        * If we don't end this function with the cursor pointing at a record
+        * block, a subsequent non-error cursor deletion will not release
+        * node-level buffers, causing a buffer leak.  This is quite possible
+        * with a zero-results range query, so release the buffers if we
+        * failed to return any results.  The test used here is that the
+        * cursor holds no level 0 buffer.
+        */
+       if (cur->bc_bufs[0] == NULL) {
+               for (i = 0; i < cur->bc_nlevels; i++) {
+                       if (cur->bc_bufs[i]) {
+                               xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
+                               cur->bc_bufs[i] = NULL;
+                               cur->bc_ptrs[i] = 0;
+                               cur->bc_ra[i] = 0;
+                       }
+               }
+       }
+
+       return error;
+}
+
+/*
+ * Run a range query against a btree: every record overlapping the interval
+ * of keys described by @low_rec and @high_rec is handed to @fn together
+ * with @priv.  @fn returns one of the XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT}
+ * values or the usual negative error code.  This function returns
+ * XFS_BTREE_QUERY_RANGE_ABORT, zero, or a negative error code; -EINVAL is
+ * returned when the low key compares greater than the high key.
+ */
+int
+xfs_btree_query_range(
+       struct xfs_btree_cur            *cur,
+       union xfs_btree_irec            *low_rec,
+       union xfs_btree_irec            *high_rec,
+       xfs_btree_query_range_fn        fn,
+       void                            *priv)
+{
+       union xfs_btree_key             lo;
+       union xfs_btree_key             hi;
+       union xfs_btree_rec             tmp;
+
+       /*
+        * Turn each end of the interval into a key by staging its record
+        * in the cursor.  The high end goes first so that the cursor is
+        * left holding the low record afterwards.
+        */
+       cur->bc_rec = *high_rec;
+       cur->bc_ops->init_rec_from_cur(cur, &tmp);
+       cur->bc_ops->init_key_from_rec(&hi, &tmp);
+
+       cur->bc_rec = *low_rec;
+       cur->bc_ops->init_rec_from_cur(cur, &tmp);
+       cur->bc_ops->init_key_from_rec(&lo, &tmp);
+
+       /* The low key must not compare greater than the high key. */
+       if (cur->bc_ops->diff_two_keys(cur, &lo, &hi) > 0)
+               return -EINVAL;
+
+       /* Overlapping btrees take the interval walk; others the simple scan. */
+       if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
+               return xfs_btree_overlapped_query_range(cur, &lo, &hi,
+                               fn, priv);
+       return xfs_btree_simple_query_range(cur, &lo, &hi, fn, priv);
+}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h

index 785a996821591c89e9a74cea413789225c7ba890..04d0865e5e6dc0b3f5c30ac9f8a31f243f65baa2 100644 (file)
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -19,7 +19,7 @@
  #define        __XFS_BTREE_H__
  
  struct xfs_buf;
-struct xfs_bmap_free;
+struct xfs_defer_ops;
  struct xfs_inode;
  struct xfs_mount;
  struct xfs_trans;
@@ -38,17 +38,37 @@ union xfs_btree_ptr {
  };
  
  union xfs_btree_key {
-       xfs_bmbt_key_t          bmbt;
-       xfs_bmdr_key_t          bmbr;   /* bmbt root block */
-       xfs_alloc_key_t         alloc;
-       xfs_inobt_key_t         inobt;
+       struct xfs_bmbt_key             bmbt;
+       xfs_bmdr_key_t                  bmbr;   /* bmbt root block */
+       xfs_alloc_key_t                 alloc;
+       struct xfs_inobt_key            inobt;
+       struct xfs_rmap_key             rmap;
+};
+
+/*
+ * In-core key that holds both low and high keys for overlapped btrees.
+ * The two keys are packed next to each other on disk, so do the same
+ * in memory.  Preserve the existing xfs_btree_key as a single key to
+ * avoid the mental model breakage that would happen if we passed a
+ * bigkey into a function that operates on a single key.
+ */
+union xfs_btree_bigkey {
+       struct xfs_bmbt_key             bmbt;
+       xfs_bmdr_key_t                  bmbr;   /* bmbt root block */
+       xfs_alloc_key_t                 alloc;
+       struct xfs_inobt_key            inobt;
+       struct {
+               struct xfs_rmap_key     rmap;
+               struct xfs_rmap_key     rmap_hi;
+       };
  };
  
  union xfs_btree_rec {
-       xfs_bmbt_rec_t          bmbt;
-       xfs_bmdr_rec_t          bmbr;   /* bmbt root block */
-       xfs_alloc_rec_t         alloc;
-       xfs_inobt_rec_t         inobt;
+       struct xfs_bmbt_rec             bmbt;
+       xfs_bmdr_rec_t                  bmbr;   /* bmbt root block */
+       struct xfs_alloc_rec            alloc;
+       struct xfs_inobt_rec            inobt;
+       struct xfs_rmap_rec             rmap;
  };
  
  /*
@@ -63,6 +83,7 @@ union xfs_btree_rec {
  #define        XFS_BTNUM_BMAP  ((xfs_btnum_t)XFS_BTNUM_BMAPi)
  #define        XFS_BTNUM_INO   ((xfs_btnum_t)XFS_BTNUM_INOi)
  #define        XFS_BTNUM_FINO  ((xfs_btnum_t)XFS_BTNUM_FINOi)
+#define        XFS_BTNUM_RMAP  ((xfs_btnum_t)XFS_BTNUM_RMAPi)
  
  /*
   * For logging record fields.
@@ -95,6 +116,7 @@ do {    \
         case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \
         case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \
         case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \
+       case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(__mp, rmap, stat); break; \
         case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break;       \
         }       \
  } while (0)
@@ -115,11 +137,13 @@ do {    \
                 __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \
         case XFS_BTNUM_FINO:    \
                 __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \
+       case XFS_BTNUM_RMAP:    \
+               __XFS_BTREE_STATS_ADD(__mp, rmap, stat, val); break; \
         case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
         }       \
  } while (0)
  
-#define        XFS_BTREE_MAXLEVELS     8       /* max of all btrees */
+#define        XFS_BTREE_MAXLEVELS     9       /* max of all btrees */
  
  struct xfs_btree_ops {
         /* size of the key and record structures */
@@ -158,17 +182,25 @@ struct xfs_btree_ops {
         /* init values of btree structures */
         void    (*init_key_from_rec)(union xfs_btree_key *key,
                                      union xfs_btree_rec *rec);
-       void    (*init_rec_from_key)(union xfs_btree_key *key,
-                                    union xfs_btree_rec *rec);
         void    (*init_rec_from_cur)(struct xfs_btree_cur *cur,
                                      union xfs_btree_rec *rec);
         void    (*init_ptr_from_cur)(struct xfs_btree_cur *cur,
                                      union xfs_btree_ptr *ptr);
+       void    (*init_high_key_from_rec)(union xfs_btree_key *key,
+                                         union xfs_btree_rec *rec);
  
         /* difference between key value and cursor value */
         __int64_t (*key_diff)(struct xfs_btree_cur *cur,
                               union xfs_btree_key *key);
  
+       /*
+        * Compare two keys -- the result is positive if key1 > key2,
+        * negative if key1 < key2, and zero if equal.
+        */
+       __int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
+                                  union xfs_btree_key *key1,
+                                  union xfs_btree_key *key2);
+
         const struct xfs_buf_ops        *buf_ops;
  
  #if defined(DEBUG) || defined(XFS_WARN)
@@ -192,6 +224,13 @@ struct xfs_btree_ops {
  #define LASTREC_DELREC 2
  
  
+union xfs_btree_irec {                 /* in-core record, one member per record type */
+       struct xfs_alloc_rec_incore     a;      /* alloc record */
+       struct xfs_bmbt_irec            b;      /* bmbt record */
+       struct xfs_inobt_rec_incore     i;      /* inobt record */
+       struct xfs_rmap_irec            r;      /* rmap record */
+};
+
  /*
   * Btree cursor structure.
   * This collects all information needed by the btree code in one place.
@@ -202,11 +241,7 @@ typedef struct xfs_btree_cur
         struct xfs_mount        *bc_mp; /* file system mount struct */
         const struct xfs_btree_ops *bc_ops;
         uint                    bc_flags; /* btree features - below */
-       union {
-               xfs_alloc_rec_incore_t  a;
-               xfs_bmbt_irec_t         b;
-               xfs_inobt_rec_incore_t  i;
-       }               bc_rec;         /* current insert/search record value */
+       union xfs_btree_irec    bc_rec; /* current insert/search record value */
         struct xfs_buf  *bc_bufs[XFS_BTREE_MAXLEVELS];  /* buf ptr per level */
         int             bc_ptrs[XFS_BTREE_MAXLEVELS];   /* key/record # */
         __uint8_t       bc_ra[XFS_BTREE_MAXLEVELS];     /* readahead bits */
@@ -218,11 +253,12 @@ typedef struct xfs_btree_cur
         union {
                 struct {                        /* needed for BNO, CNT, INO */
                         struct xfs_buf  *agbp;  /* agf/agi buffer pointer */
+                       struct xfs_defer_ops *dfops;    /* deferred updates */
                         xfs_agnumber_t  agno;   /* ag number */
                 } a;
                 struct {                        /* needed for BMAP */
                         struct xfs_inode *ip;   /* pointer to our inode */
-                       struct xfs_bmap_free *flist;    /* list to free after */
+                       struct xfs_defer_ops *dfops;    /* deferred updates */
                         xfs_fsblock_t   firstblock;     /* 1st blk allocated */
                         int             allocated;      /* count of alloced */
                         short           forksize;       /* fork's inode space */
@@ -238,6 +274,7 @@ typedef struct xfs_btree_cur
  #define XFS_BTREE_ROOT_IN_INODE                (1<<1)  /* root may be variable size */
  #define XFS_BTREE_LASTREC_UPDATE       (1<<2)  /* track last rec externally */
  #define XFS_BTREE_CRC_BLOCKS           (1<<3)  /* uses extended btree blocks */
+#define XFS_BTREE_OVERLAPPING          (1<<4)  /* overlapping intervals */
  
  
  #define        XFS_BTREE_NOERROR       0
@@ -477,4 +514,19 @@ bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
  uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits,
                                  unsigned long len);
  
+/* return codes */
+#define XFS_BTREE_QUERY_RANGE_CONTINUE 0       /* keep iterating */
+#define XFS_BTREE_QUERY_RANGE_ABORT    1       /* stop iterating */
+typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
+               union xfs_btree_rec *rec, void *priv);
+
+int xfs_btree_query_range(struct xfs_btree_cur *cur,
+               union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
+               xfs_btree_query_range_fn fn, void *priv);
+
+typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
+               void *data);
+int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
+               xfs_btree_visit_blocks_fn fn, void *data);
+
  #endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c

index 0f1f165f404864dc31e4a43b56124610ab6b57cc..f2dc1a950c85c691aac4dd00d3c0c640fc8f1543 100644 (file)
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2029,7 +2029,7 @@ xfs_da_grow_inode_int(
         error = xfs_bmapi_write(tp, dp, *bno, count,
                         xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
                         args->firstblock, args->total, &map, &nmap,
-                       args->flist);
+                       args->dfops);
         if (error)
                 return error;
  
@@ -2052,7 +2052,7 @@ xfs_da_grow_inode_int(
                         error = xfs_bmapi_write(tp, dp, b, c,
                                         xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
                                         args->firstblock, args->total,
-                                       &mapp[mapi], &nmap, args->flist);
+                                       &mapp[mapi], &nmap, args->dfops);
                         if (error)
                                 goto out_free_map;
                         if (nmap < 1)
@@ -2362,7 +2362,7 @@ xfs_da_shrink_inode(
                  */
                 error = xfs_bunmapi(tp, dp, dead_blkno, count,
                                     xfs_bmapi_aflag(w), 0, args->firstblock,
-                                   args->flist, &done);
+                                   args->dfops, &done);
                 if (error == -ENOSPC) {
                         if (w != XFS_DATA_FORK)
                                 break;
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h

index 6e153e399a7759ea5d73270c7e63645221f7026b..98c75cbe6ac2ec6063bf381cead3cea002a87dff 100644 (file)
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -19,7 +19,7 @@
  #ifndef __XFS_DA_BTREE_H__
  #define        __XFS_DA_BTREE_H__
  
-struct xfs_bmap_free;
+struct xfs_defer_ops;
  struct xfs_inode;
  struct xfs_trans;
  struct zone;
@@ -70,7 +70,7 @@ typedef struct xfs_da_args {
         xfs_ino_t       inumber;        /* input/output inode number */
         struct xfs_inode *dp;           /* directory inode to manipulate */
         xfs_fsblock_t   *firstblock;    /* ptr to firstblock for bmap calls */
-       struct xfs_bmap_free *flist;    /* ptr to freelist for bmap_finish */
+       struct xfs_defer_ops *dfops;    /* ptr to freelist for bmap_finish */
         struct xfs_trans *trans;        /* current trans (changes over time) */
         xfs_extlen_t    total;          /* total blocks needed, for 1st bmap */
         int             whichfork;      /* data or attribute fork */
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h

index 685f23b670568ea3df1f2944d70ac6e3ea2cc238..9a492a9e19bd0af9e52532ebb99ca7ec114b2f5d 100644 (file)
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -629,6 +629,7 @@ typedef struct xfs_attr_shortform {
         struct xfs_attr_sf_hdr {        /* constant-structure header block */
                 __be16  totsize;        /* total bytes in shortform list */
                 __u8    count;  /* count of active entries */
+               __u8    padding;
         } hdr;
         struct xfs_attr_sf_entry {
                 __uint8_t namelen;      /* actual length of name (no NULL) */
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c

new file mode 100644 (file)

index 0000000..054a203
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -0,0 +1,463 @@
+/*
+ * Copyright (C) 2016 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+
+/*
+ * Deferred Operations in XFS
+ *
+ * Due to the way locking rules work in XFS, certain transactions (block
+ * mapping and unmapping, typically) have permanent reservations so that
+ * we can roll the transaction to adhere to AG locking order rules and
+ * to unlock buffers between metadata updates.  Prior to rmap/reflink,
+ * the mapping code had a mechanism to perform these deferrals for
+ * extents that were going to be freed; this code makes that facility
+ * more generic.
+ *
+ * When adding the reverse mapping and reflink features, it became
+ * necessary to perform complex remapping multi-transactions to comply
+ * with AG locking order rules, and to be able to spread a single
+ * refcount update operation (an operation on an n-block extent can
+ * update as many as n records!) among multiple transactions.  XFS can
+ * roll a transaction to facilitate this, but using this facility
+ * requires us to log "intent" items in case log recovery needs to
+ * redo the operation, and to log "done" items to indicate that redo
+ * is not necessary.
+ *
+ * Deferred work is tracked in xfs_defer_pending items.  Each pending
+ * item tracks one type of deferred work.  Incoming work items (which
+ * have not yet had an intent logged) are attached to a pending item
+ * on the dop_intake list, where they wait for the caller to finish
+ * the deferred operations.
+ *
+ * Finishing a set of deferred operations is an involved process.  To
+ * start, we define "rolling a deferred-op transaction" as follows:
+ *
+ * > For each xfs_defer_pending item on the dop_intake list,
+ *   - Sort the work items in AG order.  XFS locking
+ *     order rules require us to lock buffers in AG order.
+ *   - Create a log intent item for that type.
+ *   - Attach it to the pending item.
+ *   - Move the pending item from the dop_intake list to the
+ *     dop_pending list.
+ * > Roll the transaction.
+ *
+ * NOTE: To avoid exceeding the transaction reservation, we limit the
+ * number of items that we attach to a given xfs_defer_pending.
+ *
+ * The actual finishing process looks like this:
+ *
+ * > For each xfs_defer_pending in the dop_pending list,
+ *   - Roll the deferred-op transaction as above.
+ *   - Create a log done item for that type, and attach it to the
+ *     log intent item.
+ *   - For each work item attached to the log intent item,
+ *     * Perform the described action.
+ *     * Attach the work item to the log done item.
+ *
+ * The key here is that we must log an intent item for all pending
+ * work items every time we roll the transaction, and that we must log
+ * a done item as soon as the work is completed.  With this mechanism
+ * we can perform complex remapping operations, chaining intent items
+ * as needed.
+ *
+ * This is an example of remapping the extent (E, E+B) into file X at
+ * offset A and dealing with the extent (C, C+B) already being mapped
+ * there:
+ * +-------------------------------------------------+
+ * | Unmap file X startblock C offset A length B     | t0
+ * | Intent to reduce refcount for extent (C, B)     |
+ * | Intent to remove rmap (X, C, A, B)              |
+ * | Intent to free extent (D, 1) (bmbt block)       |
+ * | Intent to map (X, A, B) at startblock E         |
+ * +-------------------------------------------------+
+ * | Map file X startblock E offset A length B       | t1
+ * | Done mapping (X, E, A, B)                       |
+ * | Intent to increase refcount for extent (E, B)   |
+ * | Intent to add rmap (X, E, A, B)                 |
+ * +-------------------------------------------------+
+ * | Reduce refcount for extent (C, B)               | t2
+ * | Done reducing refcount for extent (C, B)        |
+ * | Increase refcount for extent (E, B)             |
+ * | Done increasing refcount for extent (E, B)      |
+ * | Intent to free extent (C, B)                    |
+ * | Intent to free extent (F, 1) (refcountbt block) |
+ * | Intent to remove rmap (F, 1, REFC)              |
+ * +-------------------------------------------------+
+ * | Remove rmap (X, C, A, B)                        | t3
+ * | Done removing rmap (X, C, A, B)                 |
+ * | Add rmap (X, E, A, B)                           |
+ * | Done adding rmap (X, E, A, B)                   |
+ * | Remove rmap (F, 1, REFC)                        |
+ * | Done removing rmap (F, 1, REFC)                 |
+ * +-------------------------------------------------+
+ * | Free extent (C, B)                              | t4
+ * | Done freeing extent (C, B)                      |
+ * | Free extent (D, 1)                              |
+ * | Done freeing extent (D, 1)                      |
+ * | Free extent (F, 1)                              |
+ * | Done freeing extent (F, 1)                      |
+ * +-------------------------------------------------+
+ *
+ * If we should crash before t2 commits, log recovery replays
+ * the following intent items:
+ *
+ * - Intent to reduce refcount for extent (C, B)
+ * - Intent to remove rmap (X, C, A, B)
+ * - Intent to free extent (D, 1) (bmbt block)
+ * - Intent to increase refcount for extent (E, B)
+ * - Intent to add rmap (X, E, A, B)
+ *
+ * In the process of recovering, it should also generate and take care
+ * of these intent items:
+ *
+ * - Intent to free extent (C, B)
+ * - Intent to free extent (F, 1) (refcountbt block)
+ * - Intent to remove rmap (F, 1, REFC)
+ */
+
+static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX];
+
+/*
+ * For each pending item in the intake list, log its intent item and the
+ * associated extents, then add the entire intake list to the end of
+ * the pending list.
+ *
+ * Per pending item the order is: create the intent via the type's
+ * create_intent hook (sized by dfp_count) and remember it in dfp_intent,
+ * sort the work items with the type's diff_items comparator, then call
+ * the type's log_item hook once per work item in the sorted order.
+ * Nothing is rolled or committed here; that is left to the caller.
+ */
+STATIC void
+xfs_defer_intake_work(
+       struct xfs_trans                *tp,
+       struct xfs_defer_ops            *dop)
+{
+       struct list_head                *li;
+       struct xfs_defer_pending        *dfp;
+
+       list_for_each_entry(dfp, &dop->dop_intake, dfp_list) {
+               trace_xfs_defer_intake_work(tp->t_mountp, dfp);
+               dfp->dfp_intent = dfp->dfp_type->create_intent(tp,
+                               dfp->dfp_count);
+               list_sort(tp->t_mountp, &dfp->dfp_work,
+                               dfp->dfp_type->diff_items);
+               list_for_each(li, &dfp->dfp_work)
+                       dfp->dfp_type->log_item(tp, dfp->dfp_intent, li);
+       }
+
+       list_splice_tail_init(&dop->dop_intake, &dop->dop_pending);
+}
+
+/*
+ * Abort all the intents that were committed.
+ *
+ * Does nothing beyond tracing unless dop->dop_committed is set.  Otherwise
+ * the abort_intent hook is called for every entry on dop_pending whose
+ * dfp_committed flag is set, and the filesystem is then shut down.  error
+ * is only used to pick the shutdown reason.
+ */
+STATIC void
+xfs_defer_trans_abort(
+       struct xfs_trans                *tp,
+       struct xfs_defer_ops            *dop,
+       int                             error)
+{
+       struct xfs_defer_pending        *dfp;
+
+       trace_xfs_defer_trans_abort(tp->t_mountp, dop);
+       /*
+        * If the transaction was committed, drop the intent reference
+        * since we're bailing out of here. The other reference is
+        * dropped when the intent hits the AIL.  If the transaction
+        * was not committed, the intent is freed by the intent item
+        * unlock handler on abort.
+        */
+       if (!dop->dop_committed)
+               return;
+
+       /* Abort intent items; entries not yet marked committed are skipped. */
+       list_for_each_entry(dfp, &dop->dop_pending, dfp_list) {
+               trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
+               if (dfp->dfp_committed)
+                       dfp->dfp_type->abort_intent(dfp->dfp_intent);
+       }
+
+       /*
+        * Shut down FS.  -EFSCORRUPTED selects SHUTDOWN_CORRUPT_INCORE;
+        * every other error selects SHUTDOWN_META_IO_ERROR.
+        */
+       xfs_force_shutdown(tp->t_mountp, (error == -EFSCORRUPTED) ?
+                       SHUTDOWN_CORRUPT_INCORE : SHUTDOWN_META_IO_ERROR);
+}
+
+/*
+ * Roll a transaction so we can do some deferred op processing.
+ *
+ * Every inode recorded in dop->dop_inodes[] (scanning stops at the first
+ * empty slot) is logged with XFS_ILOG_CORE before the roll and joined to
+ * the transaction again afterwards.  ip is skipped in both loops because
+ * it is handed to xfs_trans_roll() directly.
+ *
+ * Returns 0 and sets dop->dop_committed on success.  If the roll fails,
+ * xfs_defer_trans_abort() is called and the error is returned; the inodes
+ * are not rejoined in that case.
+ */
+STATIC int
+xfs_defer_trans_roll(
+       struct xfs_trans                **tp,
+       struct xfs_defer_ops            *dop,
+       struct xfs_inode                *ip)
+{
+       int                             i;
+       int                             error;
+
+       /* Log all the joined inodes except the one we passed in. */
+       for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) {
+               if (dop->dop_inodes[i] == ip)
+                       continue;
+               xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
+       }
+
+       trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
+
+       /* Roll the transaction. */
+       error = xfs_trans_roll(tp, ip);
+       if (error) {
+               trace_xfs_defer_trans_roll_error((*tp)->t_mountp, dop, error);
+               xfs_defer_trans_abort(*tp, dop, error);
+               return error;
+       }
+       dop->dop_committed = true;
+
+       /* Rejoin the joined inodes except the one we passed in. */
+       for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) {
+               if (dop->dop_inodes[i] == ip)
+                       continue;
+               xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
+       }
+
+       return error;
+}
+
+/*
+ * Do we have any work items to finish?  True if either the pending list
+ * or the intake list of dop is non-empty.
+ */
+bool
+xfs_defer_has_unfinished_work(
+       struct xfs_defer_ops            *dop)
+{
+       return !list_empty(&dop->dop_pending) || !list_empty(&dop->dop_intake);
+}
+
+/*
+ * Add this inode to the deferred op.  Each joined inode is relogged
+ * each time we roll the transaction, in addition to any inode passed
+ * to xfs_defer_finish().
+ *
+ * The inode is stored in the first empty slot of dop->dop_inodes[].
+ * Returns 0 if the inode was stored or was already present, and
+ * -EFSCORRUPTED if all XFS_DEFER_OPS_NR_INODES slots are occupied by
+ * other inodes.
+ */
+int
+xfs_defer_join(
+       struct xfs_defer_ops            *dop,
+       struct xfs_inode                *ip)
+{
+       int                             i;
+
+       for (i = 0; i < XFS_DEFER_OPS_NR_INODES; i++) {
+               if (dop->dop_inodes[i] == ip)
+                       return 0;
+               else if (dop->dop_inodes[i] == NULL) {
+                       dop->dop_inodes[i] = ip;
+                       return 0;
+               }
+       }
+
+       return -EFSCORRUPTED;
+}
+
+/*
+ * Finish all the pending work.  This involves logging intent items for
+ * any work items that wandered in since the last transaction roll (if
+ * one has even happened), rolling the transaction, and finishing the
+ * work items in the first item on the logged-and-pending list.
+ *
+ * If an inode is provided, relog it to the new transaction.
+ *
+ * The transaction must have XFS_TRANS_PERM_LOG_RES set (asserted below).
+ * tp is passed by reference to the roll, so callers should presumably
+ * expect *tp to change across this call.  Returns 0 once both the intake
+ * and pending lists are empty, or the first error from the roll or from a
+ * finish_item hook; on error the loop stops immediately and whatever is
+ * still queued on dop is left in place.
+ */
+int
+xfs_defer_finish(
+       struct xfs_trans                **tp,
+       struct xfs_defer_ops            *dop,
+       struct xfs_inode                *ip)
+{
+       struct xfs_defer_pending        *dfp;
+       struct list_head                *li;
+       struct list_head                *n;
+       void                            *done_item = NULL;
+       void                            *state;
+       int                             error = 0;
+       void                            (*cleanup_fn)(struct xfs_trans *, void *, int);
+
+       ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
+
+       trace_xfs_defer_finish((*tp)->t_mountp, dop);
+
+       /* Until we run out of pending work to finish... */
+       while (xfs_defer_has_unfinished_work(dop)) {
+               /* Log intents for work items sitting in the intake. */
+               xfs_defer_intake_work(*tp, dop);
+
+               /* Roll the transaction. */
+               error = xfs_defer_trans_roll(tp, dop, ip);
+               if (error)
+                       goto out;
+
+               /*
+                * Mark all pending intents as committed.  New entries are
+                * appended at the tail of dop_pending, so walk backwards
+                * and stop at the first entry that is already marked.
+                */
+               list_for_each_entry_reverse(dfp, &dop->dop_pending, dfp_list) {
+                       if (dfp->dfp_committed)
+                               break;
+                       trace_xfs_defer_pending_commit((*tp)->t_mountp, dfp);
+                       dfp->dfp_committed = true;
+               }
+
+               /* Log an intent-done item for the first pending item. */
+               dfp = list_first_entry(&dop->dop_pending,
+                               struct xfs_defer_pending, dfp_list);
+               trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
+               done_item = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
+                               dfp->dfp_count);
+               cleanup_fn = dfp->dfp_type->finish_cleanup;
+
+               /*
+                * Finish the work items.  state starts out NULL for each
+                * pending item and is passed by address to every
+                * finish_item call, then handed to cleanup_fn (if any).
+                */
+               state = NULL;
+               list_for_each_safe(li, n, &dfp->dfp_work) {
+                       list_del(li);
+                       dfp->dfp_count--;
+                       error = dfp->dfp_type->finish_item(*tp, dop, li,
+                                       done_item, &state);
+                       if (error) {
+                               /*
+                                * Clean up after ourselves and jump out.
+                                * xfs_defer_cancel will take care of freeing
+                                * all these lists and stuff.
+                                *
+                                * NOTE(review): li was already unlinked from
+                                * dfp_work above, so xfs_defer_cancel cannot
+                                * see it; confirm that ->finish_item releases
+                                * the work item when it fails.
+                                */
+                               if (cleanup_fn)
+                                       cleanup_fn(*tp, state, error);
+                               xfs_defer_trans_abort(*tp, dop, error);
+                               goto out;
+                       }
+               }
+               /* Done with the dfp, free it. */
+               list_del(&dfp->dfp_list);
+               kmem_free(dfp);
+
+               if (cleanup_fn)
+                       cleanup_fn(*tp, state, error);  /* error is 0 here */
+       }
+
+out:
+       if (error)
+               trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error);
+       else
+               trace_xfs_defer_finish_done((*tp)->t_mountp, dop);
+       return error;
+}
+
+/*
+ * Free up any items left in the list.
+ *
+ * Both dop_intake and dop_pending are drained the same way: each pending
+ * item is unlinked, every work item still attached to it is unlinked and
+ * passed to the type's cancel_item hook, and the pending item itself is
+ * freed.  The intent items are not touched here.
+ */
+void
+xfs_defer_cancel(
+       struct xfs_defer_ops            *dop)
+{
+       struct xfs_defer_pending        *dfp;
+       struct xfs_defer_pending        *pli;
+       struct list_head                *pwi;
+       struct list_head                *n;
+
+       trace_xfs_defer_cancel(NULL, dop);
+
+       /*
+        * Free the pending items.  Caller should already have arranged
+        * for the intent items to be released.
+        */
+       list_for_each_entry_safe(dfp, pli, &dop->dop_intake, dfp_list) {
+               trace_xfs_defer_intake_cancel(NULL, dfp);
+               list_del(&dfp->dfp_list);
+               list_for_each_safe(pwi, n, &dfp->dfp_work) {
+                       list_del(pwi);
+                       dfp->dfp_count--;
+                       dfp->dfp_type->cancel_item(pwi);
+               }
+               ASSERT(dfp->dfp_count == 0);
+               kmem_free(dfp);
+       }
+       list_for_each_entry_safe(dfp, pli, &dop->dop_pending, dfp_list) {
+               trace_xfs_defer_pending_cancel(NULL, dfp);
+               list_del(&dfp->dfp_list);
+               list_for_each_safe(pwi, n, &dfp->dfp_work) {
+                       list_del(pwi);
+                       dfp->dfp_count--;
+                       dfp->dfp_type->cancel_item(pwi);
+               }
+               ASSERT(dfp->dfp_count == 0);
+               kmem_free(dfp);
+       }
+}
+
+/*
+ * Add an item for later deferred processing.
+ *
+ * li is the list_head embedded in the caller's work item; it is appended
+ * to the dfp_work list of a pending item on dop->dop_intake and the
+ * pending item's dfp_count is bumped.  type selects the entry of
+ * defer_op_types[] to use for a newly created pending item; the lookup is
+ * not checked here, so the type must already have been registered with
+ * xfs_defer_init_op_type().
+ */
+void
+xfs_defer_add(
+       struct xfs_defer_ops            *dop,
+       enum xfs_defer_ops_type         type,
+       struct list_head                *li)
+{
+       struct xfs_defer_pending        *dfp = NULL;
+
+       /*
+        * Add the item to a pending item at the end of the intake list.
+        * If the last pending item has the same type, reuse it.  Else,
+        * create a new pending item at the end of the intake list.
+        * A pending item that has already reached its type's max_items
+        * is not reused either; max_items == 0 means there is no limit.
+        */
+       if (!list_empty(&dop->dop_intake)) {
+               dfp = list_last_entry(&dop->dop_intake,
+                               struct xfs_defer_pending, dfp_list);
+               if (dfp->dfp_type->type != type ||
+                   (dfp->dfp_type->max_items &&
+                    dfp->dfp_count >= dfp->dfp_type->max_items))
+                       dfp = NULL;
+       }
+       if (!dfp) {
+               /* presumably KM_SLEEP cannot fail; result is used unchecked */
+               dfp = kmem_alloc(sizeof(struct xfs_defer_pending),
+                               KM_SLEEP | KM_NOFS);
+               dfp->dfp_type = defer_op_types[type];
+               dfp->dfp_committed = false;
+               dfp->dfp_intent = NULL;
+               dfp->dfp_count = 0;
+               INIT_LIST_HEAD(&dfp->dfp_work);
+               list_add_tail(&dfp->dfp_list, &dop->dop_intake);
+       }
+
+       list_add_tail(li, &dfp->dfp_work);
+       dfp->dfp_count++;
+}
+
+/*
+ * Register a deferred operation type: store the descriptor in
+ * defer_op_types[], indexed by its own type field, replacing whatever was
+ * stored in that slot before.  type->type is not range-checked here.
+ */
+void
+xfs_defer_init_op_type(
+       const struct xfs_defer_op_type  *type)
+{
+       defer_op_types[type->type] = type;
+}
+
+/*
+ * Initialize a deferred operation.
+ *
+ * Clears the dop_committed and dop_low flags, zeroes the joined-inode
+ * array, and sets up empty intake and pending lists.  *fbp is set to
+ * NULLFSBLOCK as a side effect.
+ */
+void
+xfs_defer_init(
+       struct xfs_defer_ops            *dop,
+       xfs_fsblock_t                   *fbp)
+{
+       dop->dop_committed = false;
+       dop->dop_low = false;
+       memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+       *fbp = NULLFSBLOCK;
+       INIT_LIST_HEAD(&dop->dop_intake);
+       INIT_LIST_HEAD(&dop->dop_pending);
+       trace_xfs_defer_init(NULL, dop);
+}
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h

new file mode 100644 (file)

index 0000000..cc3981c
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2016 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __XFS_DEFER_H__
+#define        __XFS_DEFER_H__
+
+struct xfs_defer_op_type;
+
+/*
+ * Save a log intent item and a list of work items, so that we can replay
+ * whatever action had to happen to the work item list and file the log done
+ * item.
+ */
+struct xfs_defer_pending {
+       const struct xfs_defer_op_type  *dfp_type;      /* function pointers */
+       struct list_head                dfp_list;       /* pending items */
+       bool                            dfp_committed;  /* committed trans? */
+       void                            *dfp_intent;    /* log intent item */
+       struct list_head                dfp_work;       /* work items */
+       unsigned int                    dfp_count;      /* # items on dfp_work */
+};
+
+/*
+ * Header for deferred operation list.
+ *
+ * dop_low is used by the allocator to activate the lowspace algorithm -
+ * when free space is running low the extent allocator may choose to
+ * allocate an extent from an AG without leaving sufficient space for
+ * a btree split when inserting the new extent.  In this case the allocator
+ * will enable the lowspace algorithm which is supposed to allow further
+ * allocations (such as btree splits and newroots) to allocate from
+ * sequential AGs.  In order to avoid locking AGs out of order the lowspace
+ * algorithm will start searching for free space from AG 0.  If the correct
+ * transaction reservations have been made then this algorithm will eventually
+ * find all the space it needs.
+ */
+enum xfs_defer_ops_type {
+       XFS_DEFER_OPS_TYPE_RMAP,
+       XFS_DEFER_OPS_TYPE_FREE,
+       XFS_DEFER_OPS_TYPE_MAX,
+};
+
+#define XFS_DEFER_OPS_NR_INODES        2       /* join up to two inodes */
+
+struct xfs_defer_ops {
+       bool                    dop_committed;  /* did any trans commit? */
+       bool                    dop_low;        /* alloc in low mode */
+       struct list_head        dop_intake;     /* unlogged pending work */
+       struct list_head        dop_pending;    /* logged pending work */
+
+       /* relog these inodes with each roll */
+       struct xfs_inode        *dop_inodes[XFS_DEFER_OPS_NR_INODES];
+};
+
+void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
+               struct list_head *h);
+int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop,
+               struct xfs_inode *ip);
+void xfs_defer_cancel(struct xfs_defer_ops *dop);
+void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
+bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
+int xfs_defer_join(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+
+/* Description of a deferred type. */
+struct xfs_defer_op_type {
+       enum xfs_defer_ops_type type;
+       unsigned int            max_items;
+       void (*abort_intent)(void *);
+       void *(*create_done)(struct xfs_trans *, void *, unsigned int);
+       int (*finish_item)(struct xfs_trans *, struct xfs_defer_ops *,
+                       struct list_head *, void *, void **);
+       void (*finish_cleanup)(struct xfs_trans *, void *, int);
+       void (*cancel_item)(struct list_head *);
+       int (*diff_items)(void *, struct list_head *, struct list_head *);
+       void *(*create_intent)(struct xfs_trans *, uint);
+       void (*log_item)(struct xfs_trans *, void *, struct list_head *);
+};
+
+void xfs_defer_init_op_type(const struct xfs_defer_op_type *type);
+
+#endif /* __XFS_DEFER_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c

index af0f9d171f8a012758d778a0bd105e51448e5cf3..20a96dd5af7eb6d4ebbaf07a1f6a1e4b10e7f981 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -21,6 +21,7 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
  #include "xfs_inode.h"
@@ -259,7 +260,7 @@ xfs_dir_createname(
         struct xfs_name         *name,
         xfs_ino_t               inum,           /* new entry inode number */
         xfs_fsblock_t           *first,         /* bmap's firstblock */
-       xfs_bmap_free_t         *flist,         /* bmap's freeblock list */
+       struct xfs_defer_ops    *dfops,         /* bmap's freeblock list */
         xfs_extlen_t            total)          /* bmap's total block count */
  {
         struct xfs_da_args      *args;
@@ -286,7 +287,7 @@ xfs_dir_createname(
         args->inumber = inum;
         args->dp = dp;
         args->firstblock = first;
-       args->flist = flist;
+       args->dfops = dfops;
         args->total = total;
         args->whichfork = XFS_DATA_FORK;
         args->trans = tp;
@@ -436,7 +437,7 @@ xfs_dir_removename(
         struct xfs_name *name,
         xfs_ino_t       ino,
         xfs_fsblock_t   *first,         /* bmap's firstblock */
-       xfs_bmap_free_t *flist,         /* bmap's freeblock list */
+       struct xfs_defer_ops    *dfops,         /* bmap's freeblock list */
         xfs_extlen_t    total)          /* bmap's total block count */
  {
         struct xfs_da_args *args;
@@ -458,7 +459,7 @@ xfs_dir_removename(
         args->inumber = ino;
         args->dp = dp;
         args->firstblock = first;
-       args->flist = flist;
+       args->dfops = dfops;
         args->total = total;
         args->whichfork = XFS_DATA_FORK;
         args->trans = tp;
@@ -498,7 +499,7 @@ xfs_dir_replace(
         struct xfs_name *name,          /* name of entry to replace */
         xfs_ino_t       inum,           /* new inode number */
         xfs_fsblock_t   *first,         /* bmap's firstblock */
-       xfs_bmap_free_t *flist,         /* bmap's freeblock list */
+       struct xfs_defer_ops    *dfops,         /* bmap's freeblock list */
         xfs_extlen_t    total)          /* bmap's total block count */
  {
         struct xfs_da_args *args;
@@ -523,7 +524,7 @@ xfs_dir_replace(
         args->inumber = inum;
         args->dp = dp;
         args->firstblock = first;
-       args->flist = flist;
+       args->dfops = dfops;
         args->total = total;
         args->whichfork = XFS_DATA_FORK;
         args->trans = tp;
@@ -680,7 +681,7 @@ xfs_dir2_shrink_inode(
  
         /* Unmap the fsblock(s). */
         error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0,
-                           args->firstblock, args->flist, &done);
+                           args->firstblock, args->dfops, &done);
         if (error) {
                 /*
                  * ENOSPC actually can happen if we're in a removename with no
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h

index e55353651f5b8678c14b0c22cbe3da1faae29548..becc926c3e3d900db0a021dd091e46e828fa6f09 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -18,7 +18,7 @@
  #ifndef __XFS_DIR2_H__
  #define __XFS_DIR2_H__
  
-struct xfs_bmap_free;
+struct xfs_defer_ops;
  struct xfs_da_args;
  struct xfs_inode;
  struct xfs_mount;
@@ -129,18 +129,18 @@ extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
  extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
                                 struct xfs_name *name, xfs_ino_t inum,
                                 xfs_fsblock_t *first,
-                               struct xfs_bmap_free *flist, xfs_extlen_t tot);
+                               struct xfs_defer_ops *dfops, xfs_extlen_t tot);
  extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
                                 struct xfs_name *name, xfs_ino_t *inum,
                                 struct xfs_name *ci_name);
  extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
                                 struct xfs_name *name, xfs_ino_t ino,
                                 xfs_fsblock_t *first,
-                               struct xfs_bmap_free *flist, xfs_extlen_t tot);
+                               struct xfs_defer_ops *dfops, xfs_extlen_t tot);
  extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
                                 struct xfs_name *name, xfs_ino_t inum,
                                 xfs_fsblock_t *first,
-                               struct xfs_bmap_free *flist, xfs_extlen_t tot);
+                               struct xfs_defer_ops *dfops, xfs_extlen_t tot);
  extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
                                 struct xfs_name *name);
  
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h

index adb204d40f2246c181051566e385463f077e15ca..f814d42c73b2fb7484dd76ad024f63891de155f0 100644 (file)
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -455,8 +455,10 @@ xfs_sb_has_compat_feature(
  }
  
  #define XFS_SB_FEAT_RO_COMPAT_FINOBT   (1 << 0)                /* free inode btree */
+#define XFS_SB_FEAT_RO_COMPAT_RMAPBT   (1 << 1)                /* reverse map btree */
  #define XFS_SB_FEAT_RO_COMPAT_ALL \
-               (XFS_SB_FEAT_RO_COMPAT_FINOBT)
+               (XFS_SB_FEAT_RO_COMPAT_FINOBT | \
+                XFS_SB_FEAT_RO_COMPAT_RMAPBT)
  #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN  ~XFS_SB_FEAT_RO_COMPAT_ALL
  static inline bool
  xfs_sb_has_ro_compat_feature(
@@ -538,6 +540,12 @@ static inline bool xfs_sb_version_hasmetauuid(struct xfs_sb *sbp)
                 (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID);
  }
  
+static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp)
+{
+       return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
+               (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT);
+}
+
  /*
   * end of superblock version macros
   */
@@ -598,10 +606,10 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
  #define        XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION)
  
  /*
- * Btree number 0 is bno, 1 is cnt.  This value gives the size of the
+ * Btree number 0 is bno, 1 is cnt, 2 is rmap. This value gives the size of the
   * arrays below.
   */
-#define        XFS_BTNUM_AGF   ((int)XFS_BTNUM_CNTi + 1)
+#define        XFS_BTNUM_AGF   ((int)XFS_BTNUM_RMAPi + 1)
  
  /*
   * The second word of agf_levels in the first a.g. overlaps the EFS
@@ -618,12 +626,10 @@ typedef struct xfs_agf {
         __be32          agf_seqno;      /* sequence # starting from 0 */
         __be32          agf_length;     /* size in blocks of a.g. */
         /*
-        * Freespace information
+        * Freespace and rmap information
          */
         __be32          agf_roots[XFS_BTNUM_AGF];       /* root blocks */
-       __be32          agf_spare0;     /* spare field */
         __be32          agf_levels[XFS_BTNUM_AGF];      /* btree levels */
-       __be32          agf_spare1;     /* spare field */
  
         __be32          agf_flfirst;    /* first freelist block's index */
         __be32          agf_fllast;     /* last freelist block's index */
@@ -1308,17 +1314,118 @@ typedef __be32 xfs_inobt_ptr_t;
  #define        XFS_FIBT_BLOCK(mp)              ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
  
  /*
- * The first data block of an AG depends on whether the filesystem was formatted
- * with the finobt feature. If so, account for the finobt reserved root btree
- * block.
+ * Reverse mapping btree format definitions
+ *
+ * There is a btree for the reverse map per allocation group
+ */
+#define        XFS_RMAP_CRC_MAGIC      0x524d4233      /* 'RMB3' */
+
+/*
+ * Ownership info for an extent.  This is used to create reverse-mapping
+ * entries.
   */
-#define XFS_PREALLOC_BLOCKS(mp) \
+#define XFS_OWNER_INFO_ATTR_FORK       (1 << 0)
+#define XFS_OWNER_INFO_BMBT_BLOCK      (1 << 1)
+struct xfs_owner_info {
+       uint64_t                oi_owner;
+       xfs_fileoff_t           oi_offset;
+       unsigned int            oi_flags;
+};
+
+/*
+ * Special owner types.
+ *
+ * Seeing as we only support up to 8EB, we have the upper bit of the owner field
+ * to tell us we have a special owner value. We use these for static metadata
+ * allocated at mkfs/growfs time, as well as for freespace management metadata.
+ */
+#define XFS_RMAP_OWN_NULL      (-1ULL) /* No owner, for growfs */
+#define XFS_RMAP_OWN_UNKNOWN   (-2ULL) /* Unknown owner, for EFI recovery */
+#define XFS_RMAP_OWN_FS                (-3ULL) /* static fs metadata */
+#define XFS_RMAP_OWN_LOG       (-4ULL) /* static fs metadata */
+#define XFS_RMAP_OWN_AG                (-5ULL) /* AG freespace btree blocks */
+#define XFS_RMAP_OWN_INOBT     (-6ULL) /* Inode btree blocks */
+#define XFS_RMAP_OWN_INODES    (-7ULL) /* Inode chunk */
+#define XFS_RMAP_OWN_MIN       (-8ULL) /* guard */
+
+#define XFS_RMAP_NON_INODE_OWNER(owner)        (!!((owner) & (1ULL << 63)))
+
+/*
+ * Data record structure
+ */
+struct xfs_rmap_rec {
+       __be32          rm_startblock;  /* extent start block */
+       __be32          rm_blockcount;  /* extent length */
+       __be64          rm_owner;       /* extent owner */
+       __be64          rm_offset;      /* offset within the owner */
+};
+
+/*
+ * rmap btree record
+ *  rm_offset:63 is the attribute fork flag
+ *  rm_offset:62 is the bmbt block flag
+ *  rm_offset:61 is the unwritten extent flag (same as l0:63 in bmbt)
+ *  rm_offset:54-60 aren't used and should be zero
+ *  rm_offset:0-53 is the block offset within the inode
+ */
+#define XFS_RMAP_OFF_ATTR_FORK ((__uint64_t)1ULL << 63)
+#define XFS_RMAP_OFF_BMBT_BLOCK        ((__uint64_t)1ULL << 62)
+#define XFS_RMAP_OFF_UNWRITTEN ((__uint64_t)1ULL << 61)
+
+#define XFS_RMAP_LEN_MAX       ((__uint32_t)~0U)
+#define XFS_RMAP_OFF_FLAGS     (XFS_RMAP_OFF_ATTR_FORK | \
+                                XFS_RMAP_OFF_BMBT_BLOCK | \
+                                XFS_RMAP_OFF_UNWRITTEN)
+#define XFS_RMAP_OFF_MASK      ((__uint64_t)0x3FFFFFFFFFFFFFULL)
+
+#define XFS_RMAP_OFF(off)              ((off) & XFS_RMAP_OFF_MASK)
+
+#define XFS_RMAP_IS_BMBT_BLOCK(off)    (!!((off) & XFS_RMAP_OFF_BMBT_BLOCK))
+#define XFS_RMAP_IS_ATTR_FORK(off)     (!!((off) & XFS_RMAP_OFF_ATTR_FORK))
+#define XFS_RMAP_IS_UNWRITTEN(off)     (!!((off) & XFS_RMAP_OFF_UNWRITTEN))
+
+#define RMAPBT_STARTBLOCK_BITLEN       32
+#define RMAPBT_BLOCKCOUNT_BITLEN       32
+#define RMAPBT_OWNER_BITLEN            64
+#define RMAPBT_ATTRFLAG_BITLEN         1
+#define RMAPBT_BMBTFLAG_BITLEN         1
+#define RMAPBT_EXNTFLAG_BITLEN         1
+#define RMAPBT_UNUSED_OFFSET_BITLEN    7
+#define RMAPBT_OFFSET_BITLEN           54
+
+#define XFS_RMAP_ATTR_FORK             (1 << 0)
+#define XFS_RMAP_BMBT_BLOCK            (1 << 1)
+#define XFS_RMAP_UNWRITTEN             (1 << 2)
+#define XFS_RMAP_KEY_FLAGS             (XFS_RMAP_ATTR_FORK | \
+                                        XFS_RMAP_BMBT_BLOCK)
+#define XFS_RMAP_REC_FLAGS             (XFS_RMAP_UNWRITTEN)
+struct xfs_rmap_irec {
+       xfs_agblock_t   rm_startblock;  /* extent start block */
+       xfs_extlen_t    rm_blockcount;  /* extent length */
+       __uint64_t      rm_owner;       /* extent owner */
+       __uint64_t      rm_offset;      /* offset within the owner */
+       unsigned int    rm_flags;       /* state flags */
+};
+
+/*
+ * Key structure
+ *
+ * We don't use the length for lookups
+ */
+struct xfs_rmap_key {
+       __be32          rm_startblock;  /* extent start block */
+       __be64          rm_owner;       /* extent owner */
+       __be64          rm_offset;      /* offset within the owner */
+} __attribute__((packed));
+
+/* btree pointer type */
+typedef __be32 xfs_rmap_ptr_t;
+
+#define        XFS_RMAP_BLOCK(mp) \
         (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
          XFS_FIBT_BLOCK(mp) + 1 : \
          XFS_IBT_BLOCK(mp) + 1)
  
-
-
  /*
   * BMAP Btree format definitions
   *
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h

index f5ec9c5ccae6ace2873c0f1c44db1816baf0847b..79455058b752588e7855afde61ad59ba4ce3186f 100644 (file)
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -206,6 +206,7 @@ typedef struct xfs_fsop_resblks {
  #define XFS_FSOP_GEOM_FLAGS_FTYPE      0x10000 /* inode directory types */
  #define XFS_FSOP_GEOM_FLAGS_FINOBT     0x20000 /* free inode btree */
  #define XFS_FSOP_GEOM_FLAGS_SPINODES   0x40000 /* sparse inode chunks  */
+#define XFS_FSOP_GEOM_FLAGS_RMAPBT     0x80000 /* Reverse mapping btree */
  
  /*
   * Minimum and maximum sizes need for growth checks.
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c

index 4b1e408169a83de0c03825a8167bc8a678dd7124..51b4e0de1fdc424e13f039adf98ae2789a27ba74 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -24,6 +24,7 @@
  #include "xfs_bit.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_ialloc.h"
@@ -39,6 +40,7 @@
  #include "xfs_icache.h"
  #include "xfs_trace.h"
  #include "xfs_log.h"
+#include "xfs_rmap.h"
  
  
  /*
@@ -614,6 +616,7 @@ xfs_ialloc_ag_alloc(
         args.tp = tp;
         args.mp = tp->t_mountp;
         args.fsbno = NULLFSBLOCK;
+       xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INODES);
  
  #ifdef DEBUG
         /* randomly do sparse inode allocations */
@@ -1817,19 +1820,21 @@ xfs_difree_inode_chunk(
         struct xfs_mount                *mp,
         xfs_agnumber_t                  agno,
         struct xfs_inobt_rec_incore     *rec,
-       struct xfs_bmap_free            *flist)
+       struct xfs_defer_ops            *dfops)
  {
         xfs_agblock_t   sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino);
         int             startidx, endidx;
         int             nextbit;
         xfs_agblock_t   agbno;
         int             contigblk;
+       struct xfs_owner_info   oinfo;
         DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
+       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
  
         if (!xfs_inobt_issparse(rec->ir_holemask)) {
                 /* not sparse, calculate extent info directly */
-               xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, sagbno),
-                                 mp->m_ialloc_blks);
+               xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, sagbno),
+                                 mp->m_ialloc_blks, &oinfo);
                 return;
         }
  
@@ -1872,8 +1877,8 @@ xfs_difree_inode_chunk(
  
                 ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
                 ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
-               xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, agbno),
-                                 contigblk);
+               xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, agbno),
+                                 contigblk, &oinfo);
  
                 /* reset range to current bit and carry on... */
                 startidx = endidx = nextbit;
@@ -1889,7 +1894,7 @@ xfs_difree_inobt(
         struct xfs_trans                *tp,
         struct xfs_buf                  *agbp,
         xfs_agino_t                     agino,
-       struct xfs_bmap_free            *flist,
+       struct xfs_defer_ops            *dfops,
         struct xfs_icluster             *xic,
         struct xfs_inobt_rec_incore     *orec)
  {
@@ -1976,7 +1981,7 @@ xfs_difree_inobt(
                         goto error0;
                 }
  
-               xfs_difree_inode_chunk(mp, agno, &rec, flist);
+               xfs_difree_inode_chunk(mp, agno, &rec, dfops);
         } else {
                 xic->deleted = 0;
  
@@ -2121,7 +2126,7 @@ int
  xfs_difree(
         struct xfs_trans        *tp,            /* transaction pointer */
         xfs_ino_t               inode,          /* inode to be freed */
-       struct xfs_bmap_free    *flist,         /* extents to free */
+       struct xfs_defer_ops    *dfops,         /* extents to free */
         struct xfs_icluster     *xic)   /* cluster info if deleted */
  {
         /* REFERENCED */
@@ -2173,7 +2178,7 @@ xfs_difree(
         /*
          * Fix up the inode allocation btree.
          */
-       error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec);
+       error = xfs_difree_inobt(mp, tp, agbp, agino, dfops, xic, &rec);
         if (error)
                 goto error0;
  
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h

index 6e450df2979bfc80a7983cff0dbd79124741aa19..0bb89669fc072fd6bdf5ee44ded2b345aaf5de0b 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -95,7 +95,7 @@ int                                   /* error */
  xfs_difree(
         struct xfs_trans *tp,           /* transaction pointer */
         xfs_ino_t       inode,          /* inode to be freed */
-       struct xfs_bmap_free *flist,    /* extents to free */
+       struct xfs_defer_ops *dfops,    /* extents to free */
         struct xfs_icluster *ifree);    /* cluster info if deleted */
  
  /*
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c

index 89c21d771e35edbc026eb7fe7cb373280774b162..31ca2208c03dfbd9ff7f1b384cac616d1e4b022b 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -32,6 +32,7 @@
  #include "xfs_trace.h"
  #include "xfs_cksum.h"
  #include "xfs_trans.h"
+#include "xfs_rmap.h"
  
  
  STATIC int
@@ -96,6 +97,7 @@ xfs_inobt_alloc_block(
         memset(&args, 0, sizeof(args));
         args.tp = cur->bc_tp;
         args.mp = cur->bc_mp;
+       xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INOBT);
         args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
         args.minlen = 1;
         args.maxlen = 1;
@@ -125,8 +127,12 @@ xfs_inobt_free_block(
         struct xfs_btree_cur    *cur,
         struct xfs_buf          *bp)
  {
+       struct xfs_owner_info   oinfo;
+
+       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
         return xfs_free_extent(cur->bc_tp,
-                       XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1);
+                       XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
+                       &oinfo);
  }
  
  STATIC int
@@ -145,14 +151,6 @@ xfs_inobt_init_key_from_rec(
         key->inobt.ir_startino = rec->inobt.ir_startino;
  }
  
-STATIC void
-xfs_inobt_init_rec_from_key(
-       union xfs_btree_key     *key,
-       union xfs_btree_rec     *rec)
-{
-       rec->inobt.ir_startino = key->inobt.ir_startino;
-}
-
  STATIC void
  xfs_inobt_init_rec_from_cur(
         struct xfs_btree_cur    *cur,
@@ -314,7 +312,6 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
         .get_minrecs            = xfs_inobt_get_minrecs,
         .get_maxrecs            = xfs_inobt_get_maxrecs,
         .init_key_from_rec      = xfs_inobt_init_key_from_rec,
-       .init_rec_from_key      = xfs_inobt_init_rec_from_key,
         .init_rec_from_cur      = xfs_inobt_init_rec_from_cur,
         .init_ptr_from_cur      = xfs_inobt_init_ptr_from_cur,
         .key_diff               = xfs_inobt_key_diff,
@@ -336,7 +333,6 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
         .get_minrecs            = xfs_inobt_get_minrecs,
         .get_maxrecs            = xfs_inobt_get_maxrecs,
         .init_key_from_rec      = xfs_inobt_init_key_from_rec,
-       .init_rec_from_key      = xfs_inobt_init_rec_from_key,
         .init_rec_from_cur      = xfs_inobt_init_rec_from_cur,
         .init_ptr_from_cur      = xfs_finobt_init_ptr_from_cur,
         .key_diff               = xfs_inobt_key_diff,
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c

index 9d9559eb2835a33621e568392fab2c1074022da3..4b9769e23c834278eabe70ea429dacabd2d934f2 100644 (file)
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -22,6 +22,7 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_error.h"
  #include "xfs_cksum.h"
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h

index e8f49c029ff05098ddc91eeeffe5ba7102eff77f..a6eed43fa7cd5d7898c7d15c8823bdd4874dec2b 100644 (file)
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -110,7 +110,9 @@ static inline uint xlog_get_cycle(char *ptr)
  #define XLOG_REG_TYPE_COMMIT           18
  #define XLOG_REG_TYPE_TRANSHDR         19
  #define XLOG_REG_TYPE_ICREATE          20
-#define XLOG_REG_TYPE_MAX              20
+#define XLOG_REG_TYPE_RUI_FORMAT       21
+#define XLOG_REG_TYPE_RUD_FORMAT       22
+#define XLOG_REG_TYPE_MAX              22
  
  /*
   * Flags to log operation header
@@ -227,6 +229,8 @@ typedef struct xfs_trans_header {
  #define        XFS_LI_DQUOT            0x123d
  #define        XFS_LI_QUOTAOFF         0x123e
  #define        XFS_LI_ICREATE          0x123f
+#define        XFS_LI_RUI              0x1240  /* rmap update intent */
+#define        XFS_LI_RUD              0x1241
  
  #define XFS_LI_TYPE_DESC \
         { XFS_LI_EFI,           "XFS_LI_EFI" }, \
@@ -236,7 +240,9 @@ typedef struct xfs_trans_header {
         { XFS_LI_BUF,           "XFS_LI_BUF" }, \
         { XFS_LI_DQUOT,         "XFS_LI_DQUOT" }, \
         { XFS_LI_QUOTAOFF,      "XFS_LI_QUOTAOFF" }, \
-       { XFS_LI_ICREATE,       "XFS_LI_ICREATE" }
+       { XFS_LI_ICREATE,       "XFS_LI_ICREATE" }, \
+       { XFS_LI_RUI,           "XFS_LI_RUI" }, \
+       { XFS_LI_RUD,           "XFS_LI_RUD" }
  
  /*
   * Inode Log Item Format definitions.
@@ -603,6 +609,59 @@ typedef struct xfs_efd_log_format_64 {
         xfs_extent_64_t         efd_extents[1]; /* array of extents freed */
  } xfs_efd_log_format_64_t;
  
+/*
+ * RUI/RUD (reverse mapping) log format definitions
+ */
+struct xfs_map_extent {
+       __uint64_t              me_owner;
+       __uint64_t              me_startblock;
+       __uint64_t              me_startoff;
+       __uint32_t              me_len;
+       __uint32_t              me_flags;
+};
+
+/* rmap me_flags: upper bits are flags, lower byte is type code */
+#define XFS_RMAP_EXTENT_MAP            1
+#define XFS_RMAP_EXTENT_UNMAP          3
+#define XFS_RMAP_EXTENT_CONVERT                5
+#define XFS_RMAP_EXTENT_ALLOC          7
+#define XFS_RMAP_EXTENT_FREE           8
+#define XFS_RMAP_EXTENT_TYPE_MASK      0xFF
+
+#define XFS_RMAP_EXTENT_ATTR_FORK      (1U << 31)
+#define XFS_RMAP_EXTENT_BMBT_BLOCK     (1U << 30)
+#define XFS_RMAP_EXTENT_UNWRITTEN      (1U << 29)
+
+#define XFS_RMAP_EXTENT_FLAGS          (XFS_RMAP_EXTENT_TYPE_MASK | \
+                                        XFS_RMAP_EXTENT_ATTR_FORK | \
+                                        XFS_RMAP_EXTENT_BMBT_BLOCK | \
+                                        XFS_RMAP_EXTENT_UNWRITTEN)
+
+/*
+ * This is the structure used to lay out an rui log item in the
+ * log.  The rui_extents field is a variable size array whose
+ * size is given by rui_nextents.
+ */
+struct xfs_rui_log_format {
+       __uint16_t              rui_type;       /* rui log item type */
+       __uint16_t              rui_size;       /* size of this item */
+       __uint32_t              rui_nextents;   /* # extents to rmap */
+       __uint64_t              rui_id;         /* rui identifier */
+       struct xfs_map_extent   rui_extents[1]; /* array of extents to rmap */
+};
+
+/*
+ * This is the structure used to lay out an rud log item in the
+ * log.  It carries no extent array; rud_rui_id holds the id of
+ * the corresponding rui.
+ */
+struct xfs_rud_log_format {
+       __uint16_t              rud_type;       /* rud log item type */
+       __uint16_t              rud_size;       /* size of this item */
+       __uint32_t              __pad;
+       __uint64_t              rud_rui_id;     /* id of corresponding rui */
+};
+
  /*
   * Dquot Log format definitions.
   *
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c

new file mode 100644 (file)

index 0000000..73d0540
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -0,0 +1,1399 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_btree.h"
+#include "xfs_trans.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_trace.h"
+#include "xfs_error.h"
+#include "xfs_extent_busy.h"
+#include "xfs_bmap.h"
+#include "xfs_inode.h"
+
+/*
+ * Load the search key [bno, len, owner, offset, flags] into the cursor's
+ * in-core record and run an XFS_LOOKUP_LE lookup with it.  The lookup's
+ * return value is handed back unchanged; *stat is filled in by
+ * xfs_btree_lookup().
+ */
+int
+xfs_rmap_lookup_le(
+       struct xfs_btree_cur    *cur,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       uint64_t                owner,
+       uint64_t                offset,
+       unsigned int            flags,
+       int                     *stat)
+{
+       int                     error;
+
+       /* the key fields are independent of each other */
+       cur->bc_rec.r.rm_flags = flags;
+       cur->bc_rec.r.rm_offset = offset;
+       cur->bc_rec.r.rm_owner = owner;
+       cur->bc_rec.r.rm_blockcount = len;
+       cur->bc_rec.r.rm_startblock = bno;
+
+       error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+       return error;
+}
+
+/*
+ * Load the search key [bno, len, owner, offset, flags] into the cursor's
+ * in-core record and run an XFS_LOOKUP_EQ lookup with it.  The lookup's
+ * return value is handed back unchanged; *stat is filled in by
+ * xfs_btree_lookup().
+ */
+int
+xfs_rmap_lookup_eq(
+       struct xfs_btree_cur    *cur,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       uint64_t                owner,
+       uint64_t                offset,
+       unsigned int            flags,
+       int                     *stat)
+{
+       int                     error;
+
+       /* the key fields are independent of each other */
+       cur->bc_rec.r.rm_flags = flags;
+       cur->bc_rec.r.rm_offset = offset;
+       cur->bc_rec.r.rm_owner = owner;
+       cur->bc_rec.r.rm_blockcount = len;
+       cur->bc_rec.r.rm_startblock = bno;
+
+       error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+       return error;
+}
+
+/*
+ * Overwrite the record the cursor currently points at with the contents
+ * of irec.  The start block and block count are stored as big-endian
+ * 32-bit values, the owner as a big-endian 64-bit value, and rm_offset as
+ * the big-endian form of whatever xfs_rmap_irec_offset_pack() returns for
+ * irec.
+ *
+ * Returns the result of xfs_btree_update(): 0 on success; per the original
+ * note on this function the failure expected here is EFSCORRUPTED.  A
+ * failure is reported through the update_error tracepoint.
+ */
+STATIC int
+xfs_rmap_update(
+       struct xfs_btree_cur    *cur,
+       struct xfs_rmap_irec    *irec)
+{
+       union xfs_btree_rec     brec;
+       int                     error;
+
+       trace_xfs_rmap_update(cur->bc_mp, cur->bc_private.a.agno,
+                       irec->rm_startblock, irec->rm_blockcount,
+                       irec->rm_owner, irec->rm_offset, irec->rm_flags);
+
+       /* build the on-disk image of the record */
+       brec.rmap.rm_offset = cpu_to_be64(xfs_rmap_irec_offset_pack(irec));
+       brec.rmap.rm_owner = cpu_to_be64(irec->rm_owner);
+       brec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
+       brec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
+
+       error = xfs_btree_update(cur, &brec);
+       if (!error)
+               return 0;
+
+       trace_xfs_rmap_update_error(cur->bc_mp, cur->bc_private.a.agno, error,
+                       _RET_IP_);
+       return error;
+}
+
+int
+xfs_rmap_insert(
+       struct xfs_btree_cur    *rcur,
+       xfs_agblock_t           agbno,
+       xfs_extlen_t            len,
+       uint64_t                owner,
+       uint64_t                offset,
+       unsigned int            flags)
+{
+       int                     error;
+       int                     stat;
+
+       trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_private.a.agno, agbno,
+                       len, owner, offset, flags);
+
+       /* An identical record must not already be in the tree. */
+       error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags,
+                       &stat);
+       if (error)
+               goto out;
+       XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, stat == 0, out);
+
+       /* Stage the new record in the cursor and insert it. */
+       rcur->bc_rec.r.rm_flags = flags;
+       rcur->bc_rec.r.rm_offset = offset;
+       rcur->bc_rec.r.rm_owner = owner;
+       rcur->bc_rec.r.rm_blockcount = len;
+       rcur->bc_rec.r.rm_startblock = agbno;
+       error = xfs_btree_insert(rcur, &stat);
+       if (error)
+               goto out;
+       XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, stat == 1, out);
+out:
+       if (!error)
+               return 0;
+
+       trace_xfs_rmap_insert_error(rcur->bc_mp, rcur->bc_private.a.agno,
+                       error, _RET_IP_);
+       return error;
+}
+
+static int
+xfs_rmap_btrec_to_irec(
+       union xfs_btree_rec     *rec,
+       struct xfs_rmap_irec    *irec)
+{
+       uint64_t                packed_off;
+
+       packed_off = be64_to_cpu(rec->rmap.rm_offset);
+
+       /* plain fields: byte-swap from big-endian into the in-core record */
+       irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner);
+       irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount);
+       irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock);
+
+       /* flags are zeroed before the packed offset is unpacked into irec */
+       irec->rm_flags = 0;
+       return xfs_rmap_irec_offset_unpack(packed_off, irec);
+}
+
+/*
+ * Fetch the record under the cursor and convert it to in-core form.
+ *
+ * If xfs_btree_get_rec() fails its error is returned.  If it succeeds but
+ * leaves *stat at zero, 0 is returned and irec is not touched.  Otherwise
+ * the result of converting the record into irec is returned.
+ */
+int
+xfs_rmap_get_rec(
+       struct xfs_btree_cur    *cur,
+       struct xfs_rmap_irec    *irec,
+       int                     *stat)
+{
+       union xfs_btree_rec     *brec;
+       int                     error;
+
+       error = xfs_btree_get_rec(cur, &brec, stat);
+       if (error)
+               return error;
+       if (!*stat)
+               return 0;
+
+       return xfs_rmap_btrec_to_irec(brec, irec);
+}
+
+/*
+ * Find the extent in the rmap btree and remove it.
+ *
+ * The record found by the LE lookup must cover the whole of the range
+ * [bno, bno + len).  Depending on where the range sits inside that record
+ * the record is deleted (exact match), trimmed at its left or right end,
+ * or trimmed and followed by a newly inserted record for the right-hand
+ * remainder (range in the middle).
+ *
+ * oinfo is unpacked into an owner, a file offset and flags; if unwritten
+ * is set, XFS_RMAP_UNWRITTEN is added to those flags.  For non-inode
+ * owners and for bmbt blocks (ignore_off) the offset is neither checked
+ * against the record nor adjusted when the record is trimmed or split.
+ *
+ * Special Case #1: when growing the filesystem, we "free" an extent when
+ * growing the last AG. This extent is new space and so it is not tracked as
+ * used space in the btree. The growfs code will pass in an owner of
+ * XFS_RMAP_OWN_NULL to indicate that it expects that there is no owner of
+ * this extent. We verify that: the extent lookup must result in a record
+ * that does not overlap the range, and nothing is modified.
+ *
+ * Special Case #2: EFIs do not record the owner of the extent, so when
+ * recovering EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap
+ * btree to ignore the owner (i.e. wildcard match) so we don't trigger
+ * corruption checks during log recovery.
+ *
+ * Returns 0 on success or the error from a btree operation.  Every
+ * XFS_WANT_CORRUPTED_GOTO() consistency check that fails leaves through
+ * out_error.  On success control falls from out_done into out_error with
+ * error == 0, so only the "done" tracepoint fires.
+ */
+STATIC int
+xfs_rmap_unmap(
+       struct xfs_btree_cur    *cur,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       bool                    unwritten,
+       struct xfs_owner_info   *oinfo)
+{
+       struct xfs_mount        *mp = cur->bc_mp;
+       struct xfs_rmap_irec    ltrec;
+       uint64_t                ltoff;
+       int                     error = 0;
+       int                     i;
+       uint64_t                owner;
+       uint64_t                offset;
+       unsigned int            flags;
+       bool                    ignore_off;
+
+       xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
+       ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
+                       (flags & XFS_RMAP_BMBT_BLOCK);
+       if (unwritten)
+               flags |= XFS_RMAP_UNWRITTEN;
+       trace_xfs_rmap_unmap(mp, cur->bc_private.a.agno, bno, len,
+                       unwritten, oinfo);
+
+       /*
+        * We should always have a left record because there's a static record
+        * for the AG headers at rm_startblock == 0 created by mkfs/growfs that
+        * will not ever be removed from the tree.  Not finding one is
+        * therefore treated as corruption.
+        */
+       error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, &i);
+       if (error)
+               goto out_error;
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+       error = xfs_rmap_get_rec(cur, &ltrec, &i);
+       if (error)
+               goto out_error;
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+       trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
+                       cur->bc_private.a.agno, ltrec.rm_startblock,
+                       ltrec.rm_blockcount, ltrec.rm_owner,
+                       ltrec.rm_offset, ltrec.rm_flags);
+       ltoff = ltrec.rm_offset;
+
+       /*
+        * For growfs, the incoming extent must be beyond the left record we
+        * just found as it is new space and won't be used by anyone. This is
+        * just a corruption check as we don't actually do anything with this
+        * extent.  Note that we need to use >= instead of > because it might
+        * be the case that the "left" extent goes all the way to EOFS.
+        */
+       if (owner == XFS_RMAP_OWN_NULL) {
+               XFS_WANT_CORRUPTED_GOTO(mp, bno >= ltrec.rm_startblock +
+                                               ltrec.rm_blockcount, out_error);
+               goto out_done;
+       }
+
+       /* Make sure the unwritten flag matches. */
+       XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
+                       (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
+
+       /* Make sure the extent we found covers the entire freeing range. */
+       XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
+               ltrec.rm_startblock + ltrec.rm_blockcount >=
+               bno + len, out_error);
+
+       /*
+        * Make sure the owner matches what we expect to find in the tree.
+        * Any non-inode owner passed in skips this comparison.
+        */
+       XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
+                                   XFS_RMAP_NON_INODE_OWNER(owner), out_error);
+
+       /*
+        * Check the offset, if necessary.  A bmbt block only has to be
+        * recorded as a bmbt block; any other inode-owned range must lie
+        * inside the file offset range that the record describes.
+        */
+       if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
+               if (flags & XFS_RMAP_BMBT_BLOCK) {
+                       XFS_WANT_CORRUPTED_GOTO(mp,
+                                       ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
+                                       out_error);
+               } else {
+                       XFS_WANT_CORRUPTED_GOTO(mp,
+                                       ltrec.rm_offset <= offset, out_error);
+                       XFS_WANT_CORRUPTED_GOTO(mp,
+                                       ltoff + ltrec.rm_blockcount >= offset + len,
+                                       out_error);
+               }
+       }
+
+       if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
+               /* exact match, simply remove the record from rmap tree */
+               trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
+                               ltrec.rm_startblock, ltrec.rm_blockcount,
+                               ltrec.rm_owner, ltrec.rm_offset,
+                               ltrec.rm_flags);
+               error = xfs_btree_delete(cur, &i);
+               if (error)
+                       goto out_error;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+       } else if (ltrec.rm_startblock == bno) {
+               /*
+                * overlap left hand side of extent: move the start, trim the
+                * length and update the current record.  The file offset
+                * moves forward by the same amount unless it is ignored.
+                *
+                *       ltbno                ltlen
+                * Orig:    |oooooooooooooooooooo|
+                * Freeing: |fffffffff|
+                * Result:            |rrrrrrrrrr|
+                *         bno       len
+                */
+               ltrec.rm_startblock += len;
+               ltrec.rm_blockcount -= len;
+               if (!ignore_off)
+                       ltrec.rm_offset += len;
+               error = xfs_rmap_update(cur, &ltrec);
+               if (error)
+                       goto out_error;
+       } else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
+               /*
+                * overlap right hand side of extent: trim the length and update
+                * the current record.  Start block and offset stay as they are.
+                *
+                *       ltbno                ltlen
+                * Orig:    |oooooooooooooooooooo|
+                * Freeing:            |fffffffff|
+                * Result:  |rrrrrrrrrr|
+                *                    bno       len
+                */
+               ltrec.rm_blockcount -= len;
+               error = xfs_rmap_update(cur, &ltrec);
+               if (error)
+                       goto out_error;
+       } else {
+
+               /*
+                * overlap middle of extent: trim the length of the existing
+                * record to the length of the new left-extent size, increment
+                * the insertion position so we can insert a new record
+                * containing the remaining right-extent space.
+                *
+                *       ltbno                ltlen
+                * Orig:    |oooooooooooooooooooo|
+                * Freeing:       |fffffffff|
+                * Result:  |rrrrr|         |rrrr|
+                *               bno       len
+                *
+                * By the time the right-hand record is built,
+                * ltrec.rm_blockcount already holds the trimmed left length,
+                * so orig_len - len - ltrec.rm_blockcount is what is left over
+                * to the right of the freed range.
+                */
+               xfs_extlen_t    orig_len = ltrec.rm_blockcount;
+
+               ltrec.rm_blockcount = bno - ltrec.rm_startblock;
+               error = xfs_rmap_update(cur, &ltrec);
+               if (error)
+                       goto out_error;
+
+               error = xfs_btree_increment(cur, 0, &i);
+               if (error)
+                       goto out_error;
+
+               cur->bc_rec.r.rm_startblock = bno + len;
+               cur->bc_rec.r.rm_blockcount = orig_len - len -
+                                                    ltrec.rm_blockcount;
+               cur->bc_rec.r.rm_owner = ltrec.rm_owner;
+               if (ignore_off)
+                       cur->bc_rec.r.rm_offset = 0;
+               else
+                       cur->bc_rec.r.rm_offset = offset + len;
+               cur->bc_rec.r.rm_flags = flags;
+               trace_xfs_rmap_insert(mp, cur->bc_private.a.agno,
+                               cur->bc_rec.r.rm_startblock,
+                               cur->bc_rec.r.rm_blockcount,
+                               cur->bc_rec.r.rm_owner,
+                               cur->bc_rec.r.rm_offset,
+                               cur->bc_rec.r.rm_flags);
+               error = xfs_btree_insert(cur, &i);
+               if (error)
+                       goto out_error;
+       }
+
+out_done:
+       trace_xfs_rmap_unmap_done(mp, cur->bc_private.a.agno, bno, len,
+                       unwritten, oinfo);
+out_error:
+       if (error)
+               trace_xfs_rmap_unmap_error(mp, cur->bc_private.a.agno,
+                               error, _RET_IP_);
+       return error;
+}
+
+/*
+ * Remove a reference to an extent in the rmap btree.
+ *
+ * Does nothing and returns 0 when the superblock does not have the rmap
+ * btree feature.  Otherwise a cursor is set up for the given AG, the
+ * extent is unmapped as a written extent, and the cursor is torn down
+ * with XFS_BTREE_ERROR or XFS_BTREE_NOERROR according to the outcome.
+ */
+int
+xfs_rmap_free(
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       struct xfs_owner_info   *oinfo)
+{
+       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_btree_cur    *rcur;
+       int                     error;
+
+       if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+               return 0;
+
+       rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+       error = xfs_rmap_unmap(rcur, bno, len, false, oinfo);
+       xfs_btree_del_cursor(rcur,
+                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       return error;
+}
+
+/*
+ * Decide whether irec may be merged with a mapping described by owner and
+ * flags.  The record must have a real owner (not XFS_RMAP_OWN_NULL), that
+ * owner must equal the one given, and the unwritten, attr fork and bmbt
+ * block flags must each have the same setting on both sides.  Start block
+ * and offset contiguity are the caller's business.
+ */
+static bool
+xfs_rmap_is_mergeable(
+       struct xfs_rmap_irec    *irec,
+       uint64_t                owner,
+       unsigned int            flags)
+{
+       if (irec->rm_owner == XFS_RMAP_OWN_NULL || irec->rm_owner != owner)
+               return false;
+
+       return (flags & XFS_RMAP_UNWRITTEN) ==
+                       (irec->rm_flags & XFS_RMAP_UNWRITTEN) &&
+              (flags & XFS_RMAP_ATTR_FORK) ==
+                       (irec->rm_flags & XFS_RMAP_ATTR_FORK) &&
+              (flags & XFS_RMAP_BMBT_BLOCK) ==
+                       (irec->rm_flags & XFS_RMAP_BMBT_BLOCK);
+}
+
+/*
+ * When we allocate a new block, the first thing we do is add a reference to
+ * the extent in the rmap btree. This takes the form of a [agbno, length,
+ * owner, offset] record.  Flags are encoded in the high bits of the offset
+ * field.
+ *
+ * oinfo is unpacked into an owner, a file offset and flags; if unwritten is
+ * set, XFS_RMAP_UNWRITTEN is added to those flags.  The new mapping is
+ * merged into the record on its left and/or right when that record is
+ * mergeable (see xfs_rmap_is_mergeable()) and contiguous in both block
+ * number and, unless the offset is ignored for this owner, file offset.
+ * Otherwise a new record is inserted.
+ *
+ * Returns 0 on success or the error from a btree operation.  Every
+ * XFS_WANT_CORRUPTED_GOTO() consistency check that fails leaves through
+ * out_error.
+ */
+STATIC int
+xfs_rmap_map(
+       struct xfs_btree_cur    *cur,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       bool                    unwritten,
+       struct xfs_owner_info   *oinfo)
+{
+       struct xfs_mount        *mp = cur->bc_mp;
+       struct xfs_rmap_irec    ltrec;
+       struct xfs_rmap_irec    gtrec;
+       int                     have_gt;
+       int                     have_lt;
+       int                     error = 0;
+       int                     i;
+       uint64_t                owner;
+       uint64_t                offset;
+       unsigned int            flags = 0;
+       bool                    ignore_off;
+
+       xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
+       ASSERT(owner != 0);
+       ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
+                       (flags & XFS_RMAP_BMBT_BLOCK);
+       if (unwritten)
+               flags |= XFS_RMAP_UNWRITTEN;
+       trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
+                       unwritten, oinfo);
+
+       /*
+        * For the initial lookup, look for an exact match or the left-adjacent
+        * record for our insertion point. This will also give us the record for
+        * start block contiguity tests.
+        */
+       error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
+                       &have_lt);
+       if (error)
+               goto out_error;
+       XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error);
+
+       error = xfs_rmap_get_rec(cur, &ltrec, &have_lt);
+       if (error)
+               goto out_error;
+       XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error);
+       trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
+                       cur->bc_private.a.agno, ltrec.rm_startblock,
+                       ltrec.rm_blockcount, ltrec.rm_owner,
+                       ltrec.rm_offset, ltrec.rm_flags);
+
+       /* A left record we cannot merge with is treated as not being there. */
+       if (!xfs_rmap_is_mergeable(&ltrec, owner, flags))
+               have_lt = 0;
+
+       /* A mergeable left record must not overlap the new mapping. */
+       XFS_WANT_CORRUPTED_GOTO(mp,
+               have_lt == 0 ||
+               ltrec.rm_startblock + ltrec.rm_blockcount <= bno, out_error);
+
+       /*
+        * Increment the cursor to see if we have a right-adjacent record to our
+        * insertion point. This will give us the record for end block
+        * contiguity tests.
+        */
+       error = xfs_btree_increment(cur, 0, &have_gt);
+       if (error)
+               goto out_error;
+       if (have_gt) {
+               error = xfs_rmap_get_rec(cur, &gtrec, &have_gt);
+               if (error)
+                       goto out_error;
+               XFS_WANT_CORRUPTED_GOTO(mp, have_gt == 1, out_error);
+               XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= gtrec.rm_startblock,
+                                       out_error);
+               trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
+                       cur->bc_private.a.agno, gtrec.rm_startblock,
+                       gtrec.rm_blockcount, gtrec.rm_owner,
+                       gtrec.rm_offset, gtrec.rm_flags);
+               if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
+                       have_gt = 0;
+       }
+
+       /*
+        * Note: cursor currently points one record to the right of ltrec, even
+        * if there is no record in the tree to the right.
+        */
+       if (have_lt &&
+           ltrec.rm_startblock + ltrec.rm_blockcount == bno &&
+           (ignore_off || ltrec.rm_offset + ltrec.rm_blockcount == offset)) {
+               /*
+                * left edge contiguous, merge into left record.
+                *
+                *       ltbno     ltlen
+                * orig:   |ooooooooo|
+                * adding:           |aaaaaaaaa|
+                * result: |rrrrrrrrrrrrrrrrrrr|
+                *                  bno       len
+                */
+               ltrec.rm_blockcount += len;
+
+               /*
+                * ltrec.rm_blockcount already includes len at this point, so
+                * the length of a three-way merge is just that plus the right
+                * record's length.  The sum is formed in 64 bits so that it
+                * cannot wrap before it is compared against XFS_RMAP_LEN_MAX,
+                * whatever the width of long on this platform.
+                */
+               if (have_gt &&
+                   bno + len == gtrec.rm_startblock &&
+                   (ignore_off || offset + len == gtrec.rm_offset) &&
+                   (uint64_t)ltrec.rm_blockcount +
+                               gtrec.rm_blockcount <= XFS_RMAP_LEN_MAX) {
+                       /*
+                        * right edge also contiguous, delete right record
+                        * and merge into left record.
+                        *
+                        *       ltbno     ltlen    gtbno     gtlen
+                        * orig:   |ooooooooo|         |ooooooooo|
+                        * adding:           |aaaaaaaaa|
+                        * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
+                        */
+                       ltrec.rm_blockcount += gtrec.rm_blockcount;
+                       trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
+                                       gtrec.rm_startblock,
+                                       gtrec.rm_blockcount,
+                                       gtrec.rm_owner,
+                                       gtrec.rm_offset,
+                                       gtrec.rm_flags);
+                       error = xfs_btree_delete(cur, &i);
+                       if (error)
+                               goto out_error;
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+               }
+
+               /* point the cursor back to the left record and update */
+               error = xfs_btree_decrement(cur, 0, &have_gt);
+               if (error)
+                       goto out_error;
+               error = xfs_rmap_update(cur, &ltrec);
+               if (error)
+                       goto out_error;
+       } else if (have_gt &&
+                  bno + len == gtrec.rm_startblock &&
+                  (ignore_off || offset + len == gtrec.rm_offset)) {
+               /*
+                * right edge contiguous, merge into right record.
+                *
+                *                 gtbno     gtlen
+                * Orig:             |ooooooooo|
+                * adding: |aaaaaaaaa|
+                * Result: |rrrrrrrrrrrrrrrrrrr|
+                *        bno       len
+                */
+               gtrec.rm_startblock = bno;
+               gtrec.rm_blockcount += len;
+               if (!ignore_off)
+                       gtrec.rm_offset = offset;
+               error = xfs_rmap_update(cur, &gtrec);
+               if (error)
+                       goto out_error;
+       } else {
+               /*
+                * no contiguous edge with identical owner, insert
+                * new record at current cursor position.
+                */
+               cur->bc_rec.r.rm_startblock = bno;
+               cur->bc_rec.r.rm_blockcount = len;
+               cur->bc_rec.r.rm_owner = owner;
+               cur->bc_rec.r.rm_offset = offset;
+               cur->bc_rec.r.rm_flags = flags;
+               trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, len,
+                       owner, offset, flags);
+               error = xfs_btree_insert(cur, &i);
+               if (error)
+                       goto out_error;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+       }
+
+       trace_xfs_rmap_map_done(mp, cur->bc_private.a.agno, bno, len,
+                       unwritten, oinfo);
+out_error:
+       if (error)
+               trace_xfs_rmap_map_error(mp, cur->bc_private.a.agno,
+                               error, _RET_IP_);
+       return error;
+}
+
+/*
+ * Add a reference to an extent in the rmap btree.
+ *
+ * Does nothing and returns 0 when the superblock does not have the rmap
+ * btree feature.  Otherwise a cursor is set up for the given AG, the
+ * extent is mapped as a written extent, and the cursor is torn down with
+ * XFS_BTREE_ERROR or XFS_BTREE_NOERROR according to the outcome.
+ */
+int
+xfs_rmap_alloc(
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       struct xfs_owner_info   *oinfo)
+{
+       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_btree_cur    *rcur;
+       int                     error;
+
+       if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+               return 0;
+
+       rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+       error = xfs_rmap_map(rcur, bno, len, false, oinfo);
+       xfs_btree_del_cursor(rcur,
+                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       return error;
+}
+
+#define RMAP_LEFT_CONTIG       (1 << 0)        /* LEFT abuts the range and is mergeable */
+#define RMAP_RIGHT_CONTIG      (1 << 1)        /* RIGHT abuts the range and is mergeable */
+#define RMAP_LEFT_FILLING      (1 << 2)        /* range starts at PREV's start offset */
+#define RMAP_RIGHT_FILLING     (1 << 3)        /* range ends at PREV's end offset */
+#define RMAP_LEFT_VALID                (1 << 6)        /* a record exists to the left of PREV */
+#define RMAP_RIGHT_VALID       (1 << 7)        /* a record exists to the right of PREV */
+
+#define LEFT           r[0]    /* left neighbor record in xfs_rmap_convert() */
+#define RIGHT          r[1]    /* right neighbor record */
+#define PREV           r[2]    /* record found for the range being converted */
+#define NEW            r[3]    /* scratch record passed to xfs_rmap_update() */
+
+/*
+ * Convert an unwritten extent to a real extent or vice versa.
+ * Does not handle overlapping extents.
+ */
+STATIC int
+xfs_rmap_convert(
+       struct xfs_btree_cur    *cur,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       bool                    unwritten,
+       struct xfs_owner_info   *oinfo)
+{
+       struct xfs_mount        *mp = cur->bc_mp;
+       struct xfs_rmap_irec    r[4];   /* neighbor extent entries */
+                                       /* left is 0, right is 1, prev is 2 */
+                                       /* new is 3 */
+       uint64_t                owner;
+       uint64_t                offset;
+       uint64_t                new_endoff;
+       unsigned int            oldext;
+       unsigned int            newext;
+       unsigned int            flags = 0;
+       int                     i;
+       int                     state = 0;
+       int                     error;
+
+       xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
+       ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) ||
+                       (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
+       oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
+       new_endoff = offset + len;
+       trace_xfs_rmap_convert(mp, cur->bc_private.a.agno, bno, len,
+                       unwritten, oinfo);
+
+       /*
+        * For the initial lookup, look for an exact match or the left-adjacent
+        * record for our insertion point. This will also give us the record for
+        * start block contiguity tests.
+        */
+       error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
+       if (error)
+               goto done;
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+
+       error = xfs_rmap_get_rec(cur, &PREV, &i);
+       if (error)
+               goto done;
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+       trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
+                       cur->bc_private.a.agno, PREV.rm_startblock,
+                       PREV.rm_blockcount, PREV.rm_owner,
+                       PREV.rm_offset, PREV.rm_flags);
+
+       ASSERT(PREV.rm_offset <= offset);
+       ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
+       ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext);
+       newext = ~oldext & XFS_RMAP_UNWRITTEN;
+
+       /*
+        * Set flags determining what part of the previous oldext allocation
+        * extent is being replaced by a newext allocation.
+        */
+       if (PREV.rm_offset == offset)
+               state |= RMAP_LEFT_FILLING;
+       if (PREV.rm_offset + PREV.rm_blockcount == new_endoff)
+               state |= RMAP_RIGHT_FILLING;
+
+       /*
+        * Decrement the cursor to see if we have a left-adjacent record to our
+        * insertion point. This will give us the record for end block
+        * contiguity tests.
+        */
+       error = xfs_btree_decrement(cur, 0, &i);
+       if (error)
+               goto done;
+       if (i) {
+               state |= RMAP_LEFT_VALID;
+               error = xfs_rmap_get_rec(cur, &LEFT, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               XFS_WANT_CORRUPTED_GOTO(mp,
+                               LEFT.rm_startblock + LEFT.rm_blockcount <= bno,
+                               done);
+               trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
+                               cur->bc_private.a.agno, LEFT.rm_startblock,
+                               LEFT.rm_blockcount, LEFT.rm_owner,
+                               LEFT.rm_offset, LEFT.rm_flags);
+               if (LEFT.rm_startblock + LEFT.rm_blockcount == bno &&
+                   LEFT.rm_offset + LEFT.rm_blockcount == offset &&
+                   xfs_rmap_is_mergeable(&LEFT, owner, newext))
+                       state |= RMAP_LEFT_CONTIG;
+       }
+
+       /*
+        * Increment the cursor to see if we have a right-adjacent record to our
+        * insertion point. This will give us the record for end block
+        * contiguity tests.
+        */
+       error = xfs_btree_increment(cur, 0, &i);
+       if (error)
+               goto done;
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+       error = xfs_btree_increment(cur, 0, &i);
+       if (error)
+               goto done;
+       if (i) {
+               state |= RMAP_RIGHT_VALID;
+               error = xfs_rmap_get_rec(cur, &RIGHT, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= RIGHT.rm_startblock,
+                                       done);
+               trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
+                               cur->bc_private.a.agno, RIGHT.rm_startblock,
+                               RIGHT.rm_blockcount, RIGHT.rm_owner,
+                               RIGHT.rm_offset, RIGHT.rm_flags);
+               if (bno + len == RIGHT.rm_startblock &&
+                   offset + len == RIGHT.rm_offset &&
+                   xfs_rmap_is_mergeable(&RIGHT, owner, newext))
+                       state |= RMAP_RIGHT_CONTIG;
+       }
+
+       /* check that left + prev + right is not too long */
+       if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
+                        RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) ==
+           (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
+            RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) &&
+           (unsigned long)LEFT.rm_blockcount + len +
+            RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
+               state &= ~RMAP_RIGHT_CONTIG;
+
+       trace_xfs_rmap_convert_state(mp, cur->bc_private.a.agno, state,
+                       _RET_IP_);
+
+       /* reset the cursor back to PREV */
+       error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
+       if (error)
+               goto done;
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+
+       /*
+        * Switch out based on the FILLING and CONTIG state bits.
+        */
+       switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
+                        RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) {
+       case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
+            RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
+               /*
+                * Setting all of a previous oldext extent to newext.
+                * The left and right neighbors are both contiguous with new.
+                */
+               error = xfs_btree_increment(cur, 0, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
+                               RIGHT.rm_startblock, RIGHT.rm_blockcount,
+                               RIGHT.rm_owner, RIGHT.rm_offset,
+                               RIGHT.rm_flags);
+               error = xfs_btree_delete(cur, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               error = xfs_btree_decrement(cur, 0, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
+                               PREV.rm_startblock, PREV.rm_blockcount,
+                               PREV.rm_owner, PREV.rm_offset,
+                               PREV.rm_flags);
+               error = xfs_btree_delete(cur, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               error = xfs_btree_decrement(cur, 0, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               NEW = LEFT;
+               NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount;
+               error = xfs_rmap_update(cur, &NEW);
+               if (error)
+                       goto done;
+               break;
+
+       case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
+               /*
+                * Setting all of a previous oldext extent to newext.
+                * The left neighbor is contiguous, the right is not.
+                */
+               trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
+                               PREV.rm_startblock, PREV.rm_blockcount,
+                               PREV.rm_owner, PREV.rm_offset,
+                               PREV.rm_flags);
+               error = xfs_btree_delete(cur, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               error = xfs_btree_decrement(cur, 0, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               NEW = LEFT;
+               NEW.rm_blockcount += PREV.rm_blockcount;
+               error = xfs_rmap_update(cur, &NEW);
+               if (error)
+                       goto done;
+               break;
+
+       case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
+               /*
+                * Setting all of a previous oldext extent to newext.
+                * The right neighbor is contiguous, the left is not.
+                */
+               error = xfs_btree_increment(cur, 0, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
+                               RIGHT.rm_startblock, RIGHT.rm_blockcount,
+                               RIGHT.rm_owner, RIGHT.rm_offset,
+                               RIGHT.rm_flags);
+               error = xfs_btree_delete(cur, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               error = xfs_btree_decrement(cur, 0, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               NEW = PREV;
+               NEW.rm_blockcount = len + RIGHT.rm_blockcount;
+               NEW.rm_flags = newext;
+               error = xfs_rmap_update(cur, &NEW);
+               if (error)
+                       goto done;
+               break;
+
+       case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING:
+               /*
+                * Setting all of a previous oldext extent to newext.
+                * Neither the left nor right neighbors are contiguous with
+                * the new one.
+                */
+               NEW = PREV;
+               NEW.rm_flags = newext;
+               error = xfs_rmap_update(cur, &NEW);
+               if (error)
+                       goto done;
+               break;
+
+       case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG:
+               /*
+                * Setting the first part of a previous oldext extent to newext.
+                * The left neighbor is contiguous.
+                */
+               NEW = PREV;
+               NEW.rm_offset += len;
+               NEW.rm_startblock += len;
+               NEW.rm_blockcount -= len;
+               error = xfs_rmap_update(cur, &NEW);
+               if (error)
+                       goto done;
+               error = xfs_btree_decrement(cur, 0, &i);
+               if (error)
+                       goto done;
+               NEW = LEFT;
+               NEW.rm_blockcount += len;
+               error = xfs_rmap_update(cur, &NEW);
+               if (error)
+                       goto done;
+               break;
+
+       case RMAP_LEFT_FILLING:
+               /*
+                * Setting the first part of a previous oldext extent to newext.
+                * The left neighbor is not contiguous.
+                */
+               NEW = PREV;
+               NEW.rm_startblock += len;
+               NEW.rm_offset += len;
+               NEW.rm_blockcount -= len;
+               error = xfs_rmap_update(cur, &NEW);
+               if (error)
+                       goto done;
+               NEW.rm_startblock = bno;
+               NEW.rm_owner = owner;
+               NEW.rm_offset = offset;
+               NEW.rm_blockcount = len;
+               NEW.rm_flags = newext;
+               cur->bc_rec.r = NEW;
+               trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno,
+                               len, owner, offset, newext);
+               error = xfs_btree_insert(cur, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               break;
+
+       case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
+               /*
+                * Setting the last part of a previous oldext extent to newext.
+                * The right neighbor is contiguous with the new allocation.
+                */
+               NEW = PREV;
+               NEW.rm_blockcount -= len;
+               error = xfs_rmap_update(cur, &NEW);
+               if (error)
+                       goto done;
+               error = xfs_btree_increment(cur, 0, &i);
+               if (error)
+                       goto done;
+               NEW = RIGHT;
+               NEW.rm_offset = offset;
+               NEW.rm_startblock = bno;
+               NEW.rm_blockcount += len;
+               error = xfs_rmap_update(cur, &NEW);
+               if (error)
+                       goto done;
+               break;
+
+       case RMAP_RIGHT_FILLING:
+               /*
+                * Setting the last part of a previous oldext extent to newext.
+                * The right neighbor is not contiguous.
+                */
+               NEW = PREV;
+               NEW.rm_blockcount -= len;
+               error = xfs_rmap_update(cur, &NEW);
+               if (error)
+                       goto done;
+               error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset,
+                               oldext, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
+               NEW.rm_startblock = bno;
+               NEW.rm_owner = owner;
+               NEW.rm_offset = offset;
+               NEW.rm_blockcount = len;
+               NEW.rm_flags = newext;
+               cur->bc_rec.r = NEW;
+               trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno,
+                               len, owner, offset, newext);
+               error = xfs_btree_insert(cur, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               break;
+
+       case 0:
+               /*
+                * Setting the middle part of a previous oldext extent to
+                * newext.  Contiguity is impossible here.
+                * One extent becomes three extents.
+                */
+               /* new right extent - oldext */
+               NEW.rm_startblock = bno + len;
+               NEW.rm_owner = owner;
+               NEW.rm_offset = new_endoff;
+               NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount -
+                               new_endoff;
+               NEW.rm_flags = PREV.rm_flags;
+               error = xfs_rmap_update(cur, &NEW);
+               if (error)
+                       goto done;
+               /* new left extent - oldext */
+               NEW = PREV;
+               NEW.rm_blockcount = offset - PREV.rm_offset;
+               cur->bc_rec.r = NEW;
+               trace_xfs_rmap_insert(mp, cur->bc_private.a.agno,
+                               NEW.rm_startblock, NEW.rm_blockcount,
+                               NEW.rm_owner, NEW.rm_offset,
+                               NEW.rm_flags);
+               error = xfs_btree_insert(cur, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               /*
+                * Reset the cursor to the position of the new extent
+                * we are about to insert as we can't trust it after
+                * the previous insert.
+                */
+               error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset,
+                               oldext, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
+               /* new middle extent - newext */
+               cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN;
+               cur->bc_rec.r.rm_flags |= newext;
+               trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, len,
+                               owner, offset, newext);
+               error = xfs_btree_insert(cur, &i);
+               if (error)
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+               break;
+
+       case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
+       case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
+       case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG:
+       case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
+       case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
+       case RMAP_LEFT_CONTIG:
+       case RMAP_RIGHT_CONTIG:
+               /*
+                * These cases are all impossible.
+                */
+               ASSERT(0);
+       }
+
+       trace_xfs_rmap_convert_done(mp, cur->bc_private.a.agno, bno, len,
+                       unwritten, oinfo);
+done:
+       if (error)
+               trace_xfs_rmap_convert_error(cur->bc_mp,
+                               cur->bc_private.a.agno, error, _RET_IP_);
+       return error;
+}
+
+#undef NEW
+#undef LEFT
+#undef RIGHT
+#undef PREV
+
+/*
+ * Context threaded through xfs_btree_query_range() by xfs_rmap_query_range()
+ * so that xfs_rmap_query_range_helper() can reach the caller's callback.
+ */
+struct xfs_rmap_query_range_info {
+       /* caller's callback, invoked by the helper for each decoded record */
+       xfs_rmap_query_range_fn fn;
+       /* opaque pointer handed to fn unchanged */
+       void                            *priv;
+};
+
+/*
+ * Per-record callback given to xfs_btree_query_range(): convert the btree
+ * record to a struct xfs_rmap_irec and pass it to the caller's function.
+ * If the conversion fails its error is returned and the caller's function
+ * is not invoked; otherwise the caller's return value is passed back.
+ */
+STATIC int
+xfs_rmap_query_range_helper(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *rec,
+       void                    *priv)
+{
+       struct xfs_rmap_query_range_info        *info = priv;
+       struct xfs_rmap_irec                    decoded;
+       int                                     ret;
+
+       ret = xfs_rmap_btrec_to_irec(rec, &decoded);
+       if (!ret)
+               ret = info->fn(cur, &decoded, info->priv);
+       return ret;
+}
+
+/*
+ * Walk the rmap records lying between two keys.
+ *
+ * The bounds are copied into btree-format unions and the walk itself is
+ * delegated to xfs_btree_query_range(), which calls fn (via
+ * xfs_rmap_query_range_helper) with priv for each record.  The return value
+ * of xfs_btree_query_range() is passed back unchanged.
+ */
+int
+xfs_rmap_query_range(
+       struct xfs_btree_cur            *cur,
+       struct xfs_rmap_irec            *low_rec,
+       struct xfs_rmap_irec            *high_rec,
+       xfs_rmap_query_range_fn fn,
+       void                            *priv)
+{
+       struct xfs_rmap_query_range_info        info;
+       union xfs_btree_irec                    lo;
+       union xfs_btree_irec                    hi;
+
+       info.fn = fn;
+       info.priv = priv;
+       lo.r = *low_rec;
+       hi.r = *high_rec;
+
+       return xfs_btree_query_range(cur, &lo, &hi,
+                       xfs_rmap_query_range_helper, &info);
+}
+
+/*
+ * Dispose of the cursor left behind by xfs_rmap_finish_one().
+ *
+ * A NULL cursor is a no-op.  Otherwise the cursor is deleted (flagged as
+ * XFS_BTREE_ERROR when error is nonzero) and, only in the error case, the
+ * buffer the cursor referenced is released from the transaction.
+ */
+void
+xfs_rmap_finish_one_cleanup(
+       struct xfs_trans        *tp,
+       struct xfs_btree_cur    *rcur,
+       int                     error)
+{
+       struct xfs_buf          *agbp;
+       int                     how;
+
+       if (!rcur)
+               return;
+
+       /* Read the buffer pointer out before the cursor is deleted. */
+       agbp = rcur->bc_private.a.agbp;
+       how = error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR;
+       xfs_btree_del_cursor(rcur, how);
+       if (!error)
+               return;
+       xfs_trans_brelse(tp, agbp);
+}
+
+/*
+ * Process one of the deferred rmap operations.  We pass back the
+ * btree cursor to maintain our lock on the rmapbt between calls.
+ * This saves time and eliminates a buffer deadlock between the
+ * superblock and the AGF because we'll always grab them in the same
+ * order.
+ *
+ * @type selects the operation: ALLOC/MAP call xfs_rmap_map(), FREE/UNMAP
+ * call xfs_rmap_unmap() and CONVERT calls xfs_rmap_convert().  Any other
+ * value (including the *_SHARED types) trips an ASSERT and yields
+ * -EFSCORRUPTED.
+ *
+ * @pcur is in/out.  A cursor passed in is reused when it belongs to the AG
+ * containing @startblock; otherwise it is cleaned up and a new one is
+ * created.  Once a cursor exists it is stored in *pcur even if the rmap
+ * operation itself fails, so the caller still has to dispose of it (see
+ * xfs_rmap_finish_one_cleanup()).
+ *
+ * Returns 0 or a negative errno: -EIO from the error-injection hook,
+ * -EFSCORRUPTED if no buffer comes back from the freelist fixup, -ENOMEM if
+ * the cursor cannot be created, or whatever the rmap operation returns.
+ */
+int
+xfs_rmap_finish_one(
+       struct xfs_trans                *tp,
+       enum xfs_rmap_intent_type       type,
+       __uint64_t                      owner,
+       int                             whichfork,
+       xfs_fileoff_t                   startoff,
+       xfs_fsblock_t                   startblock,
+       xfs_filblks_t                   blockcount,
+       xfs_exntst_t                    state,
+       struct xfs_btree_cur            **pcur)
+{
+       struct xfs_mount                *mp = tp->t_mountp;
+       struct xfs_btree_cur            *rcur;
+       struct xfs_buf                  *agbp = NULL;
+       int                             error = 0;
+       xfs_agnumber_t                  agno;
+       struct xfs_owner_info           oinfo;
+       xfs_agblock_t                   bno;
+       bool                            unwritten;
+
+       /* Split the filesystem block number into AG number and AG block. */
+       agno = XFS_FSB_TO_AGNO(mp, startblock);
+       ASSERT(agno != NULLAGNUMBER);
+       bno = XFS_FSB_TO_AGBNO(mp, startblock);
+
+       trace_xfs_rmap_deferred(mp, agno, type, bno, owner, whichfork,
+                       startoff, blockcount, state);
+
+       /* Error-injection hook; bails out before any cursor is touched. */
+       if (XFS_TEST_ERROR(false, mp,
+                       XFS_ERRTAG_RMAP_FINISH_ONE,
+                       XFS_RANDOM_RMAP_FINISH_ONE))
+               return -EIO;
+
+       /*
+        * If we haven't gotten a cursor or the cursor AG doesn't match
+        * the startblock, get one now.
+        */
+       rcur = *pcur;
+       if (rcur != NULL && rcur->bc_private.a.agno != agno) {
+               /* Wrong AG: drop the old cursor (no-error path) and forget it. */
+               xfs_rmap_finish_one_cleanup(tp, rcur, 0);
+               rcur = NULL;
+               *pcur = NULL;
+       }
+       if (rcur == NULL) {
+               /*
+                * Refresh the freelist before we start changing the
+                * rmapbt, because a shape change could cause us to
+                * allocate blocks.
+                */
+               error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
+               if (error)
+                       return error;
+               if (!agbp)
+                       return -EFSCORRUPTED;
+
+               rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+               if (!rcur) {
+                       error = -ENOMEM;
+                       goto out_cur;
+               }
+       }
+       /* Publish the cursor before running the operation so the caller owns it. */
+       *pcur = rcur;
+
+       xfs_rmap_ino_owner(&oinfo, owner, whichfork, startoff);
+       unwritten = state == XFS_EXT_UNWRITTEN;
+       /*
+        * NOTE(review): bno was already derived from startblock above; this
+        * recomputation looks redundant provided rcur->bc_mp == mp -- confirm.
+        */
+       bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, startblock);
+
+       switch (type) {
+       case XFS_RMAP_ALLOC:
+       case XFS_RMAP_MAP:
+               error = xfs_rmap_map(rcur, bno, blockcount, unwritten, &oinfo);
+               break;
+       case XFS_RMAP_FREE:
+       case XFS_RMAP_UNMAP:
+               error = xfs_rmap_unmap(rcur, bno, blockcount, unwritten,
+                               &oinfo);
+               break;
+       case XFS_RMAP_CONVERT:
+               /*
+                * Note the inverted flag.  NOTE(review): presumably
+                * xfs_rmap_convert() wants the state the extent is being
+                * converted away from -- confirm against its definition.
+                */
+               error = xfs_rmap_convert(rcur, bno, blockcount, !unwritten,
+                               &oinfo);
+               break;
+       default:
+               ASSERT(0);
+               error = -EFSCORRUPTED;
+       }
+       return error;
+
+out_cur:
+       /* No cursor was created: release the buffer obtained above. */
+       xfs_trans_brelse(tp, agbp);
+
+       return error;
+}
+
+/*
+ * Deferred rmap updates are only worth queueing when the superblock says
+ * the filesystem has the rmapbt feature.
+ */
+static bool
+xfs_rmap_update_is_needed(
+       struct xfs_mount        *mp)
+{
+       struct xfs_sb           *sbp = &mp->m_sb;
+
+       return xfs_sb_version_hasrmapbt(sbp);
+}
+
+/*
+ * Record a rmap intent: allocate an xfs_rmap_intent, fill it in from the
+ * arguments (the bmap record is copied by value) and queue it on dfops as
+ * an XFS_DEFER_OPS_TYPE_RMAP item.  No sorting happens here; the intent is
+ * simply handed to xfs_defer_add().
+ *
+ * Always returns 0.  The allocation result is not checked; presumably a
+ * KM_SLEEP allocation cannot fail -- confirm against kmem_alloc().
+ */
+static int
+__xfs_rmap_add(
+       struct xfs_mount                *mp,
+       struct xfs_defer_ops            *dfops,
+       enum xfs_rmap_intent_type       type,
+       __uint64_t                      owner,
+       int                             whichfork,
+       struct xfs_bmbt_irec            *bmap)
+{
+       struct xfs_rmap_intent  *intent;
+       xfs_fsblock_t           fsbno = bmap->br_startblock;
+
+       trace_xfs_rmap_defer(mp, XFS_FSB_TO_AGNO(mp, fsbno), type,
+                       XFS_FSB_TO_AGBNO(mp, fsbno), owner, whichfork,
+                       bmap->br_startoff, bmap->br_blockcount,
+                       bmap->br_state);
+
+       intent = kmem_alloc(sizeof(*intent), KM_SLEEP | KM_NOFS);
+       intent->ri_bmap = *bmap;
+       intent->ri_whichfork = whichfork;
+       intent->ri_owner = owner;
+       intent->ri_type = type;
+       INIT_LIST_HEAD(&intent->ri_list);
+
+       xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_RMAP, &intent->ri_list);
+       return 0;
+}
+
+/*
+ * Queue an XFS_RMAP_MAP intent, owned by ip's inode number, for an extent
+ * being mapped into the given fork.  Does nothing and returns 0 when rmap
+ * updates are not needed on this filesystem.
+ */
+int
+xfs_rmap_map_extent(
+       struct xfs_mount        *mp,
+       struct xfs_defer_ops    *dfops,
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       struct xfs_bmbt_irec    *PREV)
+{
+       if (xfs_rmap_update_is_needed(mp))
+               return __xfs_rmap_add(mp, dfops, XFS_RMAP_MAP, ip->i_ino,
+                               whichfork, PREV);
+       return 0;
+}
+
+/*
+ * Queue an XFS_RMAP_UNMAP intent, owned by ip's inode number, for an extent
+ * being removed from the given fork.  Does nothing and returns 0 when rmap
+ * updates are not needed on this filesystem.
+ */
+int
+xfs_rmap_unmap_extent(
+       struct xfs_mount        *mp,
+       struct xfs_defer_ops    *dfops,
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       struct xfs_bmbt_irec    *PREV)
+{
+       if (xfs_rmap_update_is_needed(mp))
+               return __xfs_rmap_add(mp, dfops, XFS_RMAP_UNMAP, ip->i_ino,
+                               whichfork, PREV);
+       return 0;
+}
+
+/*
+ * Queue an XFS_RMAP_CONVERT intent (unwritten <-> real state change), owned
+ * by ip's inode number, for an extent of the given fork.  Does nothing and
+ * returns 0 when rmap updates are not needed on this filesystem.
+ */
+int
+xfs_rmap_convert_extent(
+       struct xfs_mount        *mp,
+       struct xfs_defer_ops    *dfops,
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       struct xfs_bmbt_irec    *PREV)
+{
+       if (xfs_rmap_update_is_needed(mp))
+               return __xfs_rmap_add(mp, dfops, XFS_RMAP_CONVERT, ip->i_ino,
+                               whichfork, PREV);
+       return 0;
+}
+
+/*
+ * Schedule the creation of an rmap for non-file data.
+ *
+ * The AG-relative extent is described with a temporary bmap record (file
+ * offset 0, state XFS_EXT_NORM, data fork) and queued as an XFS_RMAP_ALLOC
+ * intent for the given owner.  Returns 0 without queueing anything when
+ * rmap updates are not needed on this filesystem.
+ */
+int
+xfs_rmap_alloc_extent(
+       struct xfs_mount        *mp,
+       struct xfs_defer_ops    *dfops,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       __uint64_t              owner)
+{
+       struct xfs_bmbt_irec    rec;
+
+       if (!xfs_rmap_update_is_needed(mp))
+               return 0;
+
+       rec.br_state = XFS_EXT_NORM;
+       rec.br_startoff = 0;
+       rec.br_blockcount = len;
+       rec.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
+
+       return __xfs_rmap_add(mp, dfops, XFS_RMAP_ALLOC, owner,
+                       XFS_DATA_FORK, &rec);
+}
+
+/*
+ * Schedule the deletion of an rmap for non-file data.
+ *
+ * The AG-relative extent is described with a temporary bmap record (file
+ * offset 0, state XFS_EXT_NORM, data fork) and queued as an XFS_RMAP_FREE
+ * intent for the given owner.  Returns 0 without queueing anything when
+ * rmap updates are not needed on this filesystem.
+ */
+int
+xfs_rmap_free_extent(
+       struct xfs_mount        *mp,
+       struct xfs_defer_ops    *dfops,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       __uint64_t              owner)
+{
+       struct xfs_bmbt_irec    rec;
+
+       if (!xfs_rmap_update_is_needed(mp))
+               return 0;
+
+       rec.br_state = XFS_EXT_NORM;
+       rec.br_startoff = 0;
+       rec.br_blockcount = len;
+       rec.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
+
+       return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner,
+                       XFS_DATA_FORK, &rec);
+}
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h

new file mode 100644 (file)

index 0000000..71cf99a
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2016 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __XFS_RMAP_H__
+#define __XFS_RMAP_H__
+
+/*
+ * Fill in owner info for an owner that carries no offset and no flags:
+ * oi_owner is set to the given value, oi_offset and oi_flags to zero.
+ */
+static inline void
+xfs_rmap_ag_owner(
+       struct xfs_owner_info   *oi,
+       uint64_t                owner)
+{
+       oi->oi_flags = 0;
+       oi->oi_offset = 0;
+       oi->oi_owner = owner;
+}
+
+/*
+ * Fill in owner info for an inode's bmbt block: owner is the inode number,
+ * offset is zero, and XFS_OWNER_INFO_BMBT_BLOCK is set, plus
+ * XFS_OWNER_INFO_ATTR_FORK when whichfork is XFS_ATTR_FORK.
+ */
+static inline void
+xfs_rmap_ino_bmbt_owner(
+       struct xfs_owner_info   *oi,
+       xfs_ino_t               ino,
+       int                     whichfork)
+{
+       unsigned int            oflags = XFS_OWNER_INFO_BMBT_BLOCK;
+
+       if (whichfork == XFS_ATTR_FORK)
+               oflags |= XFS_OWNER_INFO_ATTR_FORK;
+
+       oi->oi_owner = ino;
+       oi->oi_offset = 0;
+       oi->oi_flags = oflags;
+}
+
+/*
+ * Fill in owner info for an inode fork extent: owner is the inode number,
+ * offset is the given file offset, and XFS_OWNER_INFO_ATTR_FORK is set only
+ * when whichfork is XFS_ATTR_FORK.
+ */
+static inline void
+xfs_rmap_ino_owner(
+       struct xfs_owner_info   *oi,
+       xfs_ino_t               ino,
+       int                     whichfork,
+       xfs_fileoff_t           offset)
+{
+       oi->oi_owner = ino;
+       oi->oi_offset = offset;
+       oi->oi_flags = (whichfork == XFS_ATTR_FORK) ?
+                       XFS_OWNER_INFO_ATTR_FORK : 0;
+}
+
+/*
+ * Mark an owner info as XFS_RMAP_OWN_UNKNOWN.
+ *
+ * The whole structure is initialised, not just oi_owner: consumers such as
+ * xfs_owner_info_unpack() read oi_offset and oi_flags unconditionally, so
+ * leaving them untouched would expose whatever was in the caller's
+ * (typically on-stack) structure.
+ */
+static inline void
+xfs_rmap_skip_owner_update(
+       struct xfs_owner_info   *oi)
+{
+       xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
+}
+
+/* Reverse mapping functions. */
+
+struct xfs_buf;
+
+/*
+ * Combine an in-core record's offset and flags into one 64-bit value: the
+ * offset goes through XFS_RMAP_OFF() and each of the ATTR_FORK, BMBT_BLOCK
+ * and UNWRITTEN flags present in rm_flags sets its XFS_RMAP_OFF_* bit.
+ */
+static inline __u64
+xfs_rmap_irec_offset_pack(
+       const struct xfs_rmap_irec      *irec)
+{
+       __u64                   packed = XFS_RMAP_OFF(irec->rm_offset);
+
+       if (irec->rm_flags & XFS_RMAP_UNWRITTEN)
+               packed |= XFS_RMAP_OFF_UNWRITTEN;
+       if (irec->rm_flags & XFS_RMAP_BMBT_BLOCK)
+               packed |= XFS_RMAP_OFF_BMBT_BLOCK;
+       if (irec->rm_flags & XFS_RMAP_ATTR_FORK)
+               packed |= XFS_RMAP_OFF_ATTR_FORK;
+       return packed;
+}
+
+/*
+ * Split a packed 64-bit offset into an in-core record's rm_offset and
+ * rm_flags.
+ *
+ * Returns -EFSCORRUPTED, leaving irec untouched, if any bit outside
+ * XFS_RMAP_OFF_MASK | XFS_RMAP_OFF_FLAGS is set; otherwise returns 0.
+ *
+ * rm_flags is cleared before the flag bits are decoded so the result
+ * depends only on the packed value, not on whatever the caller left in
+ * irec (e.g. an uninitialised on-stack record).
+ */
+static inline int
+xfs_rmap_irec_offset_unpack(
+       __u64                   offset,
+       struct xfs_rmap_irec    *irec)
+{
+       if (offset & ~(XFS_RMAP_OFF_MASK | XFS_RMAP_OFF_FLAGS))
+               return -EFSCORRUPTED;
+       irec->rm_offset = XFS_RMAP_OFF(offset);
+       irec->rm_flags = 0;
+       if (offset & XFS_RMAP_OFF_ATTR_FORK)
+               irec->rm_flags |= XFS_RMAP_ATTR_FORK;
+       if (offset & XFS_RMAP_OFF_BMBT_BLOCK)
+               irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
+       if (offset & XFS_RMAP_OFF_UNWRITTEN)
+               irec->rm_flags |= XFS_RMAP_UNWRITTEN;
+       return 0;
+}
+
+/*
+ * Break an owner info into its parts.  The owner and offset are copied out
+ * as-is; the XFS_OWNER_INFO_{ATTR_FORK,BMBT_BLOCK} flags are translated to
+ * the corresponding XFS_RMAP_* flags.
+ */
+static inline void
+xfs_owner_info_unpack(
+       struct xfs_owner_info   *oinfo,
+       uint64_t                *owner,
+       uint64_t                *offset,
+       unsigned int            *flags)
+{
+       unsigned int            rmap_flags = 0;
+
+       if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
+               rmap_flags |= XFS_RMAP_BMBT_BLOCK;
+       if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
+               rmap_flags |= XFS_RMAP_ATTR_FORK;
+
+       *owner = oinfo->oi_owner;
+       *offset = oinfo->oi_offset;
+       *flags = rmap_flags;
+}
+
+/*
+ * Build an owner info from its parts.  The offset is passed through
+ * XFS_RMAP_OFF(); the XFS_RMAP_{ATTR_FORK,BMBT_BLOCK} flags are translated
+ * to the corresponding XFS_OWNER_INFO_* flags and all other flag bits are
+ * dropped.
+ */
+static inline void
+xfs_owner_info_pack(
+       struct xfs_owner_info   *oinfo,
+       uint64_t                owner,
+       uint64_t                offset,
+       unsigned int            flags)
+{
+       unsigned int            oflags = 0;
+
+       if (flags & XFS_RMAP_BMBT_BLOCK)
+               oflags |= XFS_OWNER_INFO_BMBT_BLOCK;
+       if (flags & XFS_RMAP_ATTR_FORK)
+               oflags |= XFS_OWNER_INFO_ATTR_FORK;
+
+       oinfo->oi_owner = owner;
+       oinfo->oi_offset = XFS_RMAP_OFF(offset);
+       oinfo->oi_flags = oflags;
+}
+
+int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
+                  xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+                  struct xfs_owner_info *oinfo);
+int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
+                 xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+                 struct xfs_owner_info *oinfo);
+
+int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+               xfs_extlen_t len, uint64_t owner, uint64_t offset,
+               unsigned int flags, int *stat);
+int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+               xfs_extlen_t len, uint64_t owner, uint64_t offset,
+               unsigned int flags, int *stat);
+int xfs_rmap_insert(struct xfs_btree_cur *rcur, xfs_agblock_t agbno,
+               xfs_extlen_t len, uint64_t owner, uint64_t offset,
+               unsigned int flags);
+int xfs_rmap_get_rec(struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec,
+               int *stat);
+
+/*
+ * Callback type for xfs_rmap_query_range().  It receives the btree cursor,
+ * an in-core copy of the current record, and the priv pointer that was
+ * given to xfs_rmap_query_range().
+ */
+typedef int (*xfs_rmap_query_range_fn)(
+       struct xfs_btree_cur    *cur,
+       struct xfs_rmap_irec    *rec,
+       void                    *priv);
+
+int xfs_rmap_query_range(struct xfs_btree_cur *cur,
+               struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec,
+               xfs_rmap_query_range_fn fn, void *priv);
+
+/*
+ * Kinds of deferred rmap update.  The type is stored in an xfs_rmap_intent
+ * and later passed to xfs_rmap_finish_one().
+ */
+enum xfs_rmap_intent_type {
+       /* queued by xfs_rmap_map_extent() */
+       XFS_RMAP_MAP,
+       XFS_RMAP_MAP_SHARED,
+       /* queued by xfs_rmap_unmap_extent() */
+       XFS_RMAP_UNMAP,
+       XFS_RMAP_UNMAP_SHARED,
+       /* queued by xfs_rmap_convert_extent() */
+       XFS_RMAP_CONVERT,
+       XFS_RMAP_CONVERT_SHARED,
+       /* queued by xfs_rmap_alloc_extent() */
+       XFS_RMAP_ALLOC,
+       /* queued by xfs_rmap_free_extent() */
+       XFS_RMAP_FREE,
+};
+
+/* One queued rmap update, as built by the xfs_rmap_*_extent() helpers. */
+struct xfs_rmap_intent {
+       /* list linkage handed to xfs_defer_add() */
+       struct list_head                        ri_list;
+       /* which operation to perform */
+       enum xfs_rmap_intent_type               ri_type;
+       /* owner: an inode number for file extents, else the caller's owner */
+       __uint64_t                              ri_owner;
+       /* fork the extent belongs to */
+       int                                     ri_whichfork;
+       /* by-value copy of the extent being updated */
+       struct xfs_bmbt_irec                    ri_bmap;
+};
+
+/* functions for updating the rmapbt based on bmbt map/unmap operations */
+int xfs_rmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+               struct xfs_inode *ip, int whichfork,
+               struct xfs_bmbt_irec *imap);
+int xfs_rmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+               struct xfs_inode *ip, int whichfork,
+               struct xfs_bmbt_irec *imap);
+int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+               struct xfs_inode *ip, int whichfork,
+               struct xfs_bmbt_irec *imap);
+int xfs_rmap_alloc_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+               xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+               __uint64_t owner);
+int xfs_rmap_free_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+               xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+               __uint64_t owner);
+
+void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
+               struct xfs_btree_cur *rcur, int error);
+int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
+               __uint64_t owner, int whichfork, xfs_fileoff_t startoff,
+               xfs_fsblock_t startblock, xfs_filblks_t blockcount,
+               xfs_exntst_t state, struct xfs_btree_cur **pcur);
+
+#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c

new file mode 100644 (file)

index 0000000..bc1faeb
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -0,0 +1,511 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_error.h"
+#include "xfs_extent_busy.h"
+
+/*
+ * Reverse map btree.
+ *
+ * This is a per-ag tree used to track the owner(s) of a given extent. With
+ * reflink it is possible for there to be multiple owners, which is a departure
+ * from classic XFS. Owner records for data extents are inserted when the
+ * extent is mapped and removed when an extent is unmapped.  Owner records for
+ * all other block types (i.e. metadata) are inserted when an extent is
+ * allocated and removed when an extent is freed. There can only be one owner
+ * of a metadata extent, usually an inode or some other metadata structure like
+ * an AG btree.
+ *
+ * The rmap btree is part of the free space management, so blocks for the tree
+ * are sourced from the agfl. Hence we need transaction reservation support for
+ * this tree so that the freelist is always large enough. This also impacts on
+ * the minimum space we need to leave free in the AG.
+ *
+ * The tree is ordered by [ag block, owner, offset]. This is a large key size,
+ * but it is the only way to enforce unique keys when a block can be owned by
+ * multiple files at any offset. There's no need to order/search by extent
+ * size for online updating/management of the tree. It is intended that most
+ * reverse lookups will be to find the owner(s) of a particular block, or to
+ * try to recover tree and file data from corrupt primary metadata.
+ */
+
+/*
+ * Duplicate a cursor: build a fresh rmapbt cursor for the same mount,
+ * transaction, AGF buffer and AG number as @cur.
+ */
+static struct xfs_btree_cur *
+xfs_rmapbt_dup_cursor(
+       struct xfs_btree_cur    *cur)
+{
+       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
+       xfs_agnumber_t          agno = cur->bc_private.a.agno;
+
+       return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp, agbp, agno);
+}
+
+/*
+ * Point the AGF at a new rmapbt root block and adjust the recorded tree
+ * height by @inc, both in the AGF buffer and in the in-core perag, then
+ * log the AGF root and level fields.
+ */
+STATIC void
+xfs_rmapbt_set_root(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       int                     inc)
+{
+       struct xfs_buf          *agf_bp = cur->bc_private.a.agbp;
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agf_bp);
+       int                     which = cur->bc_btnum;
+       struct xfs_perag        *pag;
+
+       pag = xfs_perag_get(cur->bc_mp, be32_to_cpu(agf->agf_seqno));
+
+       ASSERT(ptr->s != 0);
+
+       /* Root pointer and level count stored in the AGF. */
+       agf->agf_roots[which] = ptr->s;
+       be32_add_cpu(&agf->agf_levels[which], inc);
+
+       /* Keep the per-AG in-core level count in step with the AGF. */
+       pag->pagf_levels[which] += inc;
+       xfs_perag_put(pag);
+
+       xfs_alloc_log_agf(cur->bc_tp, agf_bp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+}
+
+/*
+ * Allocate a block for the rmapbt by taking one from the AG free list.
+ *
+ * Returns 0 with *stat set to 1 and the block number stored in @new when a
+ * block was obtained, or 0 with *stat set to 0 when the free list call
+ * handed back NULLAGBLOCK.  An error from the free list call is returned
+ * unchanged.  @start is not used.
+ */
+STATIC int
+xfs_rmapbt_alloc_block(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *start,
+       union xfs_btree_ptr     *new,
+       int                     *stat)
+{
+       xfs_agblock_t           agbno;
+       int                     got_block;
+       int                     error;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+       error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+                                      &agbno, 1);
+       if (error) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+               return error;
+       }
+
+       trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
+                       agbno, 1);
+
+       got_block = (agbno != NULLAGBLOCK);
+       if (got_block) {
+               xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno,
+                               agbno, 1, false);
+
+               xfs_trans_agbtree_delta(cur->bc_tp, 1);
+               new->s = cpu_to_be32(agbno);
+       }
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = got_block;
+       return 0;
+}
+
+/*
+ * Free an rmapbt block by putting it back on the AG free list.  On success
+ * the block is also inserted into the busy extent list with
+ * XFS_EXTENT_BUSY_SKIP_DISCARD and the transaction's AG btree block delta
+ * is decremented.  Returns 0 or the error from the free list call.
+ */
+STATIC int
+xfs_rmapbt_free_block(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp)
+{
+       struct xfs_buf          *agf_bp = cur->bc_private.a.agbp;
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agf_bp);
+       xfs_agblock_t           agbno;
+       int                     error;
+
+       agbno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
+       trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
+                       agbno, 1);
+
+       error = xfs_alloc_put_freelist(cur->bc_tp, agf_bp, NULL, agbno, 1);
+       if (!error) {
+               xfs_extent_busy_insert(cur->bc_tp,
+                               be32_to_cpu(agf->agf_seqno), agbno, 1,
+                               XFS_EXTENT_BUSY_SKIP_DISCARD);
+               xfs_trans_agbtree_delta(cur->bc_tp, -1);
+       }
+
+       return error;
+}
+
+/*
+ * Minimum record count for a block at @level: slot 0 of m_rmap_mnr is used
+ * for level 0, slot 1 for every other level.
+ */
+STATIC int
+xfs_rmapbt_get_minrecs(
+       struct xfs_btree_cur    *cur,
+       int                     level)
+{
+       struct xfs_mount        *mp = cur->bc_mp;
+
+       if (level == 0)
+               return mp->m_rmap_mnr[0];
+       return mp->m_rmap_mnr[1];
+}
+
+/*
+ * Maximum record count for a block at @level: slot 0 of m_rmap_mxr is used
+ * for level 0, slot 1 for every other level.
+ */
+STATIC int
+xfs_rmapbt_get_maxrecs(
+       struct xfs_btree_cur    *cur,
+       int                     level)
+{
+       struct xfs_mount        *mp = cur->bc_mp;
+
+       if (level == 0)
+               return mp->m_rmap_mxr[0];
+       return mp->m_rmap_mxr[1];
+}
+
+/*
+ * Build the (low) key of a record by copying its startblock, owner and
+ * offset fields verbatim; all three stay in on-disk byte order.
+ */
+STATIC void
+xfs_rmapbt_init_key_from_rec(
+       union xfs_btree_key     *key,
+       union xfs_btree_rec     *rec)
+{
+       struct xfs_rmap_key     *kp = &key->rmap;
+       struct xfs_rmap_rec     *rp = &rec->rmap;
+
+       kp->rm_startblock = rp->rm_startblock;
+       kp->rm_owner = rp->rm_owner;
+       kp->rm_offset = rp->rm_offset;
+}
+
+/*
+ * Compute the high key of a reverse mapping record, i.e. the largest
+ * startblock/offset that still maps to the record.  blockcount-1 is always
+ * added to the startblock.  For data/attr fork mappings the same amount is
+ * added to the offset as well; records with a non-inode owner and bmbt
+ * block records keep the record's offset unchanged.
+ */
+STATIC void
+xfs_rmapbt_init_high_key_from_rec(
+       union xfs_btree_key     *key,
+       union xfs_btree_rec     *rec)
+{
+       __uint64_t              off;
+       int                     bump;
+
+       bump = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
+
+       key->rmap.rm_startblock = rec->rmap.rm_startblock;
+       be32_add_cpu(&key->rmap.rm_startblock, bump);
+       key->rmap.rm_owner = rec->rmap.rm_owner;
+       key->rmap.rm_offset = rec->rmap.rm_offset;
+
+       /* Only inode-owned, non-bmbt mappings have an offset to advance. */
+       if (!XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) &&
+           !XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset))) {
+               off = be64_to_cpu(key->rmap.rm_offset);
+               /* Advance the offset part, keep the bits outside the mask. */
+               off = (XFS_RMAP_OFF(off) + bump) |
+                     (off & ~XFS_RMAP_OFF_MASK);
+               key->rmap.rm_offset = cpu_to_be64(off);
+       }
+}
+
+/*
+ * Fill an on-disk record from the in-core record held in the cursor,
+ * converting each field to big-endian.  The offset is packed with
+ * xfs_rmap_irec_offset_pack() before conversion.
+ */
+STATIC void
+xfs_rmapbt_init_rec_from_cur(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *rec)
+{
+       struct xfs_rmap_irec    *irec = &cur->bc_rec.r;
+
+       rec->rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
+       rec->rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
+       rec->rmap.rm_owner = cpu_to_be64(irec->rm_owner);
+       rec->rmap.rm_offset = cpu_to_be64(xfs_rmap_irec_offset_pack(irec));
+}
+
+/*
+ * Set @ptr to the root block of this cursor's btree, as recorded in the
+ * AGF attached to the cursor.
+ */
+STATIC void
+xfs_rmapbt_init_ptr_from_cur(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr)
+{
+       struct xfs_buf          *agf_bp = cur->bc_private.a.agbp;
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agf_bp);
+
+       /* The cursor's AG must match the AGF, and the tree must have a root. */
+       ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+       ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
+
+       ptr->s = agf->agf_roots[cur->bc_btnum];
+}
+
+/*
+ * Compare an on-disk key against the in-core record held in the cursor.
+ *
+ * Startblocks are compared first and their signed difference is returned
+ * if non-zero.  Ties are broken by owner and then by the XFS_RMAP_OFF()
+ * part of the key offset against the record offset; those comparisons
+ * return 1 if the key is larger, -1 if it is smaller.  Returns 0 when all
+ * three fields match.
+ */
+STATIC __int64_t
+xfs_rmapbt_key_diff(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *key)
+{
+       struct xfs_rmap_irec    *irec = &cur->bc_rec.r;
+       struct xfs_rmap_key     *kp = &key->rmap;
+       __u64                   kval, rval;
+       __int64_t               delta;
+
+       delta = (__int64_t)be32_to_cpu(kp->rm_startblock) -
+                       irec->rm_startblock;
+       if (delta != 0)
+               return delta;
+
+       kval = be64_to_cpu(kp->rm_owner);
+       rval = irec->rm_owner;
+       if (kval != rval)
+               return kval > rval ? 1 : -1;
+
+       kval = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
+       rval = irec->rm_offset;
+       if (kval != rval)
+               return kval > rval ? 1 : -1;
+
+       return 0;
+}
+
+/*
+ * Compare two on-disk keys.
+ *
+ * Startblocks are compared first and their signed difference (k1 - k2) is
+ * returned if non-zero.  Ties are broken by owner and then by the
+ * XFS_RMAP_OFF() part of the offset; those comparisons return 1 if @k1 is
+ * larger, -1 if it is smaller.  Returns 0 when all three fields match.
+ */
+STATIC __int64_t
+xfs_rmapbt_diff_two_keys(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *k1,
+       union xfs_btree_key     *k2)
+{
+       struct xfs_rmap_key     *left = &k1->rmap;
+       struct xfs_rmap_key     *right = &k2->rmap;
+       __int64_t               delta;
+       __u64                   lval, rval;
+
+       delta = (__int64_t)be32_to_cpu(left->rm_startblock) -
+                          be32_to_cpu(right->rm_startblock);
+       if (delta != 0)
+               return delta;
+
+       lval = be64_to_cpu(left->rm_owner);
+       rval = be64_to_cpu(right->rm_owner);
+       if (lval != rval)
+               return lval > rval ? 1 : -1;
+
+       lval = XFS_RMAP_OFF(be64_to_cpu(left->rm_offset));
+       rval = XFS_RMAP_OFF(be64_to_cpu(right->rm_offset));
+       if (lval != rval)
+               return lval > rval ? 1 : -1;
+
+       return 0;
+}
+
+/*
+ * Structural checks shared by the read and write verifiers.  Returns true
+ * if the buffer looks like a valid rmapbt block, false otherwise.
+ */
+static bool
+xfs_rmapbt_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       struct xfs_perag        *pag = bp->b_pag;
+       unsigned int            level;
+       unsigned int            level_limit;
+
+       /*
+        * Magic number, feature bit and v5 header checks, evaluated in that
+        * order and stopping at the first failure.
+        */
+       if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC) ||
+           !xfs_sb_version_hasrmapbt(&mp->m_sb) ||
+           !xfs_btree_sblock_v5hdr_verify(bp))
+               return false;
+
+       /*
+        * Level verification.
+        *
+        * During growfs operations, we can't verify the exact level or owner
+        * as the perag is not fully initialised and hence not attached to the
+        * buffer.  In this case, check against the maximum tree depth.
+        *
+        * Similarly, during log recovery we will have a perag structure
+        * attached, but the agf information will not yet have been
+        * initialised from the on disk AGF. Again, we can only check against
+        * maximum limits in this case.
+        */
+       if (pag && pag->pagf_init)
+               level_limit = pag->pagf_levels[XFS_BTNUM_RMAPi];
+       else
+               level_limit = mp->m_rmap_maxlevels;
+
+       level = be16_to_cpu(block->bb_level);
+       if (level >= level_limit)
+               return false;
+
+       return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
+}
+
+/*
+ * Read verifier: check the block CRC first and the block structure second,
+ * flagging -EFSBADCRC or -EFSCORRUPTED respectively.  If the buffer carries
+ * an error afterwards, emit the corruption tracepoint and report it.
+ */
+static void
+xfs_rmapbt_read_verify(
+       struct xfs_buf  *bp)
+{
+       if (!xfs_btree_sblock_verify_crc(bp))
+               xfs_buf_ioerror(bp, -EFSBADCRC);
+       else if (!xfs_rmapbt_verify(bp))
+               xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+       if (!bp->b_error)
+               return;
+
+       trace_xfs_btree_corrupt(bp, _RET_IP_);
+       xfs_verifier_error(bp);
+}
+
+/*
+ * Write verifier: a block that passes the structure check gets its CRC
+ * recomputed; one that fails is traced, flagged -EFSCORRUPTED and reported
+ * without touching the CRC.
+ */
+static void
+xfs_rmapbt_write_verify(
+       struct xfs_buf  *bp)
+{
+       if (xfs_rmapbt_verify(bp)) {
+               xfs_btree_sblock_calc_crc(bp);
+               return;
+       }
+
+       trace_xfs_btree_corrupt(bp, _RET_IP_);
+       xfs_buf_ioerror(bp, -EFSCORRUPTED);
+       xfs_verifier_error(bp);
+}
+
+const struct xfs_buf_ops xfs_rmapbt_buf_ops = {        /* buffer verifier hooks for rmapbt blocks */
+       .name                   = "xfs_rmapbt",
+       .verify_read            = xfs_rmapbt_read_verify,       /* CRC check, then structure check */
+       .verify_write           = xfs_rmapbt_write_verify,      /* structure check, then CRC update */
+};
+
+#if defined(DEBUG) || defined(XFS_WARN)
+/*
+ * Ordering check for two keys: returns 1 if @k1 sorts at or before @k2
+ * when compared by startblock, then owner, then the XFS_RMAP_OFF() part of
+ * the offset, and 0 otherwise.
+ */
+STATIC int
+xfs_rmapbt_keys_inorder(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *k1,
+       union xfs_btree_key     *k2)
+{
+       __uint32_t              bno1;
+       __uint32_t              bno2;
+       __uint64_t              v1;
+       __uint64_t              v2;
+
+       bno1 = be32_to_cpu(k1->rmap.rm_startblock);
+       bno2 = be32_to_cpu(k2->rmap.rm_startblock);
+       if (bno1 != bno2)
+               return bno1 < bno2;
+
+       v1 = be64_to_cpu(k1->rmap.rm_owner);
+       v2 = be64_to_cpu(k2->rmap.rm_owner);
+       if (v1 != v2)
+               return v1 < v2;
+
+       v1 = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
+       v2 = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
+       return v1 <= v2;
+}
+
+/*
+ * Ordering check for two records: returns 1 if @r1 sorts at or before @r2
+ * when compared by startblock, then owner, then the XFS_RMAP_OFF() part of
+ * the offset, and 0 otherwise.
+ */
+STATIC int
+xfs_rmapbt_recs_inorder(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *r1,
+       union xfs_btree_rec     *r2)
+{
+       __uint32_t              bno1;
+       __uint32_t              bno2;
+       __uint64_t              v1;
+       __uint64_t              v2;
+
+       bno1 = be32_to_cpu(r1->rmap.rm_startblock);
+       bno2 = be32_to_cpu(r2->rmap.rm_startblock);
+       if (bno1 != bno2)
+               return bno1 < bno2;
+
+       v1 = be64_to_cpu(r1->rmap.rm_owner);
+       v2 = be64_to_cpu(r2->rmap.rm_owner);
+       if (v1 != v2)
+               return v1 < v2;
+
+       v1 = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
+       v2 = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
+       return v1 <= v2;
+}
+#endif /* DEBUG */
+
+static const struct xfs_btree_ops xfs_rmapbt_ops = {   /* rmapbt callbacks for the btree code */
+       .rec_len                = sizeof(struct xfs_rmap_rec),
+       .key_len                = 2 * sizeof(struct xfs_rmap_key),      /* low key + high key */
+
+       .dup_cursor             = xfs_rmapbt_dup_cursor,
+       .set_root               = xfs_rmapbt_set_root,
+       .alloc_block            = xfs_rmapbt_alloc_block,
+       .free_block             = xfs_rmapbt_free_block,
+       .get_minrecs            = xfs_rmapbt_get_minrecs,
+       .get_maxrecs            = xfs_rmapbt_get_maxrecs,
+       .init_key_from_rec      = xfs_rmapbt_init_key_from_rec,
+       .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
+       .init_rec_from_cur      = xfs_rmapbt_init_rec_from_cur,
+       .init_ptr_from_cur      = xfs_rmapbt_init_ptr_from_cur,
+       .key_diff               = xfs_rmapbt_key_diff,
+       .buf_ops                = &xfs_rmapbt_buf_ops,
+       .diff_two_keys          = xfs_rmapbt_diff_two_keys,
+#if defined(DEBUG) || defined(XFS_WARN)
+       .keys_inorder           = xfs_rmapbt_keys_inorder,      /* only built with DEBUG or XFS_WARN */
+       .recs_inorder           = xfs_rmapbt_recs_inorder,
+#endif
+};
+
+/*
+ * Allocate and initialise a new reverse mapping btree cursor for the AG
+ * whose AGF buffer is @agbp.  The tree height is taken from the AGF.
+ */
+struct xfs_btree_cur *
+xfs_rmapbt_init_cursor(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       xfs_agnumber_t          agno)
+{
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
+       struct xfs_btree_cur    *cur;
+
+       cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
+
+       cur->bc_mp = mp;
+       cur->bc_tp = tp;
+       cur->bc_ops = &xfs_rmapbt_ops;
+       cur->bc_btnum = XFS_BTNUM_RMAP;
+       cur->bc_blocklog = mp->m_sb.sb_blocklog;
+       cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
+
+       /* Overlapping btree; 2 keys per pointer. */
+       cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
+
+       cur->bc_private.a.agbp = agbp;
+       cur->bc_private.a.agno = agno;
+
+       return cur;
+}
+
+/*
+ * Calculate how many entries fit in an rmap btree block of @blocklen bytes
+ * once the block header is subtracted.  A leaf entry is one record; a node
+ * entry is two keys plus one pointer.  @mp is not used.
+ */
+int
+xfs_rmapbt_maxrecs(
+       struct xfs_mount        *mp,
+       int                     blocklen,
+       int                     leaf)
+{
+       int                     usable = blocklen - XFS_RMAP_BLOCK_LEN;
+
+       if (!leaf)
+               return usable / (2 * sizeof(struct xfs_rmap_key) +
+                                sizeof(xfs_rmap_ptr_t));
+       return usable / sizeof(struct xfs_rmap_rec);
+}
+
+/*
+ * Compute the maximum height of an rmap btree from the minimum record
+ * counts and the number of blocks in an AG, and cache it in the mount.
+ */
+void
+xfs_rmapbt_compute_maxlevels(
+       struct xfs_mount                *mp)
+{
+       unsigned long           nblocks = mp->m_sb.sb_agblocks;
+
+       mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_rmap_mnr,
+                       nblocks);
+}
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h

new file mode 100644 (file)

index 0000000..e73a553
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_RMAP_BTREE_H__
+#define __XFS_RMAP_BTREE_H__
+
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_mount;
+
+/* rmaps only exist on crc enabled filesystems */
+#define XFS_RMAP_BLOCK_LEN     XFS_BTREE_SBLOCK_CRC_LEN
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ *
+ * All of the macros take a 1-based index and compute an address relative
+ * to the start of the block, after the XFS_RMAP_BLOCK_LEN header:
+ *
+ *  - REC_ADDR:      records are packed back to back.
+ *  - KEY_ADDR:      each slot is two keys wide; this is the first key.
+ *  - HIGH_KEY_ADDR: the second key of the same slot.
+ *  - PTR_ADDR:      pointers start after maxrecs two-key slots.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_RMAP_REC_ADDR(block, index) \
+       ((struct xfs_rmap_rec *) \
+               ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+                (((index) - 1) * sizeof(struct xfs_rmap_rec))))
+
+#define XFS_RMAP_KEY_ADDR(block, index) \
+       ((struct xfs_rmap_key *) \
+               ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+                ((index) - 1) * 2 * sizeof(struct xfs_rmap_key)))
+
+#define XFS_RMAP_HIGH_KEY_ADDR(block, index) \
+       ((struct xfs_rmap_key *) \
+               ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+                sizeof(struct xfs_rmap_key) + \
+                ((index) - 1) * 2 * sizeof(struct xfs_rmap_key)))
+
+#define XFS_RMAP_PTR_ADDR(block, index, maxrecs) \
+       ((xfs_rmap_ptr_t *) \
+               ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+                (maxrecs) * 2 * sizeof(struct xfs_rmap_key) + \
+                ((index) - 1) * sizeof(xfs_rmap_ptr_t)))
+
+struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
+                               struct xfs_trans *tp, struct xfs_buf *bp,
+                               xfs_agnumber_t agno);
+int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
+extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
+
+#endif /* __XFS_RMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c

index 12ca86778e023e4261998660f39b482927a16c02..0e3d4f5ec33c6f945b30b5ad47c9f39c46139ac0 100644 (file)
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -24,6 +24,7 @@
  #include "xfs_bit.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_ialloc.h"
  #include "xfs_alloc.h"
@@ -36,6 +37,7 @@
  #include "xfs_alloc_btree.h"
  #include "xfs_ialloc_btree.h"
  #include "xfs_log.h"
+#include "xfs_rmap_btree.h"
  
  /*
   * Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -729,6 +731,11 @@ xfs_sb_mount_common(
         mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
         mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
  
+       mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 1);
+       mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 0);
+       mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
+       mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
+
         mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
         mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
                                         sbp->sb_inopblock);
@@ -738,6 +745,8 @@ xfs_sb_mount_common(
                 mp->m_ialloc_min_blks = sbp->sb_spino_align;
         else
                 mp->m_ialloc_min_blks = mp->m_ialloc_blks;
+       mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+       mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp);
  }
  
  /*
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h

index 16002b5ec4eb82c2988fc6f559f0e3ed995ec1e9..0c5b30bd884cdce801780290935a5deb0c7d9de2 100644 (file)
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -38,6 +38,7 @@ extern const struct xfs_buf_ops xfs_agi_buf_ops;
  extern const struct xfs_buf_ops xfs_agf_buf_ops;
  extern const struct xfs_buf_ops xfs_agfl_buf_ops;
  extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
+extern const struct xfs_buf_ops xfs_rmapbt_buf_ops;
  extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
  extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
  extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
@@ -116,6 +117,7 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
  #define        XFS_INO_BTREE_REF       3
  #define        XFS_ALLOC_BTREE_REF     2
  #define        XFS_BMAP_BTREE_REF      2
+#define        XFS_RMAP_BTREE_REF      2
  #define        XFS_DIR_BTREE_REF       2
  #define        XFS_INO_REF             2
  #define        XFS_ATTR_BTREE_REF      1
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c

index 68cb1e7bf2bb1d38e34398ab6dff35be5bd81c18..301ef2f4dbd6258f8981aa2d91bed524a8ce9110 100644 (file)
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -63,6 +63,30 @@ xfs_calc_buf_res(
         return nbufs * (size + xfs_buf_log_overhead());
  }
  
+/*
+ * Per-extent log reservation, in blocks, for the btree changes involved in
+ * freeing or allocating an extent.  Two trees are always counted (bnobt and
+ * cntbt); when the rmapbt feature is enabled a third tree, the rmapbt, is
+ * counted as well.  Each tree contributes:
+ *
+ *     (2 blocks/level * max depth) - 1
+ *
+ * blocks per operation.  Keep in mind that max depth is calculated
+ * separately for each type of tree: m_ag_maxlevels is used for the first
+ * two, m_rmap_maxlevels for the rmapbt.
+ */
+static uint
+xfs_allocfree_log_count(
+       struct xfs_mount *mp,
+       uint            num_ops)
+{
+       uint            bnocnt_blocks;
+
+       bnocnt_blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1);
+       if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+               return bnocnt_blocks;
+
+       return bnocnt_blocks + num_ops * (2 * mp->m_rmap_maxlevels - 1);
+}
+
  /*
   * Logging inodes is really tricksy. They are logged in memory format,
   * which means that what we write into the log doesn't directly translate into
@@ -126,7 +150,7 @@ xfs_calc_inode_res(
   */
  STATIC uint
  xfs_calc_finobt_res(
-       struct xfs_mount        *mp,
+       struct xfs_mount        *mp,
         int                     alloc,
         int                     modify)
  {
@@ -137,7 +161,7 @@ xfs_calc_finobt_res(
  
         res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
         if (alloc)
-               res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 
+               res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
                                         XFS_FSB_TO_B(mp, 1));
         if (modify)
                 res += (uint)XFS_FSB_TO_B(mp, 1);
@@ -153,9 +177,9 @@ xfs_calc_finobt_res(
   * item logged to try to account for the overhead of the transaction mechanism.
   *
   * Note:  Most of the reservations underestimate the number of allocation
- * groups into which they could free extents in the xfs_bmap_finish() call.
+ * groups into which they could free extents in the xfs_defer_finish() call.
   * This is because the number in the worst case is quite high and quite
- * unusual.  In order to fix this we need to change xfs_bmap_finish() to free
+ * unusual.  In order to fix this we need to change xfs_defer_finish() to free
   * extents in only a single AG at a time.  This will require changes to the
   * EFI code as well, however, so that the EFI for the extents not freed is
   * logged again in each transaction.  See SGI PV #261917.
@@ -188,10 +212,10 @@ xfs_calc_write_reservation(
                      xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
                                       XFS_FSB_TO_B(mp, 1)) +
                      xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+                    xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
                                       XFS_FSB_TO_B(mp, 1))),
                     (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+                    xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
                                       XFS_FSB_TO_B(mp, 1))));
  }
  
@@ -217,10 +241,10 @@ xfs_calc_itruncate_reservation(
                      xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
                                       XFS_FSB_TO_B(mp, 1))),
                     (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+                    xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
                                       XFS_FSB_TO_B(mp, 1)) +
                     xfs_calc_buf_res(5, 0) +
-                   xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+                   xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
                                      XFS_FSB_TO_B(mp, 1)) +
                     xfs_calc_buf_res(2 + mp->m_ialloc_blks +
                                      mp->m_in_maxlevels, 0)));
@@ -247,7 +271,7 @@ xfs_calc_rename_reservation(
                      xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
                                       XFS_FSB_TO_B(mp, 1))),
                     (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
+                    xfs_calc_buf_res(xfs_allocfree_log_count(mp, 3),
                                       XFS_FSB_TO_B(mp, 1))));
  }
  
@@ -286,7 +310,7 @@ xfs_calc_link_reservation(
                      xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
                                       XFS_FSB_TO_B(mp, 1))),
                     (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+                    xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
                                       XFS_FSB_TO_B(mp, 1))));
  }
  
@@ -324,7 +348,7 @@ xfs_calc_remove_reservation(
                      xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
                                       XFS_FSB_TO_B(mp, 1))),
                     (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+                    xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
                                       XFS_FSB_TO_B(mp, 1))));
  }
  
@@ -371,7 +395,7 @@ xfs_calc_create_resv_alloc(
                 mp->m_sb.sb_sectsize +
                 xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
                 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
                                  XFS_FSB_TO_B(mp, 1));
  }
  
@@ -399,7 +423,7 @@ xfs_calc_icreate_resv_alloc(
         return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
                 mp->m_sb.sb_sectsize +
                 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
                                  XFS_FSB_TO_B(mp, 1)) +
                 xfs_calc_finobt_res(mp, 0, 0);
  }
@@ -483,7 +507,7 @@ xfs_calc_ifree_reservation(
                 xfs_calc_buf_res(1, 0) +
                 xfs_calc_buf_res(2 + mp->m_ialloc_blks +
                                  mp->m_in_maxlevels, 0) +
-               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
                                  XFS_FSB_TO_B(mp, 1)) +
                 xfs_calc_finobt_res(mp, 0, 1);
  }
@@ -513,7 +537,7 @@ xfs_calc_growdata_reservation(
         struct xfs_mount        *mp)
  {
         return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
                                  XFS_FSB_TO_B(mp, 1));
  }
  
@@ -535,7 +559,7 @@ xfs_calc_growrtalloc_reservation(
                 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
                                  XFS_FSB_TO_B(mp, 1)) +
                 xfs_calc_inode_res(mp, 1) +
-               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
                                  XFS_FSB_TO_B(mp, 1));
  }
  
@@ -611,7 +635,7 @@ xfs_calc_addafork_reservation(
                 xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
                 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
                                  XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
                                  XFS_FSB_TO_B(mp, 1));
  }
  
@@ -634,7 +658,7 @@ xfs_calc_attrinval_reservation(
                     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
                                      XFS_FSB_TO_B(mp, 1))),
                    (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
-                   xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+                   xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
                                      XFS_FSB_TO_B(mp, 1))));
  }
  
@@ -701,7 +725,7 @@ xfs_calc_attrrm_reservation(
                                         XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
                      xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
                     (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+                    xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
                                       XFS_FSB_TO_B(mp, 1))));
  }
  
diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h

index 797815012c0e31fe711132b3c65aecd5591f7e38..0eb46ed6d404da7d3076e8338f56289ae7f83151 100644 (file)
--- a/fs/xfs/libxfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
@@ -67,16 +67,6 @@ struct xfs_trans_resv {
  /* shorthand way of accessing reservation structure */
  #define M_RES(mp)      (&(mp)->m_resv)
  
-/*
- * Per-extent log reservation for the allocation btree changes
- * involved in freeing or allocating an extent.
- * 2 trees * (2 blocks/level * max depth - 1) * block size
- */
-#define        XFS_ALLOCFREE_LOG_RES(mp,nx) \
-       ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * (mp)->m_ag_maxlevels - 1)))
-#define        XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
-       ((nx) * (2 * (2 * (mp)->m_ag_maxlevels - 1)))
-
  /*
   * Per-directory log reservation for any directory change.
   * dir blocks: (1 btree block per level + data block + free block) * dblock size
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h

index b79dc66b2ecd4afb89f924cb046ac5b37f1ff8d8..3d503647f26b6924ecbe9b702d076bf29220830a 100644 (file)
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -108,8 +108,8 @@ typedef enum {
  } xfs_lookup_t;
  
  typedef enum {
-       XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
-       XFS_BTNUM_FINOi, XFS_BTNUM_MAX
+       XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
+       XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX
  } xfs_btnum_t;
  
  struct xfs_name {
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c

index cd4a850564f2a7d7f48d75c4c6b59f0f2994932e..4ece4f2ffc7271ef7249b4e410f5e48391aa0383 100644 (file)
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -25,6 +25,7 @@
  #include "xfs_bit.h"
  #include "xfs_mount.h"
  #include "xfs_da_format.h"
+#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_trans.h"
@@ -40,6 +41,7 @@
  #include "xfs_trace.h"
  #include "xfs_icache.h"
  #include "xfs_log.h"
+#include "xfs_rmap_btree.h"
  
  /* Kernel only BMAP related definitions and functions */
  
@@ -79,95 +81,6 @@ xfs_zero_extent(
                 GFP_NOFS, true);
  }
  
-/* Sort bmap items by AG. */
-static int
-xfs_bmap_free_list_cmp(
-       void                    *priv,
-       struct list_head        *a,
-       struct list_head        *b)
-{
-       struct xfs_mount        *mp = priv;
-       struct xfs_bmap_free_item       *ra;
-       struct xfs_bmap_free_item       *rb;
-
-       ra = container_of(a, struct xfs_bmap_free_item, xbfi_list);
-       rb = container_of(b, struct xfs_bmap_free_item, xbfi_list);
-       return  XFS_FSB_TO_AGNO(mp, ra->xbfi_startblock) -
-               XFS_FSB_TO_AGNO(mp, rb->xbfi_startblock);
-}
-
-/*
- * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
- * caller.  Frees all the extents that need freeing, which must be done
- * last due to locking considerations.  We never free any extents in
- * the first transaction.
- *
- * If an inode *ip is provided, rejoin it to the transaction if
- * the transaction was committed.
- */
-int                                            /* error */
-xfs_bmap_finish(
-       struct xfs_trans                **tp,   /* transaction pointer addr */
-       struct xfs_bmap_free            *flist, /* i/o: list extents to free */
-       struct xfs_inode                *ip)
-{
-       struct xfs_efd_log_item         *efd;   /* extent free data */
-       struct xfs_efi_log_item         *efi;   /* extent free intention */
-       int                             error;  /* error return value */
-       int                             committed;/* xact committed or not */
-       struct xfs_bmap_free_item       *free;  /* free extent item */
-
-       ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
-       if (flist->xbf_count == 0)
-               return 0;
-
-       list_sort((*tp)->t_mountp, &flist->xbf_flist, xfs_bmap_free_list_cmp);
-
-       efi = xfs_trans_get_efi(*tp, flist->xbf_count);
-       list_for_each_entry(free, &flist->xbf_flist, xbfi_list)
-               xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
-                       free->xbfi_blockcount);
-
-       error = __xfs_trans_roll(tp, ip, &committed);
-       if (error) {
-               /*
-                * If the transaction was committed, drop the EFD reference
-                * since we're bailing out of here. The other reference is
-                * dropped when the EFI hits the AIL.
-                *
-                * If the transaction was not committed, the EFI is freed by the
-                * EFI item unlock handler on abort. Also, we have a new
-                * transaction so we should return committed=1 even though we're
-                * returning an error.
-                */
-               if (committed) {
-                       xfs_efi_release(efi);
-                       xfs_force_shutdown((*tp)->t_mountp,
-                                          SHUTDOWN_META_IO_ERROR);
-               }
-               return error;
-       }
-
-       /*
-        * Get an EFD and free each extent in the list, logging to the EFD in
-        * the process. The remaining bmap free list is cleaned up by the caller
-        * on error.
-        */
-       efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count);
-       while (!list_empty(&flist->xbf_flist)) {
-               free = list_first_entry(&flist->xbf_flist,
-                               struct xfs_bmap_free_item, xbfi_list);
-               error = xfs_trans_free_extent(*tp, efd, free->xbfi_startblock,
-                                             free->xbfi_blockcount);
-               if (error)
-                       return error;
-
-               xfs_bmap_del_free(flist, free);
-       }
-
-       return 0;
-}
-
  int
  xfs_bmap_rtalloc(
         struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
@@ -214,9 +127,9 @@ xfs_bmap_rtalloc(
         /*
          * Lock out modifications to both the RT bitmap and summary inodes
          */
-       xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
+       xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
         xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
-       xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
+       xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
         xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
  
         /*
@@ -773,7 +686,7 @@ xfs_bmap_punch_delalloc_range(
                 xfs_bmbt_irec_t imap;
                 int             nimaps = 1;
                 xfs_fsblock_t   firstblock;
-               xfs_bmap_free_t flist;
+               struct xfs_defer_ops dfops;
  
                 /*
                  * Map the range first and check that it is a delalloc extent
@@ -804,18 +717,18 @@ xfs_bmap_punch_delalloc_range(
                 WARN_ON(imap.br_blockcount == 0);
  
                 /*
-                * Note: while we initialise the firstblock/flist pair, they
+                * Note: while we initialise the firstblock/dfops pair, they
                  * should never be used because blocks should never be
                  * allocated or freed for a delalloc extent and hence we need
                  * don't cancel or finish them after the xfs_bunmapi() call.
                  */
-               xfs_bmap_init(&flist, &firstblock);
+               xfs_defer_init(&dfops, &firstblock);
                 error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
-                                       &flist, &done);
+                                       &dfops, &done);
                 if (error)
                         break;
  
-               ASSERT(!flist.xbf_count && list_empty(&flist.xbf_flist));
+               ASSERT(!xfs_defer_has_unfinished_work(&dfops));
  next_block:
                 start_fsb++;
                 remaining--;
@@ -972,7 +885,7 @@ xfs_alloc_file_space(
         int                     rt;
         xfs_trans_t             *tp;
         xfs_bmbt_irec_t         imaps[1], *imapp;
-       xfs_bmap_free_t         free_list;
+       struct xfs_defer_ops    dfops;
         uint                    qblocks, resblks, resrtextents;
         int                     error;
  
@@ -1063,17 +976,17 @@ xfs_alloc_file_space(
  
                 xfs_trans_ijoin(tp, ip, 0);
  
-               xfs_bmap_init(&free_list, &firstfsb);
+               xfs_defer_init(&dfops, &firstfsb);
                 error = xfs_bmapi_write(tp, ip, startoffset_fsb,
                                         allocatesize_fsb, alloc_type, &firstfsb,
-                                       resblks, imapp, &nimaps, &free_list);
+                                       resblks, imapp, &nimaps, &dfops);
                 if (error)
                         goto error0;
  
                 /*
                  * Complete the transaction
                  */
-               error = xfs_bmap_finish(&tp, &free_list, NULL);
+               error = xfs_defer_finish(&tp, &dfops, NULL);
                 if (error)
                         goto error0;
  
@@ -1096,7 +1009,7 @@ xfs_alloc_file_space(
         return error;
  
  error0:        /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
         xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
  
  error1:        /* Just cancel transaction */
@@ -1114,7 +1027,7 @@ xfs_unmap_extent(
  {
         struct xfs_mount        *mp = ip->i_mount;
         struct xfs_trans        *tp;
-       struct xfs_bmap_free    free_list;
+       struct xfs_defer_ops    dfops;
         xfs_fsblock_t           firstfsb;
         uint                    resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
         int                     error;
@@ -1133,13 +1046,13 @@ xfs_unmap_extent(
  
         xfs_trans_ijoin(tp, ip, 0);
  
-       xfs_bmap_init(&free_list, &firstfsb);
+       xfs_defer_init(&dfops, &firstfsb);
         error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
-                       &free_list, done);
+                       &dfops, done);
         if (error)
                 goto out_bmap_cancel;
  
-       error = xfs_bmap_finish(&tp, &free_list, NULL);
+       error = xfs_defer_finish(&tp, &dfops, ip);
         if (error)
                 goto out_bmap_cancel;
  
@@ -1149,7 +1062,7 @@ out_unlock:
         return error;
  
  out_bmap_cancel:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
  out_trans_cancel:
         xfs_trans_cancel(tp);
         goto out_unlock;
@@ -1338,7 +1251,7 @@ xfs_shift_file_space(
         struct xfs_mount        *mp = ip->i_mount;
         struct xfs_trans        *tp;
         int                     error;
-       struct xfs_bmap_free    free_list;
+       struct xfs_defer_ops    dfops;
         xfs_fsblock_t           first_block;
         xfs_fileoff_t           stop_fsb;
         xfs_fileoff_t           next_fsb;
@@ -1416,19 +1329,19 @@ xfs_shift_file_space(
  
                 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
  
-               xfs_bmap_init(&free_list, &first_block);
+               xfs_defer_init(&dfops, &first_block);
  
                 /*
                  * We are using the write transaction in which max 2 bmbt
                  * updates are allowed
                  */
                 error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
-                               &done, stop_fsb, &first_block, &free_list,
+                               &done, stop_fsb, &first_block, &dfops,
                                 direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
                 if (error)
                         goto out_bmap_cancel;
  
-               error = xfs_bmap_finish(&tp, &free_list, NULL);
+               error = xfs_defer_finish(&tp, &dfops, NULL);
                 if (error)
                         goto out_bmap_cancel;
  
@@ -1438,7 +1351,7 @@ xfs_shift_file_space(
         return error;
  
  out_bmap_cancel:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
  out_trans_cancel:
         xfs_trans_cancel(tp);
         return error;
@@ -1622,6 +1535,10 @@ xfs_swap_extents(
         __uint64_t      tmp;
         int             lock_flags;
  
+       /* XXX: we can't do this with rmap, will fix later */
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               return -EOPNOTSUPP;
+
         tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
         if (!tempifp) {
                 error = -ENOMEM;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h

index f20071432ca6222e0d103ac8d4a54380e90565e0..68a621a8e0c0700adbd6369e3a0a588c43ebf114 100644 (file)
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -21,7 +21,7 @@
  /* Kernel only BMAP related definitions and functions */
  
  struct xfs_bmbt_irec;
-struct xfs_bmap_free_item;
+struct xfs_extent_free_item;
  struct xfs_ifork;
  struct xfs_inode;
  struct xfs_mount;
@@ -40,8 +40,6 @@ int   xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
                 xfs_bmap_format_t formatter, void *arg);
  
  /* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */
-void   xfs_bmap_del_free(struct xfs_bmap_free *flist,
-                         struct xfs_bmap_free_item *free);
  int    xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp,
                                struct xfs_bmbt_irec *prevp, xfs_extlen_t extsz,
                                int rt, int eof, int delay, int convert,
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c

index 272c3f8b6f7d0f11a0564e40b00b0568a28683dd..4ff499aa7338f6b7ea098955710938de67791dd4 100644 (file)
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -179,7 +179,7 @@ xfs_ioc_trim(
          * matter as trimming blocks is an advisory interface.
          */
         if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
-           range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) ||
+           range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
             range.len < mp->m_sb.sb_blocksize)
                 return -EINVAL;
  
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c

index ccb0811963b27d2b112b23df77337ab9b2c54c2c..7a30b8f11db7a26f8a82ded531e8a5170ea03ad5 100644 (file)
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -23,6 +23,7 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_bmap.h"
  #include "xfs_bmap_util.h"
@@ -307,7 +308,7 @@ xfs_qm_dqalloc(
         xfs_buf_t       **O_bpp)
  {
         xfs_fsblock_t   firstblock;
-       xfs_bmap_free_t flist;
+       struct xfs_defer_ops dfops;
         xfs_bmbt_irec_t map;
         int             nmaps, error;
         xfs_buf_t       *bp;
@@ -320,7 +321,7 @@ xfs_qm_dqalloc(
         /*
          * Initialize the bmap freelist prior to calling bmapi code.
          */
-       xfs_bmap_init(&flist, &firstblock);
+       xfs_defer_init(&dfops, &firstblock);
         xfs_ilock(quotip, XFS_ILOCK_EXCL);
         /*
          * Return if this type of quotas is turned off while we didn't
@@ -336,7 +337,7 @@ xfs_qm_dqalloc(
         error = xfs_bmapi_write(tp, quotip, offset_fsb,
                                 XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
                                 &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
-                               &map, &nmaps, &flist);
+                               &map, &nmaps, &dfops);
         if (error)
                 goto error0;
         ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -368,7 +369,7 @@ xfs_qm_dqalloc(
                               dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
  
         /*
-        * xfs_bmap_finish() may commit the current transaction and
+        * xfs_defer_finish() may commit the current transaction and
          * start a second transaction if the freelist is not empty.
          *
          * Since we still want to modify this buffer, we need to
@@ -382,7 +383,7 @@ xfs_qm_dqalloc(
  
         xfs_trans_bhold(tp, bp);
  
-       error = xfs_bmap_finish(tpp, &flist, NULL);
+       error = xfs_defer_finish(tpp, &dfops, NULL);
         if (error)
                 goto error1;
  
@@ -398,7 +399,7 @@ xfs_qm_dqalloc(
         return 0;
  
  error1:
-       xfs_bmap_cancel(&flist);
+       xfs_defer_cancel(&dfops);
  error0:
         xfs_iunlock(quotip, XFS_ILOCK_EXCL);
  
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h

index 2e4f67f688560b39b9a58aa54a4168607f495918..3d224702fbc0c4f6469d1a475ffbbc70beadaead 100644 (file)
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -90,7 +90,9 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
  #define XFS_ERRTAG_STRATCMPL_IOERR                     19
  #define XFS_ERRTAG_DIOWRITE_IOERR                      20
  #define XFS_ERRTAG_BMAPIFORMAT                         21
-#define XFS_ERRTAG_MAX                                 22
+#define XFS_ERRTAG_FREE_EXTENT                         22
+#define XFS_ERRTAG_RMAP_FINISH_ONE                     23
+#define XFS_ERRTAG_MAX                                 24
  
  /*
   * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -117,6 +119,8 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
  #define XFS_RANDOM_STRATCMPL_IOERR                     (XFS_RANDOM_DEFAULT/10)
  #define XFS_RANDOM_DIOWRITE_IOERR                      (XFS_RANDOM_DEFAULT/10)
  #define        XFS_RANDOM_BMAPIFORMAT                          XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_FREE_EXTENT                         1
+#define XFS_RANDOM_RMAP_FINISH_ONE                     1
  
  #ifdef DEBUG
  extern int xfs_error_test_active;
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c

index ab779460ecbf3ab8d4c89fecdde1b4a48e66b746..d7bc14906af87f14ef570a968ead42100445c8bd 100644 (file)
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -20,12 +20,15 @@
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
+#include "xfs_bit.h"
  #include "xfs_mount.h"
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
  #include "xfs_buf_item.h"
  #include "xfs_extfree_item.h"
  #include "xfs_log.h"
+#include "xfs_btree.h"
+#include "xfs_rmap.h"
  
  
  kmem_zone_t    *xfs_efi_zone;
@@ -486,3 +489,69 @@ xfs_efd_init(
  
         return efdp;
  }
+
+/*
+ * Replay an extent free intent item (EFI) recovered from the log: validate
+ * its extents, then free them all in one transaction (cancelled on error).
+ */
+int
+xfs_efi_recover(
+       struct xfs_mount        *mp,
+       struct xfs_efi_log_item *efip)
+{
+       struct xfs_efd_log_item *efdp;
+       struct xfs_trans        *tp;
+       int                     i;
+       int                     error = 0;
+       xfs_extent_t            *extp;
+       xfs_fsblock_t           startblock_fsb;
+       struct xfs_owner_info   oinfo;
+
+       ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
+
+       /*
+        * First check the validity of every extent described by the
+        * EFI.  If any one is bad, assume that all are bad: toss the
+        * EFI and return -EIO without allocating a transaction.
+        */
+       for (i = 0; i < efip->efi_format.efi_nextents; i++) {
+               extp = &efip->efi_format.efi_extents[i];
+               startblock_fsb = XFS_BB_TO_FSB(mp,
+                                  XFS_FSB_TO_DADDR(mp, extp->ext_start));
+               if (startblock_fsb == 0 ||
+                   extp->ext_len == 0 ||
+                   startblock_fsb >= mp->m_sb.sb_dblocks ||
+                   extp->ext_len >= mp->m_sb.sb_agblocks) {
+                       /*
+                        * Flag the EFI as recovered, then release it; this
+                        * pulls it from the AIL and frees its memory.
+                        */
+                       set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
+                       xfs_efi_release(efip);
+                       return -EIO;
+               }
+       }
+
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+       if (error)
+               return error;
+       efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
+
+       xfs_rmap_skip_owner_update(&oinfo);
+       for (i = 0; i < efip->efi_format.efi_nextents; i++) {
+               extp = &efip->efi_format.efi_extents[i];
+               error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
+                                             extp->ext_len, &oinfo);
+               if (error)
+                       goto abort_error;
+
+       }
+
+       set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
+       error = xfs_trans_commit(tp);
+       return error;
+
+abort_error:
+       xfs_trans_cancel(tp);
+       return error;
+}
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h

index 8fa8651705e1dc33bb84f1234b411aca9c73ef76..a32c794a86b7b48761aac60fe598a6f8876d63f1 100644 (file)
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -98,4 +98,7 @@ int                   xfs_efi_copy_format(xfs_log_iovec_t *buf,
  void                   xfs_efi_item_free(xfs_efi_log_item_t *);
  void                   xfs_efi_release(struct xfs_efi_log_item *);
  
+int                    xfs_efi_recover(struct xfs_mount *mp,
+                                       struct xfs_efi_log_item *efip);
+
  #endif /* __XFS_EXTFREE_ITEM_H__ */
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c

index a51353a1f87f1a5e78064c0598f42397ece8f767..4a33a3304369109f2864bf97362962865f56c930 100644 (file)
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -22,6 +22,7 @@
  #include "xfs_trans_resv.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_bmap.h"
  #include "xfs_bmap_util.h"
@@ -385,7 +386,7 @@ xfs_filestream_new_ag(
         }
  
         flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
-               (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0);
+               (ap->dfops->dop_low ? XFS_PICK_LOWSPACE : 0);
  
         err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
  
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c

index 7191c3878b4a774e26be7a0d6a40a750edfa6538..0f96847b90e1175d2c6d0f497fe278e67f95e141 100644 (file)
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -23,6 +23,7 @@
  #include "xfs_trans_resv.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
  #include "xfs_inode.h"
@@ -32,6 +33,7 @@
  #include "xfs_btree.h"
  #include "xfs_alloc_btree.h"
  #include "xfs_alloc.h"
+#include "xfs_rmap_btree.h"
  #include "xfs_ialloc.h"
  #include "xfs_fsops.h"
  #include "xfs_itable.h"
@@ -40,6 +42,7 @@
  #include "xfs_trace.h"
  #include "xfs_log.h"
  #include "xfs_filestream.h"
+#include "xfs_rmap.h"
  
  /*
   * File system operations
@@ -103,7 +106,9 @@ xfs_fs_geometry(
                         (xfs_sb_version_hasfinobt(&mp->m_sb) ?
                                 XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
                         (xfs_sb_version_hassparseinodes(&mp->m_sb) ?
-                               XFS_FSOP_GEOM_FLAGS_SPINODES : 0);
+                               XFS_FSOP_GEOM_FLAGS_SPINODES : 0) |
+                       (xfs_sb_version_hasrmapbt(&mp->m_sb) ?
+                               XFS_FSOP_GEOM_FLAGS_RMAPBT : 0);
                 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
                                 mp->m_sb.sb_logsectsize : BBSIZE;
                 geo->rtsectsize = mp->m_sb.sb_blocksize;
@@ -239,10 +244,16 @@ xfs_growfs_data_private(
                 agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
                 agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
                 agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
+               if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+                       agf->agf_roots[XFS_BTNUM_RMAPi] =
+                                               cpu_to_be32(XFS_RMAP_BLOCK(mp));
+                       agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+               }
+
                 agf->agf_flfirst = cpu_to_be32(1);
                 agf->agf_fllast = 0;
                 agf->agf_flcount = 0;
-               tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
+               tmpsize = agsize - mp->m_ag_prealloc_blocks;
                 agf->agf_freeblks = cpu_to_be32(tmpsize);
                 agf->agf_longest = cpu_to_be32(tmpsize);
                 if (xfs_sb_version_hascrc(&mp->m_sb))
@@ -339,7 +350,7 @@ xfs_growfs_data_private(
                                                 agno, 0);
  
                 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
-               arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
+               arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
                 arec->ar_blockcount = cpu_to_be32(
                         agsize - be32_to_cpu(arec->ar_startblock));
  
@@ -368,7 +379,7 @@ xfs_growfs_data_private(
                                                 agno, 0);
  
                 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
-               arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
+               arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
                 arec->ar_blockcount = cpu_to_be32(
                         agsize - be32_to_cpu(arec->ar_startblock));
                 nfree += be32_to_cpu(arec->ar_blockcount);
@@ -378,6 +389,72 @@ xfs_growfs_data_private(
                 if (error)
                         goto error0;
  
+               /* RMAP btree root block */
+               if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+                       struct xfs_rmap_rec     *rrec;
+                       struct xfs_btree_block  *block;
+
+                       bp = xfs_growfs_get_hdr_buf(mp,
+                               XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
+                               BTOBB(mp->m_sb.sb_blocksize), 0,
+                               &xfs_rmapbt_buf_ops);
+                       if (!bp) {
+                               error = -ENOMEM;
+                               goto error0;
+                       }
+
+                       xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0,
+                                               agno, XFS_BTREE_CRC_BLOCKS);
+                       block = XFS_BUF_TO_BLOCK(bp);
+
+
+                       /*
+                        * Mark the AG header regions as static metadata. The BNO
+                        * btree block is the first block after the headers, so
+                        * its location defines the size of the region that the
+                        * static metadata consumes.
+                        *
+                        * Note: unlike mkfs, we never have to account for log
+                        * space when growing the data regions
+                        */
+                       rrec = XFS_RMAP_REC_ADDR(block, 1);
+                       rrec->rm_startblock = 0;
+                       rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
+                       rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
+                       rrec->rm_offset = 0;
+                       be16_add_cpu(&block->bb_numrecs, 1);
+
+                       /* account freespace btree root blocks */
+                       rrec = XFS_RMAP_REC_ADDR(block, 2);
+                       rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
+                       rrec->rm_blockcount = cpu_to_be32(2);
+                       rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+                       rrec->rm_offset = 0;
+                       be16_add_cpu(&block->bb_numrecs, 1);
+
+                       /* account inode btree root blocks */
+                       rrec = XFS_RMAP_REC_ADDR(block, 3);
+                       rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
+                       rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
+                                                       XFS_IBT_BLOCK(mp));
+                       rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
+                       rrec->rm_offset = 0;
+                       be16_add_cpu(&block->bb_numrecs, 1);
+
+                       /* account for rmap btree root */
+                       rrec = XFS_RMAP_REC_ADDR(block, 4);
+                       rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
+                       rrec->rm_blockcount = cpu_to_be32(1);
+                       rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+                       rrec->rm_offset = 0;
+                       be16_add_cpu(&block->bb_numrecs, 1);
+
+                       error = xfs_bwrite(bp);
+                       xfs_buf_relse(bp);
+                       if (error)
+                               goto error0;
+               }
+
                 /*
                  * INO btree root block
                  */
@@ -435,6 +512,8 @@ xfs_growfs_data_private(
          * There are new blocks in the old last a.g.
          */
         if (new) {
+               struct xfs_owner_info   oinfo;
+
                 /*
                  * Change the agi length.
                  */
@@ -462,14 +541,20 @@ xfs_growfs_data_private(
                        be32_to_cpu(agi->agi_length));
  
                 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
+
                 /*
                  * Free the new space.
+                *
+                * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
+                * this doesn't actually exist in the rmap btree.
                  */
-               error = xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, agno,
-                       be32_to_cpu(agf->agf_length) - new), new);
-               if (error) {
+               xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
+               error = xfs_free_extent(tp,
+                               XFS_AGB_TO_FSB(mp, agno,
+                                       be32_to_cpu(agf->agf_length) - new),
+                               new, &oinfo);
+               if (error)
                         goto error0;
-               }
         }
  
         /*
@@ -501,6 +586,7 @@ xfs_growfs_data_private(
         } else
                 mp->m_maxicount = 0;
         xfs_set_low_space_thresholds(mp);
+       mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
  
         /* update secondary superblocks. */
         for (agno = 1; agno < nagcount; agno++) {
@@ -638,7 +724,7 @@ xfs_fs_counts(
         cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
         cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
         cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
-                                                       XFS_ALLOC_SET_ASIDE(mp);
+                                               mp->m_alloc_set_aside;
  
         spin_lock(&mp->m_sb_lock);
         cnt->freertx = mp->m_sb.sb_frextents;
@@ -726,7 +812,7 @@ xfs_reserve_blocks(
         error = -ENOSPC;
         do {
                 free = percpu_counter_sum(&mp->m_fdblocks) -
-                                                       XFS_ALLOC_SET_ASIDE(mp);
+                                               mp->m_alloc_set_aside;
                 if (!free)
                         break;
  
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 8825bcfd314c1d228b83a065fa901ce6a1be7128..e08eaea6327b5c4752264c7a54996b75fd2e2447 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -25,6 +25,7 @@
  #include "xfs_trans_resv.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
@@ -1122,7 +1123,7 @@ xfs_create(
         struct xfs_inode        *ip = NULL;
         struct xfs_trans        *tp = NULL;
         int                     error;
-       xfs_bmap_free_t         free_list;
+       struct xfs_defer_ops    dfops;
         xfs_fsblock_t           first_block;
         bool                    unlock_dp_on_error = false;
         prid_t                  prid;
@@ -1182,7 +1183,7 @@ xfs_create(
                       XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
         unlock_dp_on_error = true;
  
-       xfs_bmap_init(&free_list, &first_block);
+       xfs_defer_init(&dfops, &first_block);
  
         /*
          * Reserve disk quota and the inode.
@@ -1219,7 +1220,7 @@ xfs_create(
         unlock_dp_on_error = false;
  
         error = xfs_dir_createname(tp, dp, name, ip->i_ino,
-                                       &first_block, &free_list, resblks ?
+                                       &first_block, &dfops, resblks ?
                                         resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
         if (error) {
                 ASSERT(error != -ENOSPC);
@@ -1253,7 +1254,7 @@ xfs_create(
          */
         xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
  
-       error = xfs_bmap_finish(&tp, &free_list, NULL);
+       error = xfs_defer_finish(&tp, &dfops, NULL);
         if (error)
                 goto out_bmap_cancel;
  
@@ -1269,7 +1270,7 @@ xfs_create(
         return 0;
  
   out_bmap_cancel:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
   out_trans_cancel:
         xfs_trans_cancel(tp);
   out_release_inode:
@@ -1401,7 +1402,7 @@ xfs_link(
         xfs_mount_t             *mp = tdp->i_mount;
         xfs_trans_t             *tp;
         int                     error;
-       xfs_bmap_free_t         free_list;
+       struct xfs_defer_ops    dfops;
         xfs_fsblock_t           first_block;
         int                     resblks;
  
@@ -1452,7 +1453,7 @@ xfs_link(
                         goto error_return;
         }
  
-       xfs_bmap_init(&free_list, &first_block);
+       xfs_defer_init(&dfops, &first_block);
  
         /*
          * Handle initial link state of O_TMPFILE inode
@@ -1464,7 +1465,7 @@ xfs_link(
         }
  
         error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
-                                       &first_block, &free_list, resblks);
+                                       &first_block, &dfops, resblks);
         if (error)
                 goto error_return;
         xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -1482,9 +1483,9 @@ xfs_link(
         if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
                 xfs_trans_set_sync(tp);
  
-       error = xfs_bmap_finish(&tp, &free_list, NULL);
+       error = xfs_defer_finish(&tp, &dfops, NULL);
         if (error) {
-               xfs_bmap_cancel(&free_list);
+               xfs_defer_cancel(&dfops);
                 goto error_return;
         }
  
@@ -1526,7 +1527,7 @@ xfs_itruncate_extents(
  {
         struct xfs_mount        *mp = ip->i_mount;
         struct xfs_trans        *tp = *tpp;
-       xfs_bmap_free_t         free_list;
+       struct xfs_defer_ops    dfops;
         xfs_fsblock_t           first_block;
         xfs_fileoff_t           first_unmap_block;
         xfs_fileoff_t           last_block;
@@ -1562,12 +1563,12 @@ xfs_itruncate_extents(
         ASSERT(first_unmap_block < last_block);
         unmap_len = last_block - first_unmap_block + 1;
         while (!done) {
-               xfs_bmap_init(&free_list, &first_block);
+               xfs_defer_init(&dfops, &first_block);
                 error = xfs_bunmapi(tp, ip,
                                     first_unmap_block, unmap_len,
                                     xfs_bmapi_aflag(whichfork),
                                     XFS_ITRUNC_MAX_EXTENTS,
-                                   &first_block, &free_list,
+                                   &first_block, &dfops,
                                     &done);
                 if (error)
                         goto out_bmap_cancel;
@@ -1576,7 +1577,7 @@ xfs_itruncate_extents(
                  * Duplicate the transaction that has the permanent
                  * reservation and commit the old transaction.
                  */
-               error = xfs_bmap_finish(&tp, &free_list, ip);
+               error = xfs_defer_finish(&tp, &dfops, ip);
                 if (error)
                         goto out_bmap_cancel;
  
@@ -1602,7 +1603,7 @@ out_bmap_cancel:
          * the transaction can be properly aborted.  We just need to make sure
          * we're not holding any resources that we were not when we came in.
          */
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
         goto out;
  }
  
@@ -1743,7 +1744,7 @@ STATIC int
  xfs_inactive_ifree(
         struct xfs_inode *ip)
  {
-       xfs_bmap_free_t         free_list;
+       struct xfs_defer_ops    dfops;
         xfs_fsblock_t           first_block;
         struct xfs_mount        *mp = ip->i_mount;
         struct xfs_trans        *tp;
@@ -1780,8 +1781,8 @@ xfs_inactive_ifree(
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, 0);
  
-       xfs_bmap_init(&free_list, &first_block);
-       error = xfs_ifree(tp, ip, &free_list);
+       xfs_defer_init(&dfops, &first_block);
+       error = xfs_ifree(tp, ip, &dfops);
         if (error) {
                 /*
                  * If we fail to free the inode, shut down.  The cancel
@@ -1807,11 +1808,11 @@ xfs_inactive_ifree(
          * Just ignore errors at this point.  There is nothing we can do except
          * to try to keep going. Make sure it's not a silent error.
          */
-       error = xfs_bmap_finish(&tp, &free_list, NULL);
+       error = xfs_defer_finish(&tp, &dfops, NULL);
         if (error) {
-               xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
+               xfs_notice(mp, "%s: xfs_defer_finish returned error %d",
                         __func__, error);
-               xfs_bmap_cancel(&free_list);
+               xfs_defer_cancel(&dfops);
         }
         error = xfs_trans_commit(tp);
         if (error)
@@ -2367,7 +2368,7 @@ int
  xfs_ifree(
         xfs_trans_t     *tp,
         xfs_inode_t     *ip,
-       xfs_bmap_free_t *flist)
+       struct xfs_defer_ops    *dfops)
  {
         int                     error;
         struct xfs_icluster     xic = { 0 };
@@ -2386,7 +2387,7 @@ xfs_ifree(
         if (error)
                 return error;
  
-       error = xfs_difree(tp, ip->i_ino, flist, &xic);
+       error = xfs_difree(tp, ip->i_ino, dfops, &xic);
         if (error)
                 return error;
  
@@ -2474,7 +2475,7 @@ xfs_iunpin_wait(
   * directory entry.
   *
   * This is still safe from a transactional point of view - it is not until we
- * get to xfs_bmap_finish() that we have the possibility of multiple
+ * get to xfs_defer_finish() that we have the possibility of multiple
   * transactions in this operation. Hence as long as we remove the directory
   * entry and drop the link count in the first transaction of the remove
   * operation, there are no transactional constraints on the ordering here.
@@ -2489,7 +2490,7 @@ xfs_remove(
         xfs_trans_t             *tp = NULL;
         int                     is_dir = S_ISDIR(VFS_I(ip)->i_mode);
         int                     error = 0;
-       xfs_bmap_free_t         free_list;
+       struct xfs_defer_ops    dfops;
         xfs_fsblock_t           first_block;
         uint                    resblks;
  
@@ -2571,9 +2572,9 @@ xfs_remove(
         if (error)
                 goto out_trans_cancel;
  
-       xfs_bmap_init(&free_list, &first_block);
+       xfs_defer_init(&dfops, &first_block);
         error = xfs_dir_removename(tp, dp, name, ip->i_ino,
-                                       &first_block, &free_list, resblks);
+                                       &first_block, &dfops, resblks);
         if (error) {
                 ASSERT(error != -ENOENT);
                 goto out_bmap_cancel;
@@ -2587,7 +2588,7 @@ xfs_remove(
         if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
                 xfs_trans_set_sync(tp);
  
-       error = xfs_bmap_finish(&tp, &free_list, NULL);
+       error = xfs_defer_finish(&tp, &dfops, NULL);
         if (error)
                 goto out_bmap_cancel;
  
@@ -2601,7 +2602,7 @@ xfs_remove(
         return 0;
  
   out_bmap_cancel:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
   out_trans_cancel:
         xfs_trans_cancel(tp);
   std_return:
@@ -2662,7 +2663,7 @@ xfs_sort_for_rename(
  static int
  xfs_finish_rename(
         struct xfs_trans        *tp,
-       struct xfs_bmap_free    *free_list)
+       struct xfs_defer_ops    *dfops)
  {
         int                     error;
  
@@ -2673,9 +2674,9 @@ xfs_finish_rename(
         if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
                 xfs_trans_set_sync(tp);
  
-       error = xfs_bmap_finish(&tp, free_list, NULL);
+       error = xfs_defer_finish(&tp, dfops, NULL);
         if (error) {
-               xfs_bmap_cancel(free_list);
+               xfs_defer_cancel(dfops);
                 xfs_trans_cancel(tp);
                 return error;
         }
@@ -2697,7 +2698,7 @@ xfs_cross_rename(
         struct xfs_inode        *dp2,
         struct xfs_name         *name2,
         struct xfs_inode        *ip2,
-       struct xfs_bmap_free    *free_list,
+       struct xfs_defer_ops    *dfops,
         xfs_fsblock_t           *first_block,
         int                     spaceres)
  {
@@ -2709,14 +2710,14 @@ xfs_cross_rename(
         /* Swap inode number for dirent in first parent */
         error = xfs_dir_replace(tp, dp1, name1,
                                 ip2->i_ino,
-                               first_block, free_list, spaceres);
+                               first_block, dfops, spaceres);
         if (error)
                 goto out_trans_abort;
  
         /* Swap inode number for dirent in second parent */
         error = xfs_dir_replace(tp, dp2, name2,
                                 ip1->i_ino,
-                               first_block, free_list, spaceres);
+                               first_block, dfops, spaceres);
         if (error)
                 goto out_trans_abort;
  
@@ -2731,7 +2732,7 @@ xfs_cross_rename(
                 if (S_ISDIR(VFS_I(ip2)->i_mode)) {
                         error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
                                                 dp1->i_ino, first_block,
-                                               free_list, spaceres);
+                                               dfops, spaceres);
                         if (error)
                                 goto out_trans_abort;
  
@@ -2758,7 +2759,7 @@ xfs_cross_rename(
                 if (S_ISDIR(VFS_I(ip1)->i_mode)) {
                         error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
                                                 dp2->i_ino, first_block,
-                                               free_list, spaceres);
+                                               dfops, spaceres);
                         if (error)
                                 goto out_trans_abort;
  
@@ -2797,10 +2798,10 @@ xfs_cross_rename(
         }
         xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
         xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
-       return xfs_finish_rename(tp, free_list);
+       return xfs_finish_rename(tp, dfops);
  
  out_trans_abort:
-       xfs_bmap_cancel(free_list);
+       xfs_defer_cancel(dfops);
         xfs_trans_cancel(tp);
         return error;
  }
@@ -2855,7 +2856,7 @@ xfs_rename(
  {
         struct xfs_mount        *mp = src_dp->i_mount;
         struct xfs_trans        *tp;
-       struct xfs_bmap_free    free_list;
+       struct xfs_defer_ops    dfops;
         xfs_fsblock_t           first_block;
         struct xfs_inode        *wip = NULL;            /* whiteout inode */
         struct xfs_inode        *inodes[__XFS_SORT_INODES];
@@ -2944,13 +2945,13 @@ xfs_rename(
                 goto out_trans_cancel;
         }
  
-       xfs_bmap_init(&free_list, &first_block);
+       xfs_defer_init(&dfops, &first_block);
  
         /* RENAME_EXCHANGE is unique from here on. */
         if (flags & RENAME_EXCHANGE)
                 return xfs_cross_rename(tp, src_dp, src_name, src_ip,
                                         target_dp, target_name, target_ip,
-                                       &free_list, &first_block, spaceres);
+                                       &dfops, &first_block, spaceres);
  
         /*
          * Set up the target.
@@ -2972,7 +2973,7 @@ xfs_rename(
                  */
                 error = xfs_dir_createname(tp, target_dp, target_name,
                                                 src_ip->i_ino, &first_block,
-                                               &free_list, spaceres);
+                                               &dfops, spaceres);
                 if (error)
                         goto out_bmap_cancel;
  
@@ -3012,7 +3013,7 @@ xfs_rename(
                  */
                 error = xfs_dir_replace(tp, target_dp, target_name,
                                         src_ip->i_ino,
-                                       &first_block, &free_list, spaceres);
+                                       &first_block, &dfops, spaceres);
                 if (error)
                         goto out_bmap_cancel;
  
@@ -3047,7 +3048,7 @@ xfs_rename(
                  */
                 error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
                                         target_dp->i_ino,
-                                       &first_block, &free_list, spaceres);
+                                       &first_block, &dfops, spaceres);
                 ASSERT(error != -EEXIST);
                 if (error)
                         goto out_bmap_cancel;
@@ -3086,10 +3087,10 @@ xfs_rename(
          */
         if (wip) {
                 error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
-                                       &first_block, &free_list, spaceres);
+                                       &first_block, &dfops, spaceres);
         } else
                 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
-                                          &first_block, &free_list, spaceres);
+                                          &first_block, &dfops, spaceres);
         if (error)
                 goto out_bmap_cancel;
  
@@ -3124,13 +3125,13 @@ xfs_rename(
         if (new_parent)
                 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
  
-       error = xfs_finish_rename(tp, &free_list);
+       error = xfs_finish_rename(tp, &dfops);
         if (wip)
                 IRELE(wip);
         return error;
  
  out_bmap_cancel:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
  out_trans_cancel:
         xfs_trans_cancel(tp);
  out_release_wip:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h

index 8eb78ec4a6e227d6034848edaf2a8fce77c2ca7e..e1a411e08f00f6b6e0815b6f7af3a58aed746db2 100644 (file)
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -27,7 +27,7 @@
  struct xfs_dinode;
  struct xfs_inode;
  struct xfs_buf;
-struct xfs_bmap_free;
+struct xfs_defer_ops;
  struct xfs_bmbt_irec;
  struct xfs_inode_log_item;
  struct xfs_mount;
@@ -398,7 +398,7 @@ uint                xfs_ilock_attr_map_shared(struct xfs_inode *);
  
  uint           xfs_ip2xflags(struct xfs_inode *);
  int            xfs_ifree(struct xfs_trans *, xfs_inode_t *,
-                          struct xfs_bmap_free *);
+                          struct xfs_defer_ops *);
  int            xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
                                       int, xfs_fsize_t);
  void           xfs_iext_realloc(xfs_inode_t *, int, int);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c

index 9a7c87809d3b331bce018873f06ebae2ecc700ec..cf46658392ceadebc09b35ccf4dd5f7b4db8ddb5 100644 (file)
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -387,6 +387,7 @@ xfs_attrlist_by_handle(
  {
         int                     error = -ENOMEM;
         attrlist_cursor_kern_t  *cursor;
+       struct xfs_fsop_attrlist_handlereq __user       *p = arg;
         xfs_fsop_attrlist_handlereq_t al_hreq;
         struct dentry           *dentry;
         char                    *kbuf;
@@ -419,6 +420,11 @@ xfs_attrlist_by_handle(
         if (error)
                 goto out_kfree;
  
+       if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) {
+               error = -EFAULT;
+               goto out_kfree;
+       }
+
         if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
                 error = -EFAULT;
  
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c

index 620fc91204443c62466283c1e92553656cde35f1..2114d53df433134a35084635b5238e2a775c6f0f 100644 (file)
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -23,6 +23,7 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_bmap_btree.h"
@@ -128,7 +129,7 @@ xfs_iomap_write_direct(
         int             quota_flag;
         int             rt;
         xfs_trans_t     *tp;
-       xfs_bmap_free_t free_list;
+       struct xfs_defer_ops dfops;
         uint            qblocks, resblks, resrtextents;
         int             error;
         int             lockmode;
@@ -231,18 +232,18 @@ xfs_iomap_write_direct(
          * From this point onwards we overwrite the imap pointer that the
          * caller gave to us.
          */
-       xfs_bmap_init(&free_list, &firstfsb);
+       xfs_defer_init(&dfops, &firstfsb);
         nimaps = 1;
         error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
                                 bmapi_flags, &firstfsb, resblks, imap,
-                               &nimaps, &free_list);
+                               &nimaps, &dfops);
         if (error)
                 goto out_bmap_cancel;
  
         /*
          * Complete the transaction
          */
-       error = xfs_bmap_finish(&tp, &free_list, NULL);
+       error = xfs_defer_finish(&tp, &dfops, NULL);
         if (error)
                 goto out_bmap_cancel;
  
@@ -266,7 +267,7 @@ out_unlock:
         return error;
  
  out_bmap_cancel:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
         xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
  out_trans_cancel:
         xfs_trans_cancel(tp);
@@ -685,7 +686,7 @@ xfs_iomap_write_allocate(
         xfs_fileoff_t   offset_fsb, last_block;
         xfs_fileoff_t   end_fsb, map_start_fsb;
         xfs_fsblock_t   first_block;
-       xfs_bmap_free_t free_list;
+       struct xfs_defer_ops    dfops;
         xfs_filblks_t   count_fsb;
         xfs_trans_t     *tp;
         int             nimaps;
@@ -727,7 +728,7 @@ xfs_iomap_write_allocate(
                         xfs_ilock(ip, XFS_ILOCK_EXCL);
                         xfs_trans_ijoin(tp, ip, 0);
  
-                       xfs_bmap_init(&free_list, &first_block);
+                       xfs_defer_init(&dfops, &first_block);
  
                         /*
                          * it is possible that the extents have changed since
@@ -783,11 +784,11 @@ xfs_iomap_write_allocate(
                         error = xfs_bmapi_write(tp, ip, map_start_fsb,
                                                 count_fsb, 0, &first_block,
                                                 nres, imap, &nimaps,
-                                               &free_list);
+                                               &dfops);
                         if (error)
                                 goto trans_cancel;
  
-                       error = xfs_bmap_finish(&tp, &free_list, NULL);
+                       error = xfs_defer_finish(&tp, &dfops, NULL);
                         if (error)
                                 goto trans_cancel;
  
@@ -821,7 +822,7 @@ xfs_iomap_write_allocate(
         }
  
  trans_cancel:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
         xfs_trans_cancel(tp);
  error0:
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -842,7 +843,7 @@ xfs_iomap_write_unwritten(
         int             nimaps;
         xfs_trans_t     *tp;
         xfs_bmbt_irec_t imap;
-       xfs_bmap_free_t free_list;
+       struct xfs_defer_ops dfops;
         xfs_fsize_t     i_size;
         uint            resblks;
         int             error;
@@ -886,11 +887,11 @@ xfs_iomap_write_unwritten(
                 /*
                  * Modify the unwritten extent state of the buffer.
                  */
-               xfs_bmap_init(&free_list, &firstfsb);
+               xfs_defer_init(&dfops, &firstfsb);
                 nimaps = 1;
                 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
                                         XFS_BMAPI_CONVERT, &firstfsb, resblks,
-                                       &imap, &nimaps, &free_list);
+                                       &imap, &nimaps, &dfops);
                 if (error)
                         goto error_on_bmapi_transaction;
  
@@ -909,7 +910,7 @@ xfs_iomap_write_unwritten(
                         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
                 }
  
-               error = xfs_bmap_finish(&tp, &free_list, NULL);
+               error = xfs_defer_finish(&tp, &dfops, NULL);
                 if (error)
                         goto error_on_bmapi_transaction;
  
@@ -936,7 +937,7 @@ xfs_iomap_write_unwritten(
         return 0;
  
  error_on_bmapi_transaction:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
         xfs_trans_cancel(tp);
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
         return error;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

index 83599784384686c2cb306bd2c2843422d5a2966c..e8638fd2c0c3a046c9ede4a2e4891c8b12110d92 100644 (file)
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -43,6 +43,7 @@
  #include "xfs_bmap_btree.h"
  #include "xfs_error.h"
  #include "xfs_dir2.h"
+#include "xfs_rmap_item.h"
  
  #define BLK_AVG(blk1, blk2)    ((blk1+blk2) >> 1)
  
@@ -1911,6 +1912,8 @@ xlog_recover_reorder_trans(
                 case XFS_LI_QUOTAOFF:
                 case XFS_LI_EFD:
                 case XFS_LI_EFI:
+               case XFS_LI_RUI:
+               case XFS_LI_RUD:
                         trace_xfs_log_recover_item_reorder_tail(log,
                                                         trans, item, pass);
                         list_move_tail(&item->ri_list, &inode_list);
@@ -2228,6 +2231,7 @@ xlog_recover_get_buf_lsn(
         case XFS_ABTC_CRC_MAGIC:
         case XFS_ABTB_MAGIC:
         case XFS_ABTC_MAGIC:
+       case XFS_RMAP_CRC_MAGIC:
         case XFS_IBT_CRC_MAGIC:
         case XFS_IBT_MAGIC: {
                 struct xfs_btree_block *btb = blk;
@@ -2396,6 +2400,9 @@ xlog_recover_validate_buf_type(
                 case XFS_BMAP_MAGIC:
                         bp->b_ops = &xfs_bmbt_buf_ops;
                         break;
+               case XFS_RMAP_CRC_MAGIC:
+                       bp->b_ops = &xfs_rmapbt_buf_ops;
+                       break;
                 default:
                         xfs_warn(mp, "Bad btree block magic!");
                         ASSERT(0);
@@ -3414,6 +3421,99 @@ xlog_recover_efd_pass2(
         return 0;
  }
  
+/*
+ * Pass 2 handler for a logged RUI: create an in-core rmap update intent
+ * from the rui format structure found in item->ri_buf[0].  An in-core
+ * rui sized by rui_nextents is set up with xfs_rui_init(), the logged
+ * format is copied into it, and the rui is added to the AIL at @lsn.
+ * Returns 0, or the xfs_rui_copy_format() error after freeing the rui.
+ */
+STATIC int
+xlog_recover_rui_pass2(
+       struct xlog                     *log,
+       struct xlog_recover_item        *item,
+       xfs_lsn_t                       lsn)
+{
+       int                             error;
+       struct xfs_mount                *mp = log->l_mp;
+       struct xfs_rui_log_item         *ruip;
+       struct xfs_rui_log_format       *rui_formatp;
+
+       rui_formatp = item->ri_buf[0].i_addr;
+
+       ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
+       error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
+       if (error) {
+               xfs_rui_item_free(ruip);
+               return error;
+       }
+       atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
+
+       spin_lock(&log->l_ailp->xa_lock);
+       /*
+        * The RUI has two references: one for the RUD and one for the RUI
+        * itself, to ensure it makes it into the AIL. Insert the RUI into the
+        * AIL directly and drop the RUI reference. Note that
+        * xfs_trans_ail_update() drops the AIL lock.
+        */
+       xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
+       xfs_rui_release(ruip);
+       return 0;
+}
+
+
+/*
+ * This routine is called when an RUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding RUI if it
+ * was still in the log. It walks the AIL for an RUI whose rui_id equals the
+ * rud_rui_id in the RUD and, on a match, drops the RUD reference, which
+ * removes the RUI from the AIL and frees it. Always returns 0.
+ */
+STATIC int
+xlog_recover_rud_pass2(
+       struct xlog                     *log,
+       struct xlog_recover_item        *item)
+{
+       struct xfs_rud_log_format       *rud_formatp;
+       struct xfs_rui_log_item         *ruip = NULL;
+       struct xfs_log_item             *lip;
+       __uint64_t                      rui_id;
+       struct xfs_ail_cursor           cur;
+       struct xfs_ail                  *ailp = log->l_ailp;
+
+       rud_formatp = item->ri_buf[0].i_addr;
+       ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
+       rui_id = rud_formatp->rud_rui_id;
+
+       /*
+        * Search for the RUI with the id in the RUD format structure in the
+        * AIL.
+        */
+       spin_lock(&ailp->xa_lock);
+       lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+       while (lip != NULL) {
+               if (lip->li_type == XFS_LI_RUI) {
+                       ruip = (struct xfs_rui_log_item *)lip;
+                       if (ruip->rui_format.rui_id == rui_id) {
+                               /*
+                                * Drop the RUD reference to the RUI. This
+                                * removes the RUI from the AIL and frees it.
+                                */
+                               spin_unlock(&ailp->xa_lock);
+                               xfs_rui_release(ruip);
+                               spin_lock(&ailp->xa_lock);
+                               break;
+                       }
+               }
+               lip = xfs_trans_ail_cursor_next(ailp, &cur);
+       }
+
+       xfs_trans_ail_cursor_done(&cur);
+       spin_unlock(&ailp->xa_lock);
+
+       return 0;
+}
+
  /*
   * This routine is called when an inode create format structure is found in a
   * committed transaction in the log.  It's purpose is to initialise the inodes
@@ -3639,6 +3739,8 @@ xlog_recover_ra_pass2(
         case XFS_LI_EFI:
         case XFS_LI_EFD:
         case XFS_LI_QUOTAOFF:
+       case XFS_LI_RUI:
+       case XFS_LI_RUD:
         default:
                 break;
         }
@@ -3662,6 +3764,8 @@ xlog_recover_commit_pass1(
         case XFS_LI_EFD:
         case XFS_LI_DQUOT:
         case XFS_LI_ICREATE:
+       case XFS_LI_RUI:
+       case XFS_LI_RUD:
                 /* nothing to do in pass 1 */
                 return 0;
         default:
@@ -3692,6 +3796,10 @@ xlog_recover_commit_pass2(
                 return xlog_recover_efi_pass2(log, item, trans->r_lsn);
         case XFS_LI_EFD:
                 return xlog_recover_efd_pass2(log, item);
+       case XFS_LI_RUI:
+               return xlog_recover_rui_pass2(log, item, trans->r_lsn);
+       case XFS_LI_RUD:
+               return xlog_recover_rud_pass2(log, item);
         case XFS_LI_DQUOT:
                 return xlog_recover_dquot_pass2(log, buffer_list, item,
                                                 trans->r_lsn);
@@ -4164,126 +4272,156 @@ xlog_recover_process_data(
         return 0;
  }
  
-/*
- * Process an extent free intent item that was recovered from
- * the log.  We need to free the extents that it describes.
- */
+/* Recover the EFI if necessary. */
  STATIC int
  xlog_recover_process_efi(
-       xfs_mount_t             *mp,
-       xfs_efi_log_item_t      *efip)
+       struct xfs_mount                *mp,
+       struct xfs_ail                  *ailp,
+       struct xfs_log_item             *lip)
  {
-       xfs_efd_log_item_t      *efdp;
-       xfs_trans_t             *tp;
-       int                     i;
-       int                     error = 0;
-       xfs_extent_t            *extp;
-       xfs_fsblock_t           startblock_fsb;
-
-       ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
+       struct xfs_efi_log_item         *efip;
+       int                             error;
  
         /*
-        * First check the validity of the extents described by the
-        * EFI.  If any are bad, then assume that all are bad and
-        * just toss the EFI.
+        * Skip EFIs that we've already processed.
          */
-       for (i = 0; i < efip->efi_format.efi_nextents; i++) {
-               extp = &(efip->efi_format.efi_extents[i]);
-               startblock_fsb = XFS_BB_TO_FSB(mp,
-                                  XFS_FSB_TO_DADDR(mp, extp->ext_start));
-               if ((startblock_fsb == 0) ||
-                   (extp->ext_len == 0) ||
-                   (startblock_fsb >= mp->m_sb.sb_dblocks) ||
-                   (extp->ext_len >= mp->m_sb.sb_agblocks)) {
-                       /*
-                        * This will pull the EFI from the AIL and
-                        * free the memory associated with it.
-                        */
-                       set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
-                       xfs_efi_release(efip);
-                       return -EIO;
-               }
-       }
+       efip = container_of(lip, struct xfs_efi_log_item, efi_item);
+       if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
+               return 0;
  
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
-       if (error)
-               return error;
-       efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
+       spin_unlock(&ailp->xa_lock);
+       error = xfs_efi_recover(mp, efip);
+       spin_lock(&ailp->xa_lock);
  
-       for (i = 0; i < efip->efi_format.efi_nextents; i++) {
-               extp = &(efip->efi_format.efi_extents[i]);
-               error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
-                                             extp->ext_len);
-               if (error)
-                       goto abort_error;
+       return error;
+}
  
-       }
+/* Release the EFI since we're cancelling everything. */
+STATIC void
+xlog_recover_cancel_efi(
+       struct xfs_mount                *mp,
+       struct xfs_ail                  *ailp,
+       struct xfs_log_item             *lip)
+{
+       struct xfs_efi_log_item         *efip;
  
-       set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
-       error = xfs_trans_commit(tp);
-       return error;
+       efip = container_of(lip, struct xfs_efi_log_item, efi_item);
+
+       spin_unlock(&ailp->xa_lock);
+       xfs_efi_release(efip);
+       spin_lock(&ailp->xa_lock);
+}
+
+/* Recover the RUI if necessary. */
+STATIC int
+xlog_recover_process_rui(
+       struct xfs_mount                *mp,
+       struct xfs_ail                  *ailp,
+       struct xfs_log_item             *lip)
+{
+       struct xfs_rui_log_item         *ruip;
+       int                             error;
+
+       /*
+        * Skip RUIs that we've already processed.
+        */
+       ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
+       if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
+               return 0;
+
+       spin_unlock(&ailp->xa_lock);
+       error = xfs_rui_recover(mp, ruip);
+       spin_lock(&ailp->xa_lock);
  
-abort_error:
-       xfs_trans_cancel(tp);
         return error;
  }
  
+/* Cancel: release the RUI, dropping the AIL lock around the release call. */
+STATIC void
+xlog_recover_cancel_rui(
+       struct xfs_mount                *mp,
+       struct xfs_ail                  *ailp,
+       struct xfs_log_item             *lip)
+{
+       struct xfs_rui_log_item         *ruip;
+
+       ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
+
+       spin_unlock(&ailp->xa_lock);
+       xfs_rui_release(ruip);
+       spin_lock(&ailp->xa_lock);
+}
+
+/* Is this log item a deferred action intent? */
+static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
+{
+       switch (lip->li_type) {
+       case XFS_LI_EFI:
+       case XFS_LI_RUI:
+               return true;
+       default:
+               return false;
+       }
+}
+
  /*
- * When this is called, all of the EFIs which did not have
- * corresponding EFDs should be in the AIL.  What we do now
- * is free the extents associated with each one.
+ * When this is called, all of the log intent items which did not have
+ * corresponding log done items should be in the AIL.  What we do now
+ * is update the data structures associated with each one.
   *
- * Since we process the EFIs in normal transactions, they
- * will be removed at some point after the commit.  This prevents
- * us from just walking down the list processing each one.
- * We'll use a flag in the EFI to skip those that we've already
- * processed and use the AIL iteration mechanism's generation
- * count to try to speed this up at least a bit.
+ * Since we process the log intent items in normal transactions, they
+ * will be removed at some point after the commit.  This prevents us
+ * from just walking down the list processing each one.  We'll use a
+ * flag in the intent item to skip those that we've already processed
+ * and use the AIL iteration mechanism's generation count to try to
+ * speed this up at least a bit.
   *
- * When we start, we know that the EFIs are the only things in
- * the AIL.  As we process them, however, other items are added
- * to the AIL.  Since everything added to the AIL must come after
- * everything already in the AIL, we stop processing as soon as
- * we see something other than an EFI in the AIL.
+ * When we start, we know that the intents are the only things in the
+ * AIL.  As we process them, however, other items are added to the
+ * AIL.
   */
  STATIC int
-xlog_recover_process_efis(
+xlog_recover_process_intents(
         struct xlog             *log)
  {
         struct xfs_log_item     *lip;
-       struct xfs_efi_log_item *efip;
         int                     error = 0;
         struct xfs_ail_cursor   cur;
         struct xfs_ail          *ailp;
+       xfs_lsn_t               last_lsn;
  
         ailp = log->l_ailp;
         spin_lock(&ailp->xa_lock);
         lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+       last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
         while (lip != NULL) {
                 /*
-                * We're done when we see something other than an EFI.
-                * There should be no EFIs left in the AIL now.
+                * We're done when we see something other than an intent.
+                * There should be no intents left in the AIL now.
                  */
-               if (lip->li_type != XFS_LI_EFI) {
+               if (!xlog_item_is_intent(lip)) {
  #ifdef DEBUG
                         for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
-                               ASSERT(lip->li_type != XFS_LI_EFI);
+                               ASSERT(!xlog_item_is_intent(lip));
  #endif
                         break;
                 }
  
                 /*
-                * Skip EFIs that we've already processed.
+                * We should never see a redo item with a LSN higher than
+                * the last transaction we found in the log at the start
+                * of recovery.
                  */
-               efip = container_of(lip, struct xfs_efi_log_item, efi_item);
-               if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
-                       lip = xfs_trans_ail_cursor_next(ailp, &cur);
-                       continue;
-               }
+               ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0);
  
-               spin_unlock(&ailp->xa_lock);
-               error = xlog_recover_process_efi(log->l_mp, efip);
-               spin_lock(&ailp->xa_lock);
+               switch (lip->li_type) {
+               case XFS_LI_EFI:
+                       error = xlog_recover_process_efi(log->l_mp, ailp, lip);
+                       break;
+               case XFS_LI_RUI:
+                       error = xlog_recover_process_rui(log->l_mp, ailp, lip);
+                       break;
+               }
                 if (error)
                         goto out;
                 lip = xfs_trans_ail_cursor_next(ailp, &cur);
@@ -4295,15 +4433,14 @@ out:
  }
  
  /*
- * A cancel occurs when the mount has failed and we're bailing out. Release all
- * pending EFIs so they don't pin the AIL.
+ * A cancel occurs when the mount has failed and we're bailing out.
+ * Release all pending log intent items so they don't pin the AIL.
   */
  STATIC int
-xlog_recover_cancel_efis(
+xlog_recover_cancel_intents(
         struct xlog             *log)
  {
         struct xfs_log_item     *lip;
-       struct xfs_efi_log_item *efip;
         int                     error = 0;
         struct xfs_ail_cursor   cur;
         struct xfs_ail          *ailp;
@@ -4313,22 +4450,25 @@ xlog_recover_cancel_efis(
         lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
         while (lip != NULL) {
                 /*
-                * We're done when we see something other than an EFI.
-                * There should be no EFIs left in the AIL now.
+                * We're done when we see something other than an intent.
+                * There should be no intents left in the AIL now.
                  */
-               if (lip->li_type != XFS_LI_EFI) {
+               if (!xlog_item_is_intent(lip)) {
  #ifdef DEBUG
                         for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
-                               ASSERT(lip->li_type != XFS_LI_EFI);
+                               ASSERT(!xlog_item_is_intent(lip));
  #endif
                         break;
                 }
  
-               efip = container_of(lip, struct xfs_efi_log_item, efi_item);
-
-               spin_unlock(&ailp->xa_lock);
-               xfs_efi_release(efip);
-               spin_lock(&ailp->xa_lock);
+               switch (lip->li_type) {
+               case XFS_LI_EFI:
+                       xlog_recover_cancel_efi(log->l_mp, ailp, lip);
+                       break;
+               case XFS_LI_RUI:
+                       xlog_recover_cancel_rui(log->l_mp, ailp, lip);
+                       break;
+               }
  
                 lip = xfs_trans_ail_cursor_next(ailp, &cur);
         }
@@ -5023,6 +5163,7 @@ xlog_do_recover(
                 xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
                 return error;
         }
+       mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
  
         xlog_recover_check_summary(log);
  
@@ -5139,16 +5280,17 @@ xlog_recover_finish(
          */
         if (log->l_flags & XLOG_RECOVERY_NEEDED) {
                 int     error;
-               error = xlog_recover_process_efis(log);
+               error = xlog_recover_process_intents(log);
                 if (error) {
-                       xfs_alert(log->l_mp, "Failed to recover EFIs");
+                       xfs_alert(log->l_mp, "Failed to recover intents");
                         return error;
                 }
+
                 /*
-                * Sync the log to get all the EFIs out of the AIL.
+                * Sync the log to get all the intents out of the AIL.
                  * This isn't absolutely necessary, but it helps in
                  * case the unlink transactions would have problems
-                * pushing the EFIs out of the way.
+                * pushing the intents out of the way.
                  */
                 xfs_log_force(log->l_mp, XFS_LOG_SYNC);
  
@@ -5173,7 +5315,7 @@ xlog_recover_cancel(
         int             error = 0;
  
         if (log->l_flags & XLOG_RECOVERY_NEEDED)
-               error = xlog_recover_cancel_efis(log);
+               error = xlog_recover_cancel_intents(log);
  
         return error;
  }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index 970c19ba2f560f62bc077d6797c6713848dcd5ec..faeead671f9ff02af6ca9c797a1480611ed4c95b 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -24,6 +24,7 @@
  #include "xfs_bit.h"
  #include "xfs_sb.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
  #include "xfs_inode.h"
@@ -41,6 +42,7 @@
  #include "xfs_trace.h"
  #include "xfs_icache.h"
  #include "xfs_sysfs.h"
+#include "xfs_rmap_btree.h"
  
  
  static DEFINE_MUTEX(xfs_uuid_table_mutex);
@@ -230,6 +232,8 @@ xfs_initialize_perag(
  
         if (maxagi)
                 *maxagi = index;
+
+       mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
         return 0;
  
  out_unwind:
@@ -679,6 +683,7 @@ xfs_mountfs(
         xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
         xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
         xfs_ialloc_compute_maxlevels(mp);
+       xfs_rmapbt_compute_maxlevels(mp);
  
         xfs_set_maxicount(mp);
  
@@ -1216,7 +1221,7 @@ xfs_mod_fdblocks(
                 batch = XFS_FDBLOCKS_BATCH;
  
         __percpu_counter_add(&mp->m_fdblocks, delta, batch);
-       if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp),
+       if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
                                      XFS_FDBLOCKS_BATCH) >= 0) {
                 /* we had space! */
                 return 0;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h

index c1b798c7212618462ee2130814eac8544c691bf0..b36676cde10302acd3937b5308f11a6c7c42db03 100644 (file)
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -116,9 +116,15 @@ typedef struct xfs_mount {
         uint                    m_bmap_dmnr[2]; /* min bmap btree records */
         uint                    m_inobt_mxr[2]; /* max inobt btree records */
         uint                    m_inobt_mnr[2]; /* min inobt btree records */
+       uint                    m_rmap_mxr[2];  /* max rmap btree records */
+       uint                    m_rmap_mnr[2];  /* min rmap btree records */
         uint                    m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
         uint                    m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
         uint                    m_in_maxlevels; /* max inobt btree levels. */
+       uint                    m_rmap_maxlevels; /* max rmap btree levels */
+       xfs_extlen_t            m_ag_prealloc_blocks; /* reserved ag blocks */
+       uint                    m_alloc_set_aside; /* space we can't use */
+       uint                    m_ag_max_usable; /* max space per AG */
         struct radix_tree_root  m_perag_tree;   /* per-ag accounting info */
         spinlock_t              m_perag_lock;   /* lock for m_perag_tree */
         struct mutex            m_growlock;     /* growfs mutex */
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h

index 0cc8d8f74356759917f778d02d1e9a79888dd1f6..69e2986a377619876ac3757ed6d255db9433b91a 100644 (file)
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -49,11 +49,14 @@ xfs_check_ondisk_structs(void)
         XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr,          56);
         XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key,             4);
         XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec,             16);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key,              20);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec,              24);
         XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp,             8);
         XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t,                  8);
         XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t,                  4);
         XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t,                  8);
         XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t,                  4);
+       XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t,                   4);
  
         /* dir/attr trees */
         XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr,        80);
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c

new file mode 100644 (file)

index 0000000..2500f28
--- /dev/null
+++ b/fs/xfs/xfs_rmap_item.c
@@ -0,0 +1,536 @@
+/*
+ * Copyright (C) 2016 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_buf_item.h"
+#include "xfs_rmap_item.h"
+#include "xfs_log.h"
+#include "xfs_rmap.h"
+
+
+kmem_zone_t    *xfs_rui_zone;
+kmem_zone_t    *xfs_rud_zone;
+
+static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
+{
+       return container_of(lip, struct xfs_rui_log_item, rui_item);
+}
+
+void
+xfs_rui_item_free(
+       struct xfs_rui_log_item *ruip)
+{
+       if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
+               kmem_free(ruip);
+       else
+               kmem_zone_free(xfs_rui_zone, ruip);
+}
+
+/*
+ * This returns the number of bytes needed to log the given rui item.
+ * An rui item is logged as a single iovec holding the rui_log_format
+ * structure followed by one xfs_map_extent per extent past the first.
+ */
+static inline int
+xfs_rui_item_sizeof(
+       struct xfs_rui_log_item *ruip)
+{
+       return sizeof(struct xfs_rui_log_format) +
+                       (ruip->rui_format.rui_nextents - 1) *
+                       sizeof(struct xfs_map_extent);
+}
+
+STATIC void
+xfs_rui_item_size(
+       struct xfs_log_item     *lip,
+       int                     *nvecs,
+       int                     *nbytes)
+{
+       *nvecs += 1;
+       *nbytes += xfs_rui_item_sizeof(RUI_ITEM(lip));
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given rui log item. We use only 1 iovec, and we point that
+ * at the rui_log_format structure embedded in the rui item.
+ * It is at this point that we assert that all of the extent
+ * slots in the rui item have been filled.
+ */
+STATIC void
+xfs_rui_item_format(
+       struct xfs_log_item     *lip,
+       struct xfs_log_vec      *lv)
+{
+       struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
+       struct xfs_log_iovec    *vecp = NULL;
+
+       ASSERT(atomic_read(&ruip->rui_next_extent) ==
+                       ruip->rui_format.rui_nextents);
+
+       ruip->rui_format.rui_type = XFS_LI_RUI;
+       ruip->rui_format.rui_size = 1;
+
+       xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
+                       xfs_rui_item_sizeof(ruip));
+}
+
+/*
+ * Pinning has no meaning for an rui item, so just return.
+ */
+STATIC void
+xfs_rui_item_pin(
+       struct xfs_log_item     *lip)
+{
+}
+
+/*
+ * The unpin operation is the last place an RUI is manipulated in the log. It is
+ * either inserted in the AIL or aborted in the event of a log I/O error. In
+ * either case, the RUI transaction has been successfully committed to make it
+ * this far. Therefore, we expect whoever committed the RUI to either construct
+ * and commit the RUD or drop the RUD's reference in the event of error. Simply
+ * drop the log's RUI reference now that the log is done with it.
+ */
+STATIC void
+xfs_rui_item_unpin(
+       struct xfs_log_item     *lip,
+       int                     remove)
+{
+       struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
+
+       xfs_rui_release(ruip);
+}
+
+/*
+ * RUI items have no locking or pushing.  However, since RUIs are pulled from
+ * the AIL when their corresponding RUDs are committed to disk, their situation
+ * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
+ * will eventually flush the log.  This should help in getting the RUI out of
+ * the AIL.
+ */
+STATIC uint
+xfs_rui_item_push(
+       struct xfs_log_item     *lip,
+       struct list_head        *buffer_list)
+{
+       return XFS_ITEM_PINNED;
+}
+
+/*
+ * The RUI has been either committed or aborted if the transaction has been
+ * cancelled. If the transaction was cancelled, an RUD isn't going to be
+ * constructed and thus we free the RUI here directly.
+ */
+STATIC void
+xfs_rui_item_unlock(
+       struct xfs_log_item     *lip)
+{
+       if (lip->li_flags & XFS_LI_ABORTED)
+               xfs_rui_item_free(RUI_ITEM(lip));
+}
+
+/*
+ * The RUI is logged only once and cannot be moved in the log, so simply return
+ * the lsn at which it's been logged.
+ */
+STATIC xfs_lsn_t
+xfs_rui_item_committed(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+       return lsn;
+}
+
+/*
+ * The RUI dependency tracking op doesn't do squat.  It can't because
+ * it doesn't know where the free extent is coming from.  The dependency
+ * tracking has to be handled by the "enclosing" metadata object.  For
+ * example, for inodes, the inode is locked throughout the extent freeing
+ * so the dependency should be recorded there.
+ */
+STATIC void
+xfs_rui_item_committing(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+}
+
+/*
+ * This is the ops vector shared by all rui log items.
+ */
+static const struct xfs_item_ops xfs_rui_item_ops = {
+       .iop_size       = xfs_rui_item_size,
+       .iop_format     = xfs_rui_item_format,
+       .iop_pin        = xfs_rui_item_pin,
+       .iop_unpin      = xfs_rui_item_unpin,
+       .iop_unlock     = xfs_rui_item_unlock,
+       .iop_committed  = xfs_rui_item_committed,
+       .iop_push       = xfs_rui_item_push,
+       .iop_committing = xfs_rui_item_committing,
+};
+
+/*
+ * Allocate and initialize an rui item with the given number of extents.
+ */
+struct xfs_rui_log_item *
+xfs_rui_init(
+       struct xfs_mount                *mp,
+       uint                            nextents)
+
+{
+       struct xfs_rui_log_item         *ruip;
+       uint                            size;
+
+       ASSERT(nextents > 0);
+       if (nextents > XFS_RUI_MAX_FAST_EXTENTS) {
+               size = (uint)(sizeof(struct xfs_rui_log_item) +
+                       ((nextents - 1) * sizeof(struct xfs_map_extent)));
+               ruip = kmem_zalloc(size, KM_SLEEP);
+       } else {
+               ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP);
+       }
+
+       xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops);
+       ruip->rui_format.rui_nextents = nextents;
+       ruip->rui_format.rui_id = (uintptr_t)(void *)ruip;
+       atomic_set(&ruip->rui_next_extent, 0);
+       atomic_set(&ruip->rui_refcount, 2);
+
+       return ruip;
+}
+
+/*
+ * Copy an RUI format buffer from the given buf, and into the destination
+ * RUI format structure.  The RUI/RUD items were designed not to need any
+ * special alignment handling.
+ */
+int
+xfs_rui_copy_format(
+       struct xfs_log_iovec            *buf,
+       struct xfs_rui_log_format       *dst_rui_fmt)
+{
+       struct xfs_rui_log_format       *src_rui_fmt;
+       uint                            len;
+
+       src_rui_fmt = buf->i_addr;
+       len = sizeof(struct xfs_rui_log_format) +
+                       (src_rui_fmt->rui_nextents - 1) *
+                       sizeof(struct xfs_map_extent);
+
+       if (buf->i_len != len)
+               return -EFSCORRUPTED;
+
+       memcpy((char *)dst_rui_fmt, (char *)src_rui_fmt, len);
+       return 0;
+}
+
+/*
+ * Freeing the RUI requires that we remove it from the AIL if it has already
+ * been placed there. However, the RUI may not yet have been placed in the AIL
+ * when called by xfs_rui_release() from RUD processing due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Hence the reference
+ * count to ensure only the last caller frees the RUI.
+ */
+void
+xfs_rui_release(
+       struct xfs_rui_log_item *ruip)
+{
+       if (atomic_dec_and_test(&ruip->rui_refcount)) {
+               xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
+               xfs_rui_item_free(ruip);
+       }
+}
+
+static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip)
+{
+       return container_of(lip, struct xfs_rud_log_item, rud_item);
+}
+
+STATIC void
+xfs_rud_item_size(
+       struct xfs_log_item     *lip,
+       int                     *nvecs,
+       int                     *nbytes)
+{
+       *nvecs += 1;
+       *nbytes += sizeof(struct xfs_rud_log_format);
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given rud log item. We use only 1 iovec, and we point that
+ * at the rud_log_format structure embedded in the rud item.
+ * Unlike the rui item, there are no extent slots to check here;
+ * only the fixed-size rud_log_format structure is logged.
+ */
+STATIC void
+xfs_rud_item_format(
+       struct xfs_log_item     *lip,
+       struct xfs_log_vec      *lv)
+{
+       struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+       struct xfs_log_iovec    *vecp = NULL;
+
+       rudp->rud_format.rud_type = XFS_LI_RUD;
+       rudp->rud_format.rud_size = 1;
+
+       xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format,
+                       sizeof(struct xfs_rud_log_format));
+}
+
+/*
+ * Pinning has no meaning for an rud item, so just return.
+ */
+STATIC void
+xfs_rud_item_pin(
+       struct xfs_log_item     *lip)
+{
+}
+
+/*
+ * Since pinning has no meaning for an rud item, unpinning does
+ * not either.
+ */
+STATIC void
+xfs_rud_item_unpin(
+       struct xfs_log_item     *lip,
+       int                     remove)
+{
+}
+
+/*
+ * There isn't much you can do to push on an rud item.  It is simply stuck
+ * waiting for the log to be flushed to disk.
+ */
+STATIC uint
+xfs_rud_item_push(
+       struct xfs_log_item     *lip,
+       struct list_head        *buffer_list)
+{
+       return XFS_ITEM_PINNED;
+}
+
+/*
+ * The RUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the RUI and free the
+ * RUD.
+ */
+STATIC void
+xfs_rud_item_unlock(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+
+       if (lip->li_flags & XFS_LI_ABORTED) {
+               xfs_rui_release(rudp->rud_ruip);
+               kmem_zone_free(xfs_rud_zone, rudp);
+       }
+}
+
+/*
+ * When the rud item is committed to disk, all we need to do is delete our
+ * reference to our partner rui item and then free ourselves. Since we're
+ * freeing ourselves we must return -1 to keep the transaction code from
+ * further referencing this item.
+ */
+STATIC xfs_lsn_t
+xfs_rud_item_committed(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+       struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+
+       /*
+        * Drop the RUI reference regardless of whether the RUD has been
+        * aborted. Once the RUD transaction is constructed, it is the sole
+        * responsibility of the RUD to release the RUI (even if the RUI is
+        * aborted due to log I/O error).
+        */
+       xfs_rui_release(rudp->rud_ruip);
+       kmem_zone_free(xfs_rud_zone, rudp);
+
+       return (xfs_lsn_t)-1;
+}
+
+/*
+ * The RUD dependency tracking op doesn't do squat.  It can't because
+ * it doesn't know where the free extent is coming from.  The dependency
+ * tracking has to be handled by the "enclosing" metadata object.  For
+ * example, for inodes, the inode is locked throughout the extent freeing
+ * so the dependency should be recorded there.
+ */
+STATIC void
+xfs_rud_item_committing(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+}
+
+/*
+ * This is the ops vector shared by all rud log items.
+ */
+static const struct xfs_item_ops xfs_rud_item_ops = {
+       .iop_size       = xfs_rud_item_size,
+       .iop_format     = xfs_rud_item_format,
+       .iop_pin        = xfs_rud_item_pin,
+       .iop_unpin      = xfs_rud_item_unpin,
+       .iop_unlock     = xfs_rud_item_unlock,
+       .iop_committed  = xfs_rud_item_committed,
+       .iop_push       = xfs_rud_item_push,
+       .iop_committing = xfs_rud_item_committing,
+};
+
+/*
+ * Allocate and initialize an rud item paired with the given rui item.
+ */
+struct xfs_rud_log_item *
+xfs_rud_init(
+       struct xfs_mount                *mp,
+       struct xfs_rui_log_item         *ruip)
+
+{
+       struct xfs_rud_log_item *rudp;
+
+       rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
+       xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops);
+       rudp->rud_ruip = ruip;
+       rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
+
+       return rudp;
+}
+
+/*
+ * Process an rmap update intent item that was recovered from the log.
+ * We need to update the rmapbt.
+ */
+int
+xfs_rui_recover(
+       struct xfs_mount                *mp,
+       struct xfs_rui_log_item         *ruip)
+{
+       int                             i;
+       int                             error = 0;
+       struct xfs_map_extent           *rmap;
+       xfs_fsblock_t                   startblock_fsb;
+       bool                            op_ok;
+       struct xfs_rud_log_item         *rudp;
+       enum xfs_rmap_intent_type       type;
+       int                             whichfork;
+       xfs_exntst_t                    state;
+       struct xfs_trans                *tp;
+       struct xfs_btree_cur            *rcur = NULL;
+
+       ASSERT(!test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags));
+
+       /*
+        * First check the validity of the extents described by the
+        * RUI.  If any are bad, then assume that all are bad and
+        * just toss the RUI.
+        */
+       for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
+               rmap = &ruip->rui_format.rui_extents[i];
+               startblock_fsb = XFS_BB_TO_FSB(mp,
+                                  XFS_FSB_TO_DADDR(mp, rmap->me_startblock));
+               switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
+               case XFS_RMAP_EXTENT_MAP:
+               case XFS_RMAP_EXTENT_UNMAP:
+               case XFS_RMAP_EXTENT_CONVERT:
+               case XFS_RMAP_EXTENT_ALLOC:
+               case XFS_RMAP_EXTENT_FREE:
+                       op_ok = true;
+                       break;
+               default:
+                       op_ok = false;
+                       break;
+               }
+               if (!op_ok || startblock_fsb == 0 ||
+                   rmap->me_len == 0 ||
+                   startblock_fsb >= mp->m_sb.sb_dblocks ||
+                   rmap->me_len >= mp->m_sb.sb_agblocks ||
+                   (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) {
+                       /*
+                        * This will pull the RUI from the AIL and
+                        * free the memory associated with it.
+                        */
+                       set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
+                       xfs_rui_release(ruip);
+                       return -EIO;
+               }
+       }
+
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+       if (error)
+               return error;
+       rudp = xfs_trans_get_rud(tp, ruip);
+
+       for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
+               rmap = &ruip->rui_format.rui_extents[i];
+               state = (rmap->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ?
+                               XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+               whichfork = (rmap->me_flags & XFS_RMAP_EXTENT_ATTR_FORK) ?
+                               XFS_ATTR_FORK : XFS_DATA_FORK;
+               switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
+               case XFS_RMAP_EXTENT_MAP:
+                       type = XFS_RMAP_MAP;
+                       break;
+               case XFS_RMAP_EXTENT_UNMAP:
+                       type = XFS_RMAP_UNMAP;
+                       break;
+               case XFS_RMAP_EXTENT_CONVERT:
+                       type = XFS_RMAP_CONVERT;
+                       break;
+               case XFS_RMAP_EXTENT_ALLOC:
+                       type = XFS_RMAP_ALLOC;
+                       break;
+               case XFS_RMAP_EXTENT_FREE:
+                       type = XFS_RMAP_FREE;
+                       break;
+               default:
+                       error = -EFSCORRUPTED;
+                       goto abort_error;
+               }
+               error = xfs_trans_log_finish_rmap_update(tp, rudp, type,
+                               rmap->me_owner, whichfork,
+                               rmap->me_startoff, rmap->me_startblock,
+                               rmap->me_len, state, &rcur);
+               if (error)
+                       goto abort_error;
+
+       }
+
+       xfs_rmap_finish_one_cleanup(tp, rcur, error);
+       set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
+       error = xfs_trans_commit(tp);
+       return error;
+
+abort_error:
+       xfs_rmap_finish_one_cleanup(tp, rcur, error);
+       xfs_trans_cancel(tp);
+       return error;
+}
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h

new file mode 100644 (file)

index 0000000..aefcc3a
--- /dev/null
+++ b/fs/xfs/xfs_rmap_item.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2016 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef        __XFS_RMAP_ITEM_H__
+#define        __XFS_RMAP_ITEM_H__
+
+/*
+ * There are (currently) five rmap btree redo item types: map, unmap, convert,
+ * alloc, and free.  Each is logged as an RUI (rmap update intent) paired with
+ * an RUD (rmap update done).  The redo item type is encoded in the flags
+ * field of each xfs_map_extent.
+ *
+ * *I items should be recorded in the *first* of a series of rolled
+ * transactions, and the *D items should be recorded in the same transaction
+ * that records the associated rmapbt updates.  Typically, the first
+ * transaction will record a bmbt update, followed by some number of
+ * transactions containing rmapbt updates, and finally transactions with any
+ * bnobt/cntbt updates.
+ *
+ * Should the system crash after the commit of the first transaction but
+ * before the commit of the final transaction in a series, log recovery will
+ * use the redo information recorded by the intent items to replay the
+ * (rmapbt/bnobt/cntbt) metadata updates in the non-first transaction.
+ */
+
+/* kernel only RUI/RUD definitions */
+
+struct xfs_mount;
+struct kmem_zone;
+
+/*
+ * Max number of extents in fast allocation path.
+ */
+#define        XFS_RUI_MAX_FAST_EXTENTS        16
+
+/*
+ * Define RUI flag bits. Manipulated by set/clear/test_bit operators.
+ */
+#define        XFS_RUI_RECOVERED               1
+
+/*
+ * This is the "rmap update intent" log item.  It is used to log the fact that
+ * some reverse mappings need to change.  It is used in conjunction with the
+ * "rmap update done" log item described below.
+ *
+ * These log items follow the same rules as struct xfs_efi_log_item; see the
+ * comments about that structure (in xfs_extfree_item.h) for more details.
+ */
+struct xfs_rui_log_item {
+       struct xfs_log_item             rui_item;       /* embedded generic log item */
+       atomic_t                        rui_refcount;   /* NOTE(review): presumably dropped via xfs_rui_release() - confirm */
+       atomic_t                        rui_next_extent;        /* NOTE(review): presumably next unused rui_extents[] slot - confirm */
+       unsigned long                   rui_flags;      /* XFS_RUI_* bit numbers, e.g. XFS_RUI_RECOVERED */
+       struct xfs_rui_log_format       rui_format;     /* carries rui_nextents and rui_extents[] */
+};
+
+/*
+ * This is the "rmap update done" log item.  It is used to log the fact that
+ * some rmapbt updates mentioned in an earlier rui item have been performed.
+ */
+struct xfs_rud_log_item {
+       struct xfs_log_item             rud_item;       /* embedded generic log item */
+       struct xfs_rui_log_item         *rud_ruip;      /* the RUI this done item refers to */
+       struct xfs_rud_log_format       rud_format;     /* RUD format structure */
+};
+
+extern struct kmem_zone        *xfs_rui_zone;
+extern struct kmem_zone        *xfs_rud_zone;
+
+struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
+struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *,
+               struct xfs_rui_log_item *);
+int xfs_rui_copy_format(struct xfs_log_iovec *buf,
+               struct xfs_rui_log_format *dst_rui_fmt);
+void xfs_rui_item_free(struct xfs_rui_log_item *);
+void xfs_rui_release(struct xfs_rui_log_item *);
+int xfs_rui_recover(struct xfs_mount *mp, struct xfs_rui_log_item *ruip);
+
+#endif /* __XFS_RMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c

index 3938b37d1043bb6fd98879fa4783b6bbec8cfef6..802bcc326d9fbe37fab8b412ce69b37390d94ba0 100644 (file)
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -23,6 +23,7 @@
  #include "xfs_trans_resv.h"
  #include "xfs_bit.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_inode.h"
  #include "xfs_bmap.h"
  #include "xfs_bmap_util.h"
@@ -769,7 +770,7 @@ xfs_growfs_rt_alloc(
         xfs_daddr_t             d;              /* disk block address */
         int                     error;          /* error return value */
         xfs_fsblock_t           firstblock;/* first block allocated in xaction */
-       struct xfs_bmap_free    flist;          /* list of freed blocks */
+       struct xfs_defer_ops    dfops;          /* list of freed blocks */
         xfs_fsblock_t           fsbno;          /* filesystem block for bno */
         struct xfs_bmbt_irec    map;            /* block map output */
         int                     nmap;           /* number of block maps */
@@ -794,14 +795,14 @@ xfs_growfs_rt_alloc(
                 xfs_ilock(ip, XFS_ILOCK_EXCL);
                 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
  
-               xfs_bmap_init(&flist, &firstblock);
+               xfs_defer_init(&dfops, &firstblock);
                 /*
                  * Allocate blocks to the bitmap file.
                  */
                 nmap = 1;
                 error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
                                         XFS_BMAPI_METADATA, &firstblock,
-                                       resblks, &map, &nmap, &flist);
+                                       resblks, &map, &nmap, &dfops);
                 if (!error && nmap < 1)
                         error = -ENOSPC;
                 if (error)
@@ -809,7 +810,7 @@ xfs_growfs_rt_alloc(
                 /*
                  * Free any blocks freed up in the transaction, then commit.
                  */
-               error = xfs_bmap_finish(&tp, &flist, NULL);
+               error = xfs_defer_finish(&tp, &dfops, NULL);
                 if (error)
                         goto out_bmap_cancel;
                 error = xfs_trans_commit(tp);
@@ -862,7 +863,7 @@ xfs_growfs_rt_alloc(
         return 0;
  
  out_bmap_cancel:
-       xfs_bmap_cancel(&flist);
+       xfs_defer_cancel(&dfops);
  out_trans_cancel:
         xfs_trans_cancel(tp);
         return error;
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c

index d266e835ecc3eb22f92a7400aa67718501a20fff..6e812fe0fd43cc04b4f879c053c296f6cc1a5092 100644 (file)
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -61,6 +61,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
                 { "bmbt2",              XFSSTAT_END_BMBT_V2             },
                 { "ibt2",               XFSSTAT_END_IBT_V2              },
                 { "fibt2",              XFSSTAT_END_FIBT_V2             },
+               { "rmapbt",             XFSSTAT_END_RMAP_V2             },
                 /* we print both series of quota information together */
                 { "qm",                 XFSSTAT_END_QM                  },
         };
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h

index 483b0eff198836ca1516cf56bdffc5f283af8dad..657865f51e78332dae8ff7892e93213c07f71e6e 100644 (file)
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -197,7 +197,23 @@ struct xfsstats {
         __uint32_t              xs_fibt_2_alloc;
         __uint32_t              xs_fibt_2_free;
         __uint32_t              xs_fibt_2_moves;
-#define XFSSTAT_END_XQMSTAT            (XFSSTAT_END_FIBT_V2+6)
+#define XFSSTAT_END_RMAP_V2            (XFSSTAT_END_FIBT_V2+15)
+       __uint32_t              xs_rmap_2_lookup;
+       __uint32_t              xs_rmap_2_compare;
+       __uint32_t              xs_rmap_2_insrec;
+       __uint32_t              xs_rmap_2_delrec;
+       __uint32_t              xs_rmap_2_newroot;
+       __uint32_t              xs_rmap_2_killroot;
+       __uint32_t              xs_rmap_2_increment;
+       __uint32_t              xs_rmap_2_decrement;
+       __uint32_t              xs_rmap_2_lshift;
+       __uint32_t              xs_rmap_2_rshift;
+       __uint32_t              xs_rmap_2_split;
+       __uint32_t              xs_rmap_2_join;
+       __uint32_t              xs_rmap_2_alloc;
+       __uint32_t              xs_rmap_2_free;
+       __uint32_t              xs_rmap_2_moves;
+#define XFSSTAT_END_XQMSTAT            (XFSSTAT_END_RMAP_V2+6)
         __uint32_t              xs_qm_dqreclaims;
         __uint32_t              xs_qm_dqreclaim_misses;
         __uint32_t              xs_qm_dquot_dups;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index 0303f1005f884e0314539c94bf2a36349bc31fab..24ef83ef04de2be5c8fd1e77ab62fa3b59106376 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -46,6 +46,7 @@
  #include "xfs_quota.h"
  #include "xfs_sysfs.h"
  #include "xfs_ondisk.h"
+#include "xfs_rmap_item.h"
  
  #include <linux/namei.h>
  #include <linux/init.h>
@@ -1075,7 +1076,7 @@ xfs_fs_statfs(
         statp->f_blocks = sbp->sb_dblocks - lsize;
         spin_unlock(&mp->m_sb_lock);
  
-       statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+       statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
         statp->f_bavail = statp->f_bfree;
  
         fakeinos = statp->f_bfree << sbp->sb_inopblog;
@@ -1573,6 +1574,10 @@ xfs_fs_fill_super(
                 }
         }
  
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               xfs_alert(mp,
+       "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!");
+
         error = xfs_mountfs(mp);
         if (error)
                 goto out_filestream_unmount;
@@ -1697,7 +1702,7 @@ xfs_init_zones(void)
                 goto out_free_ioend_bioset;
  
         xfs_bmap_free_item_zone = kmem_zone_init(
-                       sizeof(struct xfs_bmap_free_item),
+                       sizeof(struct xfs_extent_free_item),
                         "xfs_bmap_free_item");
         if (!xfs_bmap_free_item_zone)
                 goto out_destroy_log_ticket_zone;
@@ -1765,8 +1770,24 @@ xfs_init_zones(void)
         if (!xfs_icreate_zone)
                 goto out_destroy_ili_zone;
  
+       xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item),
+                       "xfs_rud_item");
+       if (!xfs_rud_zone)
+               goto out_destroy_icreate_zone;
+
+       xfs_rui_zone = kmem_zone_init((sizeof(struct xfs_rui_log_item) +
+                       ((XFS_RUI_MAX_FAST_EXTENTS - 1) *
+                               sizeof(struct xfs_map_extent))),
+                       "xfs_rui_item");
+       if (!xfs_rui_zone)
+               goto out_destroy_rud_zone;
+
         return 0;
  
+ out_destroy_rud_zone:
+       kmem_zone_destroy(xfs_rud_zone);
+ out_destroy_icreate_zone:
+       kmem_zone_destroy(xfs_icreate_zone);
   out_destroy_ili_zone:
         kmem_zone_destroy(xfs_ili_zone);
   out_destroy_inode_zone:
@@ -1805,6 +1826,8 @@ xfs_destroy_zones(void)
          * destroy caches.
          */
         rcu_barrier();
+       kmem_zone_destroy(xfs_rui_zone);
+       kmem_zone_destroy(xfs_rud_zone);
         kmem_zone_destroy(xfs_icreate_zone);
         kmem_zone_destroy(xfs_ili_zone);
         kmem_zone_destroy(xfs_inode_zone);
@@ -1854,6 +1877,9 @@ init_xfs_fs(void)
         printk(KERN_INFO XFS_VERSION_STRING " with "
                          XFS_BUILD_OPTIONS " enabled\n");
  
+       xfs_extent_free_init_defer_op();
+       xfs_rmap_update_init_defer_op();
+
         xfs_dir_startup();
  
         error = xfs_init_zones();
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c

index 08a46c6181fdb698bf6b6deed28e21fa6c01ce7d..58142aeeeea69d2b191354911c0ebc07fd12f3fc 100644 (file)
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -26,6 +26,7 @@
  #include "xfs_mount.h"
  #include "xfs_da_format.h"
  #include "xfs_da_btree.h"
+#include "xfs_defer.h"
  #include "xfs_dir2.h"
  #include "xfs_inode.h"
  #include "xfs_ialloc.h"
@@ -172,7 +173,7 @@ xfs_symlink(
         struct xfs_inode        *ip = NULL;
         int                     error = 0;
         int                     pathlen;
-       struct xfs_bmap_free    free_list;
+       struct xfs_defer_ops    dfops;
         xfs_fsblock_t           first_block;
         bool                    unlock_dp_on_error = false;
         xfs_fileoff_t           first_fsb;
@@ -269,7 +270,7 @@ xfs_symlink(
          * Initialize the bmap freelist prior to calling either
          * bmapi or the directory create code.
          */
-       xfs_bmap_init(&free_list, &first_block);
+       xfs_defer_init(&dfops, &first_block);
  
         /*
          * Allocate an inode for the symlink.
@@ -313,7 +314,7 @@ xfs_symlink(
  
                 error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
                                   XFS_BMAPI_METADATA, &first_block, resblks,
-                                 mval, &nmaps, &free_list);
+                                 mval, &nmaps, &dfops);
                 if (error)
                         goto out_bmap_cancel;
  
@@ -361,7 +362,7 @@ xfs_symlink(
          * Create the directory entry for the symlink.
          */
         error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
-                                       &first_block, &free_list, resblks);
+                                       &first_block, &dfops, resblks);
         if (error)
                 goto out_bmap_cancel;
         xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -376,7 +377,7 @@ xfs_symlink(
                 xfs_trans_set_sync(tp);
         }
  
-       error = xfs_bmap_finish(&tp, &free_list, NULL);
+       error = xfs_defer_finish(&tp, &dfops, NULL);
         if (error)
                 goto out_bmap_cancel;
  
@@ -392,7 +393,7 @@ xfs_symlink(
         return 0;
  
  out_bmap_cancel:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
  out_trans_cancel:
         xfs_trans_cancel(tp);
  out_release_inode:
@@ -426,7 +427,7 @@ xfs_inactive_symlink_rmt(
         int             done;
         int             error;
         xfs_fsblock_t   first_block;
-       xfs_bmap_free_t free_list;
+       struct xfs_defer_ops    dfops;
         int             i;
         xfs_mount_t     *mp;
         xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS];
@@ -465,7 +466,7 @@ xfs_inactive_symlink_rmt(
          * Find the block(s) so we can inval and unmap them.
          */
         done = 0;
-       xfs_bmap_init(&free_list, &first_block);
+       xfs_defer_init(&dfops, &first_block);
         nmaps = ARRAY_SIZE(mval);
         error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),
                                 mval, &nmaps, 0);
@@ -485,17 +486,17 @@ xfs_inactive_symlink_rmt(
                 xfs_trans_binval(tp, bp);
         }
         /*
-        * Unmap the dead block(s) to the free_list.
+        * Unmap the dead block(s) to the dfops.
          */
         error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps,
-                           &first_block, &free_list, &done);
+                           &first_block, &dfops, &done);
         if (error)
                 goto error_bmap_cancel;
         ASSERT(done);
         /*
          * Commit the first transaction.  This logs the EFI and the inode.
          */
-       error = xfs_bmap_finish(&tp, &free_list, ip);
+       error = xfs_defer_finish(&tp, &dfops, ip);
         if (error)
                 goto error_bmap_cancel;
         /*
@@ -525,7 +526,7 @@ xfs_inactive_symlink_rmt(
         return 0;
  
  error_bmap_cancel:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
  error_trans_cancel:
         xfs_trans_cancel(tp);
  error_unlock:
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c

index 13a029806805fe680a919923841c636dcc64934a..7f17ae6d709a1013f277ab608eb98f64298573f2 100644 (file)
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -22,7 +22,8 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_da_format.h"
  #include "xfs_inode.h"
  #include "xfs_btree.h"
  #include "xfs_da_btree.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h

index 145169093fe0cfd8c04dba93bb701061f35bff63..551b7e26980c51886d4bd8edb19c45824fc45198 100644 (file)
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -38,6 +38,7 @@ struct xlog_recover_item;
  struct xfs_buf_log_format;
  struct xfs_inode_log_format;
  struct xfs_bmbt_irec;
+struct xfs_btree_cur;
  
  DECLARE_EVENT_CLASS(xfs_attr_list_class,
         TP_PROTO(struct xfs_attr_list_context *ctx),
@@ -2185,6 +2186,379 @@ DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
  DEFINE_DISCARD_EVENT(xfs_discard_exclude);
  DEFINE_DISCARD_EVENT(xfs_discard_busy);
  
+/* btree cursor events: btree type, level, per-level ptr and buffer daddr */
+DECLARE_EVENT_CLASS(xfs_btree_cur_class,
+       TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp),
+       TP_ARGS(cur, level, bp),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_btnum_t, btnum)
+               __field(int, level)
+               __field(int, nlevels)
+               __field(int, ptr)
+               __field(xfs_daddr_t, daddr)
+       ),
+       TP_fast_assign(
+               __entry->dev = cur->bc_mp->m_super->s_dev;
+               __entry->btnum = cur->bc_btnum;
+               __entry->level = level;
+               __entry->nlevels = cur->bc_nlevels;
+               __entry->ptr = cur->bc_ptrs[level];     /* bc_ptrs entry for the traced level */
+               __entry->daddr = bp ? bp->b_bn : -1;    /* -1 when no buffer is passed */
+       ),
+       TP_printk("dev %d:%d btnum %d level %d/%d ptr %d daddr 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->btnum,
+                 __entry->level,
+                 __entry->nlevels,
+                 __entry->ptr,
+                 (unsigned long long)__entry->daddr)
+)
+
+#define DEFINE_BTREE_CUR_EVENT(name) \
+DEFINE_EVENT(xfs_btree_cur_class, name, \
+       TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp), \
+       TP_ARGS(cur, level, bp))
+DEFINE_BTREE_CUR_EVENT(xfs_btree_updkeys);
+DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range);
+
+/* deferred ops */
+struct xfs_defer_pending;
+struct xfs_defer_intake;
+struct xfs_defer_ops;
+
+/* Trace a defer-ops tracker: device (0 if mp is NULL), dop pointer, committed and low flags. */
+DECLARE_EVENT_CLASS(xfs_defer_class,
+       TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop),
+       TP_ARGS(mp, dop),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(void *, dop)
+               __field(bool, committed)
+               __field(bool, low)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp ? mp->m_super->s_dev : 0;
+               __entry->dop = dop;
+               __entry->committed = dop->dop_committed;
+               __entry->low = dop->dop_low;
+       ),
+       TP_printk("dev %d:%d ops %p committed %d low %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->dop,
+                 __entry->committed, __entry->low)
+)
+#define DEFINE_DEFER_EVENT(name) \
+DEFINE_EVENT(xfs_defer_class, name, \
+       TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop), \
+       TP_ARGS(mp, dop))
+
+/* Same fields as xfs_defer_class plus the error code being reported. */
+DECLARE_EVENT_CLASS(xfs_defer_error_class,
+       TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error),
+       TP_ARGS(mp, dop, error),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(void *, dop)
+               __field(bool, committed)
+               __field(bool, low)
+               __field(int, error)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp ? mp->m_super->s_dev : 0;
+               __entry->dop = dop;
+               __entry->committed = dop->dop_committed;
+               __entry->low = dop->dop_low;
+               __entry->error = error;
+       ),
+       TP_printk("dev %d:%d ops %p committed %d low %d err %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->dop,
+                 __entry->committed, __entry->low,
+                 __entry->error)
+)
+#define DEFINE_DEFER_ERROR_EVENT(name) \
+DEFINE_EVENT(xfs_defer_error_class, name, \
+       TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error), \
+       TP_ARGS(mp, dop, error))
+
+/* Trace a pending deferred item: op type, intent pointer, committed flag and dfp_count. */
+DECLARE_EVENT_CLASS(xfs_defer_pending_class,
+       TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp),
+       TP_ARGS(mp, dfp),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(int, type)
+               __field(void *, intent)
+               __field(bool, committed)
+               __field(int, nr)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp ? mp->m_super->s_dev : 0;
+               __entry->type = dfp->dfp_type->type;
+               __entry->intent = dfp->dfp_intent;
+               __entry->committed = dfp->dfp_committed;
+               __entry->nr = dfp->dfp_count;
+       ),
+       TP_printk("dev %d:%d optype %d intent %p committed %d nr %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->type,
+                 __entry->intent,
+                 __entry->committed, __entry->nr)
+)
+#define DEFINE_DEFER_PENDING_EVENT(name) \
+DEFINE_EVENT(xfs_defer_pending_class, name, \
+       TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp), \
+       TP_ARGS(mp, dfp))
+
+DECLARE_EVENT_CLASS(xfs_phys_extent_deferred_class,    /* op type plus an (agno, agbno, len) extent */
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                int type, xfs_agblock_t agbno, xfs_extlen_t len),
+       TP_ARGS(mp, agno, type, agbno, len),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(int, type)              /* shown as "op" in the output */
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->type = type;
+               __entry->agbno = agbno;
+               __entry->len = len;
+       ),
+       TP_printk("dev %d:%d op %d agno %u agbno %u len %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->type,                /* printed before agno, unlike the argument order */
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len)
+);
+#define DEFINE_PHYS_EXTENT_DEFERRED_EVENT(name) \
+DEFINE_EVENT(xfs_phys_extent_deferred_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                int type, \
+                xfs_agblock_t bno, \
+                xfs_extlen_t len), \
+       TP_ARGS(mp, agno, type, bno, len))
+
+DECLARE_EVENT_CLASS(xfs_map_extent_deferred_class,     /* op plus an AG block, owner inode, fork and file range */
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                int op,
+                xfs_agblock_t agbno,
+                xfs_ino_t ino,
+                int whichfork,
+                xfs_fileoff_t offset,
+                xfs_filblks_t len,
+                xfs_exntst_t state),
+       TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_ino_t, ino)         /* shown as "owner" in the output */
+               __field(xfs_agblock_t, agbno)
+               __field(int, whichfork)         /* shown as "attr" or "data" */
+               __field(xfs_fileoff_t, l_loff)  /* copy of the offset argument */
+               __field(xfs_filblks_t, l_len)   /* copy of the len argument */
+               __field(xfs_exntst_t, l_state)  /* copy of the state argument */
+               __field(int, op)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->ino = ino;
+               __entry->agbno = agbno;
+               __entry->whichfork = whichfork;
+               __entry->l_loff = offset;
+               __entry->l_len = len;
+               __entry->l_state = state;
+               __entry->op = op;
+       ),
+       TP_printk("dev %d:%d op %d agno %u agbno %u owner %lld %s offset %llu len %llu state %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->op,
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->ino,
+                 __entry->whichfork == XFS_ATTR_FORK ? "attr" : "data",        /* any non-attr value prints "data" */
+                 __entry->l_loff,
+                 __entry->l_len,
+                 __entry->l_state)
+);
+#define DEFINE_MAP_EXTENT_DEFERRED_EVENT(name) \
+DEFINE_EVENT(xfs_map_extent_deferred_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                int op, \
+                xfs_agblock_t agbno, \
+                xfs_ino_t ino, \
+                int whichfork, \
+                xfs_fileoff_t offset, \
+                xfs_filblks_t len, \
+                xfs_exntst_t state), \
+       TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state))
+
+DEFINE_DEFER_EVENT(xfs_defer_init);
+DEFINE_DEFER_EVENT(xfs_defer_cancel);
+DEFINE_DEFER_EVENT(xfs_defer_trans_roll);
+DEFINE_DEFER_EVENT(xfs_defer_trans_abort);
+DEFINE_DEFER_EVENT(xfs_defer_finish);
+DEFINE_DEFER_EVENT(xfs_defer_finish_done);
+
+DEFINE_DEFER_ERROR_EVENT(xfs_defer_trans_roll_error);
+DEFINE_DEFER_ERROR_EVENT(xfs_defer_finish_error);
+DEFINE_DEFER_ERROR_EVENT(xfs_defer_op_finish_error);
+
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_work);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_cancel);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_commit);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_cancel);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort);
+
+#define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
+DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer);
+DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_deferred);
+
+/* rmap tracepoints: an AG extent plus the owner/offset/flags taken from oinfo */
+DECLARE_EVENT_CLASS(xfs_rmap_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten,
+                struct xfs_owner_info *oinfo),
+       TP_ARGS(mp, agno, agbno, len, unwritten, oinfo),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+               __field(uint64_t, owner)
+               __field(uint64_t, offset)
+               __field(unsigned long, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+               __entry->owner = oinfo->oi_owner;
+               __entry->offset = oinfo->oi_offset;
+               __entry->flags = oinfo->oi_flags;
+               if (unwritten)
+                       __entry->flags |= XFS_RMAP_UNWRITTEN;   /* fold the unwritten argument into flags */
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%lx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len,
+                 __entry->owner,               /* uint64_t field, printed signed via %lld */
+                 __entry->offset,
+                 __entry->flags)
+);
+#define DEFINE_RMAP_EVENT(name) \
+DEFINE_EVENT(xfs_rmap_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, \
+                struct xfs_owner_info *oinfo), \
+       TP_ARGS(mp, agno, agbno, len, unwritten, oinfo))
+
+/* simple AG-based error tracepoint class: agno, error code and caller address (printed with %ps) */
+DECLARE_EVENT_CLASS(xfs_ag_error_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error,
+                unsigned long caller_ip),
+       TP_ARGS(mp, agno, error, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(int, error)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->error = error;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d agno %u error %d caller %ps",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->error,
+                 (char *)__entry->caller_ip)   /* cast to a pointer type for %ps */
+);
+
+#define DEFINE_AG_ERROR_EVENT(name) \
+DEFINE_EVENT(xfs_ag_error_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, \
+                unsigned long caller_ip), \
+       TP_ARGS(mp, agno, error, caller_ip))
+
+DEFINE_RMAP_EVENT(xfs_rmap_unmap);
+DEFINE_RMAP_EVENT(xfs_rmap_unmap_done);
+DEFINE_AG_ERROR_EVENT(xfs_rmap_unmap_error);
+DEFINE_RMAP_EVENT(xfs_rmap_map);
+DEFINE_RMAP_EVENT(xfs_rmap_map_done);
+DEFINE_AG_ERROR_EVENT(xfs_rmap_map_error);
+DEFINE_RMAP_EVENT(xfs_rmap_convert);
+DEFINE_RMAP_EVENT(xfs_rmap_convert_done);
+DEFINE_AG_ERROR_EVENT(xfs_rmap_convert_error);
+DEFINE_AG_ERROR_EVENT(xfs_rmap_convert_state);
+
+DECLARE_EVENT_CLASS(xfs_rmapbt_class,  /* every argument is recorded exactly as passed in */
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agblock_t agbno, xfs_extlen_t len,
+                uint64_t owner, uint64_t offset, unsigned int flags),
+       TP_ARGS(mp, agno, agbno, len, owner, offset, flags),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+               __field(uint64_t, owner)
+               __field(uint64_t, offset)
+               __field(unsigned int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+               __entry->owner = owner;
+               __entry->offset = offset;
+               __entry->flags = flags;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%x",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len,
+                 __entry->owner,               /* uint64_t field, printed signed via %lld */
+                 __entry->offset,
+                 __entry->flags)
+);
+#define DEFINE_RMAPBT_EVENT(name) \
+DEFINE_EVENT(xfs_rmapbt_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                xfs_agblock_t agbno, xfs_extlen_t len, \
+                uint64_t owner, uint64_t offset, unsigned int flags), \
+       TP_ARGS(mp, agno, agbno, len, owner, offset, flags))
+
+#define DEFINE_RMAP_DEFERRED_EVENT DEFINE_MAP_EXTENT_DEFERRED_EVENT
+DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_defer);
+DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_deferred);
+
+DEFINE_BUSY_EVENT(xfs_rmapbt_alloc_block);
+DEFINE_BUSY_EVENT(xfs_rmapbt_free_block);
+DEFINE_RMAPBT_EVENT(xfs_rmap_update);
+DEFINE_RMAPBT_EVENT(xfs_rmap_insert);
+DEFINE_RMAPBT_EVENT(xfs_rmap_delete);
+DEFINE_AG_ERROR_EVENT(xfs_rmap_insert_error);
+DEFINE_AG_ERROR_EVENT(xfs_rmap_delete_error);
+DEFINE_AG_ERROR_EVENT(xfs_rmap_update_error);
+DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_result);
+DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result);
+DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result);
+
  #endif /* _TRACE_XFS_H */
  
  #undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h

index 9b2b9fa89331c10d57818218ccd8fc985a3325f6..e2bf86aad33dfaef40dd877ef12e32a7715cbabc 100644 (file)
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -33,6 +33,9 @@ struct xfs_trans;
  struct xfs_trans_res;
  struct xfs_dquot_acct;
  struct xfs_busy_extent;
+struct xfs_rud_log_item;
+struct xfs_rui_log_item;
+struct xfs_btree_cur;
  
  typedef struct xfs_log_item {
         struct list_head                li_ail;         /* AIL pointers */
@@ -210,17 +213,14 @@ void              xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
  void           xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
  void           xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
  void           xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
-struct xfs_efi_log_item        *xfs_trans_get_efi(xfs_trans_t *, uint);
-void           xfs_trans_log_efi_extent(xfs_trans_t *,
-                                        struct xfs_efi_log_item *,
-                                        xfs_fsblock_t,
-                                        xfs_extlen_t);
-struct xfs_efd_log_item        *xfs_trans_get_efd(xfs_trans_t *,
+
+void           xfs_extent_free_init_defer_op(void);
+struct xfs_efd_log_item        *xfs_trans_get_efd(struct xfs_trans *,
                                   struct xfs_efi_log_item *,
                                   uint);
  int            xfs_trans_free_extent(struct xfs_trans *,
                                       struct xfs_efd_log_item *, xfs_fsblock_t,
-                                     xfs_extlen_t);
+                                     xfs_extlen_t, struct xfs_owner_info *);
  int            xfs_trans_commit(struct xfs_trans *);
  int            __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *);
  int            xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
@@ -236,4 +236,16 @@ void               xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
  extern kmem_zone_t     *xfs_trans_zone;
  extern kmem_zone_t     *xfs_log_item_desc_zone;
  
+/*
+ * rmap updates: entry points for deferred reverse-mapping updates and the
+ * RUI/RUD log items that track them.  The enum is only forward-declared
+ * here because it is needed for the prototype below.
+ */
+enum xfs_rmap_intent_type;
+
+void xfs_rmap_update_init_defer_op(void);
+struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
+               struct xfs_rui_log_item *ruip);
+int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
+               struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
+               __uint64_t owner, int whichfork, xfs_fileoff_t startoff,
+               xfs_fsblock_t startblock, xfs_filblks_t blockcount,
+               xfs_exntst_t state, struct xfs_btree_cur **pcur);
+
  #endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c

index a96ae540eb629c86e15c004dc66eb60fbb6be90e..459ddec137a48a2aec19d57a739e0cdbef44ae70 100644 (file)
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -21,66 +21,15 @@
  #include "xfs_format.h"
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
+#include "xfs_bit.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
  #include "xfs_extfree_item.h"
  #include "xfs_alloc.h"
-
-/*
- * This routine is called to allocate an "extent free intention"
- * log item that will hold nextents worth of extents.  The
- * caller must use all nextents extents, because we are not
- * flexible about this at all.
- */
-xfs_efi_log_item_t *
-xfs_trans_get_efi(xfs_trans_t  *tp,
-                 uint          nextents)
-{
-       xfs_efi_log_item_t      *efip;
-
-       ASSERT(tp != NULL);
-       ASSERT(nextents > 0);
-
-       efip = xfs_efi_init(tp->t_mountp, nextents);
-       ASSERT(efip != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &efip->efi_item);
-       return efip;
-}
-
-/*
- * This routine is called to indicate that the described
- * extent is to be logged as needing to be freed.  It should
- * be called once for each extent to be freed.
- */
-void
-xfs_trans_log_efi_extent(xfs_trans_t           *tp,
-                        xfs_efi_log_item_t     *efip,
-                        xfs_fsblock_t          start_block,
-                        xfs_extlen_t           ext_len)
-{
-       uint                    next_extent;
-       xfs_extent_t            *extp;
-
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
-
-       /*
-        * atomic_inc_return gives us the value after the increment;
-        * we want to use it as an array index so we need to subtract 1 from
-        * it.
-        */
-       next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
-       ASSERT(next_extent < efip->efi_format.efi_nextents);
-       extp = &(efip->efi_format.efi_extents[next_extent]);
-       extp->ext_start = start_block;
-       extp->ext_len = ext_len;
-}
-
+#include "xfs_bmap.h"
+#include "xfs_trace.h"
  
  /*
   * This routine is called to allocate an "extent free done"
@@ -88,12 +37,12 @@ xfs_trans_log_efi_extent(xfs_trans_t                *tp,
   * caller must use all nextents extents, because we are not
   * flexible about this at all.
   */
-xfs_efd_log_item_t *
-xfs_trans_get_efd(xfs_trans_t          *tp,
-                 xfs_efi_log_item_t    *efip,
-                 uint                  nextents)
+struct xfs_efd_log_item *
+xfs_trans_get_efd(struct xfs_trans             *tp,
+                 struct xfs_efi_log_item       *efip,
+                 uint                          nextents)
  {
-       xfs_efd_log_item_t      *efdp;
+       struct xfs_efd_log_item                 *efdp;
  
         ASSERT(tp != NULL);
         ASSERT(nextents > 0);
@@ -118,13 +67,19 @@ xfs_trans_free_extent(
         struct xfs_trans        *tp,
         struct xfs_efd_log_item *efdp,
         xfs_fsblock_t           start_block,
-       xfs_extlen_t            ext_len)
+       xfs_extlen_t            ext_len,
+       struct xfs_owner_info   *oinfo)
  {
+       struct xfs_mount        *mp = tp->t_mountp;
         uint                    next_extent;
+       xfs_agnumber_t          agno = XFS_FSB_TO_AGNO(mp, start_block);
+       xfs_agblock_t           agbno = XFS_FSB_TO_AGBNO(mp, start_block);
         struct xfs_extent       *extp;
         int                     error;
  
-       error = xfs_free_extent(tp, start_block, ext_len);
+       trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
+
+       error = xfs_free_extent(tp, start_block, ext_len, oinfo);
  
         /*
          * Mark the transaction dirty, even on error. This ensures the
@@ -145,3 +100,139 @@ xfs_trans_free_extent(
  
         return error;
  }
+
+/*
+ * Sort extent free items by AG.  Comparison callback (the .diff_items hook):
+ * priv is the xfs_mount used to convert each item's start block into an AG
+ * number, and the return value is a's AG number minus b's.
+ */
+static int
+xfs_extent_free_diff_items(
+       void                            *priv,
+       struct list_head                *a,
+       struct list_head                *b)
+{
+       struct xfs_mount                *mp = priv;
+       struct xfs_extent_free_item     *ra;
+       struct xfs_extent_free_item     *rb;
+
+       ra = container_of(a, struct xfs_extent_free_item, xefi_list);
+       rb = container_of(b, struct xfs_extent_free_item, xefi_list);
+       return  XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
+               XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
+}
+
+/*
+ * Get an EFI: initialise an extent free intent item with xfs_efi_init(),
+ * passing it the mount and count, add the item to the transaction, and
+ * return it as an opaque pointer.  Both tp and count are asserted to be
+ * non-zero.
+ */
+STATIC void *
+xfs_extent_free_create_intent(
+       struct xfs_trans                *tp,
+       unsigned int                    count)
+{
+       struct xfs_efi_log_item         *efip;
+
+       ASSERT(tp != NULL);
+       ASSERT(count > 0);
+
+       efip = xfs_efi_init(tp->t_mountp, count);
+       ASSERT(efip != NULL);
+
+       /*
+        * Get a log_item_desc to point at the new item.
+        */
+       xfs_trans_add_item(tp, &efip->efi_item);
+       return efip;
+}
+
+/*
+ * Log a free extent to the intent item.  Marks the transaction and the EFI's
+ * log item descriptor dirty, then copies the start block and block count of
+ * the queued xfs_extent_free_item into the next unused slot of the EFI's
+ * extent array.  intent is the EFI returned by the create_intent hook.
+ */
+STATIC void
+xfs_extent_free_log_item(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       struct list_head                *item)
+{
+       struct xfs_efi_log_item         *efip = intent;
+       struct xfs_extent_free_item     *free;
+       uint                            next_extent;
+       struct xfs_extent               *extp;
+
+       free = container_of(item, struct xfs_extent_free_item, xefi_list);
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+
+       /*
+        * atomic_inc_return gives us the value after the increment;
+        * we want to use it as an array index so we need to subtract 1 from
+        * it.
+        */
+       next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
+       ASSERT(next_extent < efip->efi_format.efi_nextents);
+       extp = &efip->efi_format.efi_extents[next_extent];
+       extp->ext_start = free->xefi_startblock;
+       extp->ext_len = free->xefi_blockcount;
+}
+
+/*
+ * Get an EFD so we can process all the free extents.  Thin wrapper that
+ * forwards tp, the intent (the EFI) and count to xfs_trans_get_efd() and
+ * returns its result as an opaque pointer.
+ */
+STATIC void *
+xfs_extent_free_create_done(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       unsigned int                    count)
+{
+       return xfs_trans_get_efd(tp, intent, count);
+}
+
+/*
+ * Process a free extent.  Frees the extent described by the queued item via
+ * xfs_trans_free_extent(), with done_item passed as the EFD, and then frees
+ * the item itself whether or not that call succeeded.  dop and state are not
+ * used by this implementation.  Returns the xfs_trans_free_extent() result.
+ */
+STATIC int
+xfs_extent_free_finish_item(
+       struct xfs_trans                *tp,
+       struct xfs_defer_ops            *dop,
+       struct list_head                *item,
+       void                            *done_item,
+       void                            **state)
+{
+       struct xfs_extent_free_item     *free;
+       int                             error;
+
+       free = container_of(item, struct xfs_extent_free_item, xefi_list);
+       error = xfs_trans_free_extent(tp, done_item,
+                       free->xefi_startblock,
+                       free->xefi_blockcount,
+                       &free->xefi_oinfo);
+       kmem_free(free);
+       return error;
+}
+
+/* Abort a pending EFI by handing the intent to xfs_efi_release(). */
+STATIC void
+xfs_extent_free_abort_intent(
+       void                            *intent)
+{
+       xfs_efi_release(intent);
+}
+
+/*
+ * Cancel a free extent: release the memory of the queued
+ * xfs_extent_free_item without freeing the extent it describes.
+ */
+STATIC void
+xfs_extent_free_cancel_item(
+       struct list_head                *item)
+{
+       struct xfs_extent_free_item     *free;
+
+       free = container_of(item, struct xfs_extent_free_item, xefi_list);
+       kmem_free(free);
+}
+
+static const struct xfs_defer_op_type xfs_extent_free_defer_type = {
+       .type           = XFS_DEFER_OPS_TYPE_FREE,
+       .max_items      = XFS_EFI_MAX_FAST_EXTENTS,
+       .diff_items     = xfs_extent_free_diff_items,   /* order by AG number */
+       .create_intent  = xfs_extent_free_create_intent, /* get an EFI */
+       .abort_intent   = xfs_extent_free_abort_intent, /* release the EFI */
+       .log_item       = xfs_extent_free_log_item,     /* record extent in EFI */
+       .create_done    = xfs_extent_free_create_done,  /* get an EFD */
+       .finish_item    = xfs_extent_free_finish_item,  /* free extent + item */
+       .cancel_item    = xfs_extent_free_cancel_item,  /* free the item only */
+};
+
+/*
+ * Register the extent-free deferred op type by passing
+ * xfs_extent_free_defer_type to xfs_defer_init_op_type().
+ */
+void
+xfs_extent_free_init_defer_op(void)
+{
+       xfs_defer_init_op_type(&xfs_extent_free_defer_type);
+}
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c

new file mode 100644 (file)

index 0000000..5a50ef8
--- /dev/null
+++ b/fs/xfs/xfs_trans_rmap.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (C) 2016 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_rmap_item.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap.h"
+
+/*
+ * Set the map extent flags for this reverse mapping.  me_flags is rebuilt
+ * from scratch: XFS_RMAP_EXTENT_UNWRITTEN is set when state is
+ * XFS_EXT_UNWRITTEN, XFS_RMAP_EXTENT_ATTR_FORK when whichfork is
+ * XFS_ATTR_FORK, and exactly one operation flag is chosen from type.  An
+ * unrecognised type trips ASSERT(0) and adds no operation flag.
+ */
+static void
+xfs_trans_set_rmap_flags(
+       struct xfs_map_extent           *rmap,
+       enum xfs_rmap_intent_type       type,
+       int                             whichfork,
+       xfs_exntst_t                    state)
+{
+       rmap->me_flags = 0;
+       if (state == XFS_EXT_UNWRITTEN)
+               rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
+       if (whichfork == XFS_ATTR_FORK)
+               rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
+       switch (type) {
+       case XFS_RMAP_MAP:
+               rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
+               break;
+       case XFS_RMAP_UNMAP:
+               rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
+               break;
+       case XFS_RMAP_CONVERT:
+               rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
+               break;
+       case XFS_RMAP_ALLOC:
+               rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
+               break;
+       case XFS_RMAP_FREE:
+               rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
+               break;
+       default:
+               ASSERT(0);
+       }
+}
+
+struct xfs_rud_log_item *
+xfs_trans_get_rud(
+       struct xfs_trans                *tp,
+       struct xfs_rui_log_item         *ruip)
+{
+       struct xfs_rud_log_item         *rudp;
+
+       rudp = xfs_rud_init(tp->t_mountp, ruip); /* RUD initialised against ruip */
+       xfs_trans_add_item(tp, &rudp->rud_item); /* add the new RUD to tp */
+       return rudp;
+}
+
+/*
+ * Finish an rmap update and log it to the RUD.  The update itself is done by
+ * xfs_rmap_finish_one(), which receives every argument except rudp; its
+ * return value is passed back to the caller unchanged.  Note that the
+ * transaction is marked dirty regardless of whether the rmap update succeeds
+ * or fails to support the RUI/RUD lifecycle rules.
+ */
+int
+xfs_trans_log_finish_rmap_update(
+       struct xfs_trans                *tp,
+       struct xfs_rud_log_item         *rudp,
+       enum xfs_rmap_intent_type       type,
+       __uint64_t                      owner,
+       int                             whichfork,
+       xfs_fileoff_t                   startoff,
+       xfs_fsblock_t                   startblock,
+       xfs_filblks_t                   blockcount,
+       xfs_exntst_t                    state,
+       struct xfs_btree_cur            **pcur)
+{
+       int                             error;
+
+       error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
+                       startblock, blockcount, state, pcur);
+
+       /*
+        * Mark the transaction dirty, even on error. This ensures the
+        * transaction is aborted, which:
+        *
+        * 1.) releases the RUI and frees the RUD
+        * 2.) shuts down the filesystem
+        */
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       rudp->rud_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+
+       return error;
+}
+
+/*
+ * Sort rmap intents by AG.  Comparison callback (the .diff_items hook): priv
+ * is the xfs_mount used to convert each intent's br_startblock into an AG
+ * number, and the return value is a's AG number minus b's.
+ */
+static int
+xfs_rmap_update_diff_items(
+       void                            *priv,
+       struct list_head                *a,
+       struct list_head                *b)
+{
+       struct xfs_mount                *mp = priv;
+       struct xfs_rmap_intent          *ra;
+       struct xfs_rmap_intent          *rb;
+
+       ra = container_of(a, struct xfs_rmap_intent, ri_list);
+       rb = container_of(b, struct xfs_rmap_intent, ri_list);
+       return  XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
+               XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
+}
+
+/*
+ * Get an RUI: initialise an rmap update intent item with xfs_rui_init(),
+ * passing it the mount and count, add the item to the transaction, and
+ * return it as an opaque pointer.  Both tp and count are asserted to be
+ * non-zero.
+ */
+STATIC void *
+xfs_rmap_update_create_intent(
+       struct xfs_trans                *tp,
+       unsigned int                    count)
+{
+       struct xfs_rui_log_item         *ruip;
+
+       ASSERT(tp != NULL);
+       ASSERT(count > 0);
+
+       ruip = xfs_rui_init(tp->t_mountp, count);
+       ASSERT(ruip != NULL);
+
+       /*
+        * Get a log_item_desc to point at the new item.
+        */
+       xfs_trans_add_item(tp, &ruip->rui_item);
+       return ruip;
+}
+
+/*
+ * Log rmap updates in the intent item.  Marks the transaction and the RUI's
+ * log item descriptor dirty, then fills the next unused slot of the RUI's
+ * extent array from the queued xfs_rmap_intent: owner, start block, start
+ * offset and length are copied directly, and the flags are derived by
+ * xfs_trans_set_rmap_flags() from the intent's type, fork and extent state.
+ */
+STATIC void
+xfs_rmap_update_log_item(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       struct list_head                *item)
+{
+       struct xfs_rui_log_item         *ruip = intent;
+       struct xfs_rmap_intent          *rmap;
+       uint                            next_extent;
+       struct xfs_map_extent           *map;
+
+       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       ruip->rui_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+
+       /*
+        * atomic_inc_return gives us the value after the increment;
+        * we want to use it as an array index so we need to subtract 1 from
+        * it.
+        */
+       next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
+       ASSERT(next_extent < ruip->rui_format.rui_nextents);
+       map = &ruip->rui_format.rui_extents[next_extent];
+       map->me_owner = rmap->ri_owner;
+       map->me_startblock = rmap->ri_bmap.br_startblock;
+       map->me_startoff = rmap->ri_bmap.br_startoff;
+       map->me_len = rmap->ri_bmap.br_blockcount;
+       xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
+                       rmap->ri_bmap.br_state);
+}
+
+/*
+ * Get an RUD so we can process all the deferred rmap updates.  Forwards tp
+ * and the intent (the RUI) to xfs_trans_get_rud(); count is not used.
+ */
+STATIC void *
+xfs_rmap_update_create_done(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       unsigned int                    count)
+{
+       return xfs_trans_get_rud(tp, intent);
+}
+
+/*
+ * Process a deferred rmap update.  Unpacks the queued xfs_rmap_intent and
+ * hands its fields to xfs_trans_log_finish_rmap_update(), with done_item as
+ * the RUD and state reinterpreted as a struct xfs_btree_cur ** for the pcur
+ * argument.  The intent is freed whether or not the update succeeded.  dop
+ * is not used here.  Returns the xfs_trans_log_finish_rmap_update() result.
+ */
+STATIC int
+xfs_rmap_update_finish_item(
+       struct xfs_trans                *tp,
+       struct xfs_defer_ops            *dop,
+       struct list_head                *item,
+       void                            *done_item,
+       void                            **state)
+{
+       struct xfs_rmap_intent          *rmap;
+       int                             error;
+
+       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+       error = xfs_trans_log_finish_rmap_update(tp, done_item,
+                       rmap->ri_type,
+                       rmap->ri_owner, rmap->ri_whichfork,
+                       rmap->ri_bmap.br_startoff,
+                       rmap->ri_bmap.br_startblock,
+                       rmap->ri_bmap.br_blockcount,
+                       rmap->ri_bmap.br_state,
+                       (struct xfs_btree_cur **)state);
+       kmem_free(rmap);
+       return error;
+}
+
+/*
+ * Clean up after processing deferred rmaps.  The opaque state pointer is
+ * treated as a struct xfs_btree_cur * and passed, together with tp and
+ * error, to xfs_rmap_finish_one_cleanup().
+ */
+STATIC void
+xfs_rmap_update_finish_cleanup(
+       struct xfs_trans        *tp,
+       void                    *state,
+       int                     error)
+{
+       struct xfs_btree_cur    *rcur = state;
+
+       xfs_rmap_finish_one_cleanup(tp, rcur, error);
+}
+
+/* Abort a pending RUI by handing the intent to xfs_rui_release(). */
+STATIC void
+xfs_rmap_update_abort_intent(
+       void                            *intent)
+{
+       xfs_rui_release(intent);
+}
+
+/*
+ * Cancel a deferred rmap update: release the memory of the queued
+ * xfs_rmap_intent without performing the update it describes.
+ */
+STATIC void
+xfs_rmap_update_cancel_item(
+       struct list_head                *item)
+{
+       struct xfs_rmap_intent          *rmap;
+
+       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+       kmem_free(rmap);
+}
+
+static const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
+       .type           = XFS_DEFER_OPS_TYPE_RMAP,
+       .max_items      = XFS_RUI_MAX_FAST_EXTENTS,
+       .diff_items     = xfs_rmap_update_diff_items,   /* order by AG number */
+       .create_intent  = xfs_rmap_update_create_intent, /* get an RUI */
+       .abort_intent   = xfs_rmap_update_abort_intent, /* release the RUI */
+       .log_item       = xfs_rmap_update_log_item,     /* record update in RUI */
+       .create_done    = xfs_rmap_update_create_done,  /* get an RUD */
+       .finish_item    = xfs_rmap_update_finish_item,  /* do update, free item */
+       .finish_cleanup = xfs_rmap_update_finish_cleanup, /* state is a btree cursor */
+       .cancel_item    = xfs_rmap_update_cancel_item,  /* free the item only */
+};
+
+/*
+ * Register the rmap-update deferred op type by passing
+ * xfs_rmap_update_defer_type to xfs_defer_init_op_type().
+ */
+void
+xfs_rmap_update_init_defer_op(void)
+{
+       xfs_defer_init_op_type(&xfs_rmap_update_defer_type);
+}
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 6 Aug 2016 13:50:36 +0000 (09:50 -0400)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 6 Aug 2016 13:50:36 +0000 (09:50 -0400)
fs/xfs/Makefile		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_alloc.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_alloc.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_alloc_btree.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_attr.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_attr_leaf.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_attr_remote.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_bmap.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_bmap.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_bmap_btree.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_btree.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_btree.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_da_btree.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_da_btree.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_da_format.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_defer.c	[new file with mode: 0644]	patch \| blob
fs/xfs/libxfs/xfs_defer.h	[new file with mode: 0644]	patch \| blob
fs/xfs/libxfs/xfs_dir2.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_dir2.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_format.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_fs.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_ialloc.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_ialloc.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_ialloc_btree.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_inode_buf.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_log_format.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_rmap.c	[new file with mode: 0644]	patch \| blob
fs/xfs/libxfs/xfs_rmap.h	[new file with mode: 0644]	patch \| blob
fs/xfs/libxfs/xfs_rmap_btree.c	[new file with mode: 0644]	patch \| blob
fs/xfs/libxfs/xfs_rmap_btree.h	[new file with mode: 0644]	patch \| blob
fs/xfs/libxfs/xfs_sb.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_shared.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_trans_resv.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_trans_resv.h		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_types.h		patch \| blob \| blame \| history
fs/xfs/xfs_bmap_util.c		patch \| blob \| blame \| history
fs/xfs/xfs_bmap_util.h		patch \| blob \| blame \| history
fs/xfs/xfs_discard.c		patch \| blob \| blame \| history
fs/xfs/xfs_dquot.c		patch \| blob \| blame \| history
fs/xfs/xfs_error.h		patch \| blob \| blame \| history
fs/xfs/xfs_extfree_item.c		patch \| blob \| blame \| history
fs/xfs/xfs_extfree_item.h		patch \| blob \| blame \| history
fs/xfs/xfs_filestream.c		patch \| blob \| blame \| history
fs/xfs/xfs_fsops.c		patch \| blob \| blame \| history
fs/xfs/xfs_inode.c		patch \| blob \| blame \| history
fs/xfs/xfs_inode.h		patch \| blob \| blame \| history
fs/xfs/xfs_ioctl.c		patch \| blob \| blame \| history
fs/xfs/xfs_iomap.c		patch \| blob \| blame \| history
fs/xfs/xfs_log_recover.c		patch \| blob \| blame \| history
fs/xfs/xfs_mount.c		patch \| blob \| blame \| history
fs/xfs/xfs_mount.h		patch \| blob \| blame \| history
fs/xfs/xfs_ondisk.h		patch \| blob \| blame \| history
fs/xfs/xfs_rmap_item.c	[new file with mode: 0644]	patch \| blob
fs/xfs/xfs_rmap_item.h	[new file with mode: 0644]	patch \| blob
fs/xfs/xfs_rtalloc.c		patch \| blob \| blame \| history
fs/xfs/xfs_stats.c		patch \| blob \| blame \| history
fs/xfs/xfs_stats.h		patch \| blob \| blame \| history
fs/xfs/xfs_super.c		patch \| blob \| blame \| history
fs/xfs/xfs_symlink.c		patch \| blob \| blame \| history
fs/xfs/xfs_trace.c		patch \| blob \| blame \| history
fs/xfs/xfs_trace.h		patch \| blob \| blame \| history
fs/xfs/xfs_trans.h		patch \| blob \| blame \| history
fs/xfs/xfs_trans_extfree.c		patch \| blob \| blame \| history
fs/xfs/xfs_trans_rmap.c	[new file with mode: 0644]	patch \| blob