fs/xfs/xfs_trans_buf.c

   1 /*
   2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3  * All Rights Reserved.
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License as
   7  * published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it would be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write the Free Software Foundation,
  16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18 #include "xfs.h"
  19 #include "xfs_fs.h"
  20 #include "xfs_shared.h"
  21 #include "xfs_log_format.h"
  22 #include "xfs_trans_resv.h"
  23 #include "xfs_sb.h"
  24 #include "xfs_ag.h"
  25 #include "xfs_mount.h"
  26 #include "xfs_bmap_btree.h"
  27 #include "xfs_alloc_btree.h"
  28 #include "xfs_ialloc_btree.h"
  29 #include "xfs_dinode.h"
  30 #include "xfs_inode.h"
  31 #include "xfs_trans.h"
  32 #include "xfs_buf_item.h"
  33 #include "xfs_trans_priv.h"
  34 #include "xfs_error.h"
  35 #include "xfs_trace.h"
  36
  37 /*
  38  * Check to see if a buffer matching the given parameters is already
  39  * a part of the given transaction.
  40  */
  41 STATIC struct xfs_buf *
  42 xfs_trans_buf_item_match(
  43         struct xfs_trans        *tp,
  44         struct xfs_buftarg      *target,
  45         struct xfs_buf_map      *map,
  46         int                     nmaps)
  47 {
  48         struct xfs_log_item_desc *lidp;
  49         struct xfs_buf_log_item *blip;
  50         int                     len = 0;
  51         int                     i;
  52
  53         for (i = 0; i < nmaps; i++)
  54                 len += map[i].bm_len;
  55
  56         list_for_each_entry(lidp, &tp->t_items, lid_trans) {
  57                 blip = (struct xfs_buf_log_item *)lidp->lid_item;
  58                 if (blip->bli_item.li_type == XFS_LI_BUF &&
  59                     blip->bli_buf->b_target == target &&
  60                     XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn &&
  61                     blip->bli_buf->b_length == len) {
  62                         ASSERT(blip->bli_buf->b_map_count == nmaps);
  63                         return blip->bli_buf;
  64                 }
  65         }
  66
  67         return NULL;
  68 }
  69
  70 /*
  71  * Add the locked buffer to the transaction.
  72  *
  73  * The buffer must be locked, and it cannot be associated with any
  74  * transaction.
  75  *
  76  * If the buffer does not yet have a buf log item associated with it,
  77  * then allocate one for it.  Then add the buf item to the transaction.
  78  */
  79 STATIC void
  80 _xfs_trans_bjoin(
  81         struct xfs_trans        *tp,
  82         struct xfs_buf          *bp,
  83         int                     reset_recur)
  84 {
  85         struct xfs_buf_log_item *bip;
  86
  87         ASSERT(bp->b_transp == NULL);
  88
  89         /*
  90          * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
  91          * it doesn't have one yet, then allocate one and initialize it.
  92          * The checks to see if one is there are in xfs_buf_item_init().
  93          */
  94         xfs_buf_item_init(bp, tp->t_mountp);
  95         bip = bp->b_fspriv;
  96         ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
  97         ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
  98         ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
  99         if (reset_recur)
 100                 bip->bli_recur = 0;
 101
 102         /*
 103          * Take a reference for this transaction on the buf item.
 104          */
 105         atomic_inc(&bip->bli_refcount);
 106
 107         /*
 108          * Get a log_item_desc to point at the new item.
 109          */
 110         xfs_trans_add_item(tp, &bip->bli_item);
 111
 112         /*
 113          * Initialize b_fsprivate2 so we can find it with incore_match()
 114          * in xfs_trans_get_buf() and friends above.
 115          */
 116         bp->b_transp = tp;
 117
 118 }
 119
 120 void
 121 xfs_trans_bjoin(
 122         struct xfs_trans        *tp,
 123         struct xfs_buf          *bp)
 124 {
 125         _xfs_trans_bjoin(tp, bp, 0);
 126         trace_xfs_trans_bjoin(bp->b_fspriv);
 127 }
 128
 129 /*
 130  * Get and lock the buffer for the caller if it is not already
 131  * locked within the given transaction.  If it is already locked
 132  * within the transaction, just increment its lock recursion count
 133  * and return a pointer to it.
 134  *
 135  * If the transaction pointer is NULL, make this just a normal
 136  * get_buf() call.
 137  */
 138 struct xfs_buf *
 139 xfs_trans_get_buf_map(
 140         struct xfs_trans        *tp,
 141         struct xfs_buftarg      *target,
 142         struct xfs_buf_map      *map,
 143         int                     nmaps,
 144         xfs_buf_flags_t         flags)
 145 {
 146         xfs_buf_t               *bp;
 147         xfs_buf_log_item_t      *bip;
 148
 149         if (!tp)
 150                 return xfs_buf_get_map(target, map, nmaps, flags);
 151
 152         /*
 153          * If we find the buffer in the cache with this transaction
 154          * pointer in its b_fsprivate2 field, then we know we already
 155          * have it locked.  In this case we just increment the lock
 156          * recursion count and return the buffer to the caller.
 157          */
 158         bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
 159         if (bp != NULL) {
 160                 ASSERT(xfs_buf_islocked(bp));
 161                 if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
 162                         xfs_buf_stale(bp);
 163                         XFS_BUF_DONE(bp);
 164                 }
 165
 166                 ASSERT(bp->b_transp == tp);
 167                 bip = bp->b_fspriv;
 168                 ASSERT(bip != NULL);
 169                 ASSERT(atomic_read(&bip->bli_refcount) > 0);
 170                 bip->bli_recur++;
 171                 trace_xfs_trans_get_buf_recur(bip);
 172                 return (bp);
 173         }
 174
 175         bp = xfs_buf_get_map(target, map, nmaps, flags);
 176         if (bp == NULL) {
 177                 return NULL;
 178         }
 179
 180         ASSERT(!bp->b_error);
 181
 182         _xfs_trans_bjoin(tp, bp, 1);
 183         trace_xfs_trans_get_buf(bp->b_fspriv);
 184         return (bp);
 185 }
 186
 187 /*
 188  * Get and lock the superblock buffer of this file system for the
 189  * given transaction.
 190  *
 191  * We don't need to use incore_match() here, because the superblock
 192  * buffer is a private buffer which we keep a pointer to in the
 193  * mount structure.
 194  */
 195 xfs_buf_t *
 196 xfs_trans_getsb(xfs_trans_t     *tp,
 197                 struct xfs_mount *mp,
 198                 int             flags)
 199 {
 200         xfs_buf_t               *bp;
 201         xfs_buf_log_item_t      *bip;
 202
 203         /*
 204          * Default to just trying to lock the superblock buffer
 205          * if tp is NULL.
 206          */
 207         if (tp == NULL) {
 208                 return (xfs_getsb(mp, flags));
 209         }
 210
 211         /*
 212          * If the superblock buffer already has this transaction
 213          * pointer in its b_fsprivate2 field, then we know we already
 214          * have it locked.  In this case we just increment the lock
 215          * recursion count and return the buffer to the caller.
 216          */
 217         bp = mp->m_sb_bp;
 218         if (bp->b_transp == tp) {
 219                 bip = bp->b_fspriv;
 220                 ASSERT(bip != NULL);
 221                 ASSERT(atomic_read(&bip->bli_refcount) > 0);
 222                 bip->bli_recur++;
 223                 trace_xfs_trans_getsb_recur(bip);
 224                 return (bp);
 225         }
 226
 227         bp = xfs_getsb(mp, flags);
 228         if (bp == NULL)
 229                 return NULL;
 230
 231         _xfs_trans_bjoin(tp, bp, 1);
 232         trace_xfs_trans_getsb(bp->b_fspriv);
 233         return (bp);
 234 }
 235
 236 #ifdef DEBUG
 237 xfs_buftarg_t *xfs_error_target;
 238 int     xfs_do_error;
 239 int     xfs_req_num;
 240 int     xfs_error_mod = 33;
 241 #endif
 242
 243 /*
 244  * Get and lock the buffer for the caller if it is not already
 245  * locked within the given transaction.  If it has not yet been
 246  * read in, read it from disk. If it is already locked
 247  * within the transaction and already read in, just increment its
 248  * lock recursion count and return a pointer to it.
 249  *
 250  * If the transaction pointer is NULL, make this just a normal
 251  * read_buf() call.
 252  */
 253 int
 254 xfs_trans_read_buf_map(
 255         struct xfs_mount        *mp,
 256         struct xfs_trans        *tp,
 257         struct xfs_buftarg      *target,
 258         struct xfs_buf_map      *map,
 259         int                     nmaps,
 260         xfs_buf_flags_t         flags,
 261         struct xfs_buf          **bpp,
 262         const struct xfs_buf_ops *ops)
 263 {
 264         xfs_buf_t               *bp;
 265         xfs_buf_log_item_t      *bip;
 266         int                     error;
 267
 268         *bpp = NULL;
 269         if (!tp) {
 270                 bp = xfs_buf_read_map(target, map, nmaps, flags, ops);
 271                 if (!bp)
 272                         return (flags & XBF_TRYLOCK) ?
 273                                         EAGAIN : XFS_ERROR(ENOMEM);
 274
 275                 if (bp->b_error) {
 276                         error = bp->b_error;
 277                         xfs_buf_ioerror_alert(bp, __func__);
 278                         XFS_BUF_UNDONE(bp);
 279                         xfs_buf_stale(bp);
 280                         xfs_buf_relse(bp);
 281                         return error;
 282                 }
 283 #ifdef DEBUG
 284                 if (xfs_do_error) {
 285                         if (xfs_error_target == target) {
 286                                 if (((xfs_req_num++) % xfs_error_mod) == 0) {
 287                                         xfs_buf_relse(bp);
 288                                         xfs_debug(mp, "Returning error!");
 289                                         return XFS_ERROR(EIO);
 290                                 }
 291                         }
 292                 }
 293 #endif
 294                 if (XFS_FORCED_SHUTDOWN(mp))
 295                         goto shutdown_abort;
 296                 *bpp = bp;
 297                 return 0;
 298         }
 299
 300         /*
 301          * If we find the buffer in the cache with this transaction
 302          * pointer in its b_fsprivate2 field, then we know we already
 303          * have it locked.  If it is already read in we just increment
 304          * the lock recursion count and return the buffer to the caller.
 305          * If the buffer is not yet read in, then we read it in, increment
 306          * the lock recursion count, and return it to the caller.
 307          */
 308         bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
 309         if (bp != NULL) {
 310                 ASSERT(xfs_buf_islocked(bp));
 311                 ASSERT(bp->b_transp == tp);
 312                 ASSERT(bp->b_fspriv != NULL);
 313                 ASSERT(!bp->b_error);
 314                 if (!(XFS_BUF_ISDONE(bp))) {
 315                         trace_xfs_trans_read_buf_io(bp, _RET_IP_);
 316                         ASSERT(!XFS_BUF_ISASYNC(bp));
 317                         ASSERT(bp->b_iodone == NULL);
 318                         XFS_BUF_READ(bp);
 319                         bp->b_ops = ops;
 320                         xfsbdstrat(tp->t_mountp, bp);
 321                         error = xfs_buf_iowait(bp);
 322                         if (error) {
 323                                 xfs_buf_ioerror_alert(bp, __func__);
 324                                 xfs_buf_relse(bp);
 325                                 /*
 326                                  * We can gracefully recover from most read
 327                                  * errors. Ones we can't are those that happen
 328                                  * after the transaction's already dirty.
 329                                  */
 330                                 if (tp->t_flags & XFS_TRANS_DIRTY)
 331                                         xfs_force_shutdown(tp->t_mountp,
 332                                                         SHUTDOWN_META_IO_ERROR);
 333                                 return error;
 334                         }
 335                 }
 336                 /*
 337                  * We never locked this buf ourselves, so we shouldn't
 338                  * brelse it either. Just get out.
 339                  */
 340                 if (XFS_FORCED_SHUTDOWN(mp)) {
 341                         trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
 342                         *bpp = NULL;
 343                         return XFS_ERROR(EIO);
 344                 }
 345
 346
 347                 bip = bp->b_fspriv;
 348                 bip->bli_recur++;
 349
 350                 ASSERT(atomic_read(&bip->bli_refcount) > 0);
 351                 trace_xfs_trans_read_buf_recur(bip);
 352                 *bpp = bp;
 353                 return 0;
 354         }
 355
 356         bp = xfs_buf_read_map(target, map, nmaps, flags, ops);
 357         if (bp == NULL) {
 358                 *bpp = NULL;
 359                 return (flags & XBF_TRYLOCK) ?
 360                                         0 : XFS_ERROR(ENOMEM);
 361         }
 362         if (bp->b_error) {
 363                 error = bp->b_error;
 364                 xfs_buf_stale(bp);
 365                 XFS_BUF_DONE(bp);
 366                 xfs_buf_ioerror_alert(bp, __func__);
 367                 if (tp->t_flags & XFS_TRANS_DIRTY)
 368                         xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
 369                 xfs_buf_relse(bp);
 370                 return error;
 371         }
 372 #ifdef DEBUG
 373         if (xfs_do_error && !(tp->t_flags & XFS_TRANS_DIRTY)) {
 374                 if (xfs_error_target == target) {
 375                         if (((xfs_req_num++) % xfs_error_mod) == 0) {
 376                                 xfs_force_shutdown(tp->t_mountp,
 377                                                    SHUTDOWN_META_IO_ERROR);
 378                                 xfs_buf_relse(bp);
 379                                 xfs_debug(mp, "Returning trans error!");
 380                                 return XFS_ERROR(EIO);
 381                         }
 382                 }
 383         }
 384 #endif
 385         if (XFS_FORCED_SHUTDOWN(mp))
 386                 goto shutdown_abort;
 387
 388         _xfs_trans_bjoin(tp, bp, 1);
 389         trace_xfs_trans_read_buf(bp->b_fspriv);
 390
 391         *bpp = bp;
 392         return 0;
 393
 394 shutdown_abort:
 395         trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
 396         xfs_buf_relse(bp);
 397         *bpp = NULL;
 398         return XFS_ERROR(EIO);
 399 }
 400
 401 /*
 402  * Release the buffer bp which was previously acquired with one of the
 403  * xfs_trans_... buffer allocation routines if the buffer has not
 404  * been modified within this transaction.  If the buffer is modified
 405  * within this transaction, do decrement the recursion count but do
 406  * not release the buffer even if the count goes to 0.  If the buffer is not
 407  * modified within the transaction, decrement the recursion count and
 408  * release the buffer if the recursion count goes to 0.
 409  *
 410  * If the buffer is to be released and it was not modified before
 411  * this transaction began, then free the buf_log_item associated with it.
 412  *
 413  * If the transaction pointer is NULL, make this just a normal
 414  * brelse() call.
 415  */
 416 void
 417 xfs_trans_brelse(xfs_trans_t    *tp,
 418                  xfs_buf_t      *bp)
 419 {
 420         xfs_buf_log_item_t      *bip;
 421
 422         /*
 423          * Default to a normal brelse() call if the tp is NULL.
 424          */
 425         if (tp == NULL) {
 426                 ASSERT(bp->b_transp == NULL);
 427                 xfs_buf_relse(bp);
 428                 return;
 429         }
 430
 431         ASSERT(bp->b_transp == tp);
 432         bip = bp->b_fspriv;
 433         ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
 434         ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
 435         ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
 436         ASSERT(atomic_read(&bip->bli_refcount) > 0);
 437
 438         trace_xfs_trans_brelse(bip);
 439
 440         /*
 441          * If the release is just for a recursive lock,
 442          * then decrement the count and return.
 443          */
 444         if (bip->bli_recur > 0) {
 445                 bip->bli_recur--;
 446                 return;
 447         }
 448
 449         /*
 450          * If the buffer is dirty within this transaction, we can't
 451          * release it until we commit.
 452          */
 453         if (bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY)
 454                 return;
 455
 456         /*
 457          * If the buffer has been invalidated, then we can't release
 458          * it until the transaction commits to disk unless it is re-dirtied
 459          * as part of this transaction.  This prevents us from pulling
 460          * the item from the AIL before we should.
 461          */
 462         if (bip->bli_flags & XFS_BLI_STALE)
 463                 return;
 464
 465         ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
 466
 467         /*
 468          * Free up the log item descriptor tracking the released item.
 469          */
 470         xfs_trans_del_item(&bip->bli_item);
 471
 472         /*
 473          * Clear the hold flag in the buf log item if it is set.
 474          * We wouldn't want the next user of the buffer to
 475          * get confused.
 476          */
 477         if (bip->bli_flags & XFS_BLI_HOLD) {
 478                 bip->bli_flags &= ~XFS_BLI_HOLD;
 479         }
 480
 481         /*
 482          * Drop our reference to the buf log item.
 483          */
 484         atomic_dec(&bip->bli_refcount);
 485
 486         /*
 487          * If the buf item is not tracking data in the log, then
 488          * we must free it before releasing the buffer back to the
 489          * free pool.  Before releasing the buffer to the free pool,
 490          * clear the transaction pointer in b_fsprivate2 to dissolve
 491          * its relation to this transaction.
 492          */
 493         if (!xfs_buf_item_dirty(bip)) {
 494 /***
 495                 ASSERT(bp->b_pincount == 0);
 496 ***/
 497                 ASSERT(atomic_read(&bip->bli_refcount) == 0);
 498                 ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
 499                 ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
 500                 xfs_buf_item_relse(bp);
 501         }
 502
 503         bp->b_transp = NULL;
 504         xfs_buf_relse(bp);
 505 }
 506
 507 /*
 508  * Mark the buffer as not needing to be unlocked when the buf item's
 509  * iop_unlock() routine is called.  The buffer must already be locked
 510  * and associated with the given transaction.
 511  */
 512 /* ARGSUSED */
 513 void
 514 xfs_trans_bhold(xfs_trans_t     *tp,
 515                 xfs_buf_t       *bp)
 516 {
 517         xfs_buf_log_item_t      *bip = bp->b_fspriv;
 518
 519         ASSERT(bp->b_transp == tp);
 520         ASSERT(bip != NULL);
 521         ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
 522         ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
 523         ASSERT(atomic_read(&bip->bli_refcount) > 0);
 524
 525         bip->bli_flags |= XFS_BLI_HOLD;
 526         trace_xfs_trans_bhold(bip);
 527 }
 528
 529 /*
 530  * Cancel the previous buffer hold request made on this buffer
 531  * for this transaction.
 532  */
 533 void
 534 xfs_trans_bhold_release(xfs_trans_t     *tp,
 535                         xfs_buf_t       *bp)
 536 {
 537         xfs_buf_log_item_t      *bip = bp->b_fspriv;
 538
 539         ASSERT(bp->b_transp == tp);
 540         ASSERT(bip != NULL);
 541         ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
 542         ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
 543         ASSERT(atomic_read(&bip->bli_refcount) > 0);
 544         ASSERT(bip->bli_flags & XFS_BLI_HOLD);
 545
 546         bip->bli_flags &= ~XFS_BLI_HOLD;
 547         trace_xfs_trans_bhold_release(bip);
 548 }
 549
 550 /*
 551  * This is called to mark bytes first through last inclusive of the given
 552  * buffer as needing to be logged when the transaction is committed.
 553  * The buffer must already be associated with the given transaction.
 554  *
 555  * First and last are numbers relative to the beginning of this buffer,
 556  * so the first byte in the buffer is numbered 0 regardless of the
 557  * value of b_blkno.
 558  */
 559 void
 560 xfs_trans_log_buf(xfs_trans_t   *tp,
 561                   xfs_buf_t     *bp,
 562                   uint          first,
 563                   uint          last)
 564 {
 565         xfs_buf_log_item_t      *bip = bp->b_fspriv;
 566
 567         ASSERT(bp->b_transp == tp);
 568         ASSERT(bip != NULL);
 569         ASSERT(first <= last && last < BBTOB(bp->b_length));
 570         ASSERT(bp->b_iodone == NULL ||
 571                bp->b_iodone == xfs_buf_iodone_callbacks);
 572
 573         /*
 574          * Mark the buffer as needing to be written out eventually,
 575          * and set its iodone function to remove the buffer's buf log
 576          * item from the AIL and free it when the buffer is flushed
 577          * to disk.  See xfs_buf_attach_iodone() for more details
 578          * on li_cb and xfs_buf_iodone_callbacks().
 579          * If we end up aborting this transaction, we trap this buffer
 580          * inside the b_bdstrat callback so that this won't get written to
 581          * disk.
 582          */
 583         XFS_BUF_DONE(bp);
 584
 585         ASSERT(atomic_read(&bip->bli_refcount) > 0);
 586         bp->b_iodone = xfs_buf_iodone_callbacks;
 587         bip->bli_item.li_cb = xfs_buf_iodone;
 588
 589         trace_xfs_trans_log_buf(bip);
 590
 591         /*
 592          * If we invalidated the buffer within this transaction, then
 593          * cancel the invalidation now that we're dirtying the buffer
 594          * again.  There are no races with the code in xfs_buf_item_unpin(),
 595          * because we have a reference to the buffer this entire time.
 596          */
 597         if (bip->bli_flags & XFS_BLI_STALE) {
 598                 bip->bli_flags &= ~XFS_BLI_STALE;
 599                 ASSERT(XFS_BUF_ISSTALE(bp));
 600                 XFS_BUF_UNSTALE(bp);
 601                 bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL;
 602         }
 603
 604         tp->t_flags |= XFS_TRANS_DIRTY;
 605         bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
 606
 607         /*
 608          * If we have an ordered buffer we are not logging any dirty range but
 609          * it still needs to be marked dirty and that it has been logged.
 610          */
 611         bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED;
 612         if (!(bip->bli_flags & XFS_BLI_ORDERED))
 613                 xfs_buf_item_log(bip, first, last);
 614 }
 615
 616
 617 /*
 618  * Invalidate a buffer that is being used within a transaction.
 619  *
 620  * Typically this is because the blocks in the buffer are being freed, so we
 621  * need to prevent it from being written out when we're done.  Allowing it
 622  * to be written again might overwrite data in the free blocks if they are
 623  * reallocated to a file.
 624  *
 625  * We prevent the buffer from being written out by marking it stale.  We can't
 626  * get rid of the buf log item at this point because the buffer may still be
 627  * pinned by another transaction.  If that is the case, then we'll wait until
 628  * the buffer is committed to disk for the last time (we can tell by the ref
 629  * count) and free it in xfs_buf_item_unpin().  Until that happens we will
 630  * keep the buffer locked so that the buffer and buf log item are not reused.
 631  *
 632  * We also set the XFS_BLF_CANCEL flag in the buf log format structure and log
 633  * the buf item.  This will be used at recovery time to determine that copies
 634  * of the buffer in the log before this should not be replayed.
 635  *
 636  * We mark the item descriptor and the transaction dirty so that we'll hold
 637  * the buffer until after the commit.
 638  *
 639  * Since we're invalidating the buffer, we also clear the state about which
 640  * parts of the buffer have been logged.  We also clear the flag indicating
 641  * that this is an inode buffer since the data in the buffer will no longer
 642  * be valid.
 643  *
 644  * We set the stale bit in the buffer as well since we're getting rid of it.
 645  */
 646 void
 647 xfs_trans_binval(
 648         xfs_trans_t     *tp,
 649         xfs_buf_t       *bp)
 650 {
 651         xfs_buf_log_item_t      *bip = bp->b_fspriv;
 652         int                     i;
 653
 654         ASSERT(bp->b_transp == tp);
 655         ASSERT(bip != NULL);
 656         ASSERT(atomic_read(&bip->bli_refcount) > 0);
 657
 658         trace_xfs_trans_binval(bip);
 659
 660         if (bip->bli_flags & XFS_BLI_STALE) {
 661                 /*
 662                  * If the buffer is already invalidated, then
 663                  * just return.
 664                  */
 665                 ASSERT(XFS_BUF_ISSTALE(bp));
 666                 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
 667                 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
 668                 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLFT_MASK));
 669                 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
 670                 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
 671                 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
 672                 return;
 673         }
 674
 675         xfs_buf_stale(bp);
 676
 677         bip->bli_flags |= XFS_BLI_STALE;
 678         bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
 679         bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
 680         bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
 681         bip->__bli_format.blf_flags &= ~XFS_BLFT_MASK;
 682         for (i = 0; i < bip->bli_format_count; i++) {
 683                 memset(bip->bli_formats[i].blf_data_map, 0,
 684                        (bip->bli_formats[i].blf_map_size * sizeof(uint)));
 685         }
 686         bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
 687         tp->t_flags |= XFS_TRANS_DIRTY;
 688 }
 689
 690 /*
 691  * This call is used to indicate that the buffer contains on-disk inodes which
 692  * must be handled specially during recovery.  They require special handling
 693  * because only the di_next_unlinked from the inodes in the buffer should be
 694  * recovered.  The rest of the data in the buffer is logged via the inodes
 695  * themselves.
 696  *
 697  * All we do is set the XFS_BLI_INODE_BUF flag in the items flags so it can be
 698  * transferred to the buffer's log format structure so that we'll know what to
 699  * do at recovery time.
 700  */
 701 void
 702 xfs_trans_inode_buf(
 703         xfs_trans_t     *tp,
 704         xfs_buf_t       *bp)
 705 {
 706         xfs_buf_log_item_t      *bip = bp->b_fspriv;
 707
 708         ASSERT(bp->b_transp == tp);
 709         ASSERT(bip != NULL);
 710         ASSERT(atomic_read(&bip->bli_refcount) > 0);
 711
 712         bip->bli_flags |= XFS_BLI_INODE_BUF;
 713         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
 714 }
 715
 716 /*
 717  * This call is used to indicate that the buffer is going to
 718  * be staled and was an inode buffer. This means it gets
 719  * special processing during unpin - where any inodes
 720  * associated with the buffer should be removed from ail.
 721  * There is also special processing during recovery,
 722  * any replay of the inodes in the buffer needs to be
 723  * prevented as the buffer may have been reused.
 724  */
 725 void
 726 xfs_trans_stale_inode_buf(
 727         xfs_trans_t     *tp,
 728         xfs_buf_t       *bp)
 729 {
 730         xfs_buf_log_item_t      *bip = bp->b_fspriv;
 731
 732         ASSERT(bp->b_transp == tp);
 733         ASSERT(bip != NULL);
 734         ASSERT(atomic_read(&bip->bli_refcount) > 0);
 735
 736         bip->bli_flags |= XFS_BLI_STALE_INODE;
 737         bip->bli_item.li_cb = xfs_buf_iodone;
 738         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
 739 }
 740
 741 /*
 742  * Mark the buffer as being one which contains newly allocated
 743  * inodes.  We need to make sure that even if this buffer is
 744  * relogged as an 'inode buf' we still recover all of the inode
 745  * images in the face of a crash.  This works in coordination with
 746  * xfs_buf_item_committed() to ensure that the buffer remains in the
 747  * AIL at its original location even after it has been relogged.
 748  */
 749 /* ARGSUSED */
 750 void
 751 xfs_trans_inode_alloc_buf(
 752         xfs_trans_t     *tp,
 753         xfs_buf_t       *bp)
 754 {
 755         xfs_buf_log_item_t      *bip = bp->b_fspriv;
 756
 757         ASSERT(bp->b_transp == tp);
 758         ASSERT(bip != NULL);
 759         ASSERT(atomic_read(&bip->bli_refcount) > 0);
 760
 761         bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
 762         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
 763 }
 764
 765 /*
 766  * Mark the buffer as ordered for this transaction. This means
 767  * that the contents of the buffer are not recorded in the transaction
 768  * but it is tracked in the AIL as though it was. This allows us
 769  * to record logical changes in transactions rather than the physical
 770  * changes we make to the buffer without changing writeback ordering
 771  * constraints of metadata buffers.
 772  */
 773 void
 774 xfs_trans_ordered_buf(
 775         struct xfs_trans        *tp,
 776         struct xfs_buf          *bp)
 777 {
 778         struct xfs_buf_log_item *bip = bp->b_fspriv;
 779
 780         ASSERT(bp->b_transp == tp);
 781         ASSERT(bip != NULL);
 782         ASSERT(atomic_read(&bip->bli_refcount) > 0);
 783
 784         bip->bli_flags |= XFS_BLI_ORDERED;
 785         trace_xfs_buf_item_ordered(bip);
 786 }
 787
 788 /*
 789  * Set the type of the buffer for log recovery so that it can correctly identify
 790  * and hence attach the correct buffer ops to the buffer after replay.
 791  */
 792 void
 793 xfs_trans_buf_set_type(
 794         struct xfs_trans        *tp,
 795         struct xfs_buf          *bp,
 796         enum xfs_blft           type)
 797 {
 798         struct xfs_buf_log_item *bip = bp->b_fspriv;
 799
 800         if (!tp)
 801                 return;
 802
 803         ASSERT(bp->b_transp == tp);
 804         ASSERT(bip != NULL);
 805         ASSERT(atomic_read(&bip->bli_refcount) > 0);
 806
 807         xfs_blft_to_flags(&bip->__bli_format, type);
 808 }
 809
 810 void
 811 xfs_trans_buf_copy_type(
 812         struct xfs_buf          *dst_bp,
 813         struct xfs_buf          *src_bp)
 814 {
 815         struct xfs_buf_log_item *sbip = src_bp->b_fspriv;
 816         struct xfs_buf_log_item *dbip = dst_bp->b_fspriv;
 817         enum xfs_blft           type;
 818
 819         type = xfs_blft_from_flags(&sbip->__bli_format);
 820         xfs_blft_to_flags(&dbip->__bli_format, type);
 821 }
 822
 823 /*
 824  * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of
 825  * dquots. However, unlike in inode buffer recovery, dquot buffers get
 826  * recovered in their entirety. (Hence, no XFS_BLI_DQUOT_ALLOC_BUF flag).
 827  * The only thing that makes dquot buffers different from regular
 828  * buffers is that we must not replay dquot bufs when recovering
 829  * if a _corresponding_ quotaoff has happened. We also have to distinguish
 830  * between usr dquot bufs and grp dquot bufs, because usr and grp quotas
 831  * can be turned off independently.
 832  */
 833 /* ARGSUSED */
 834 void
 835 xfs_trans_dquot_buf(
 836         xfs_trans_t     *tp,
 837         xfs_buf_t       *bp,
 838         uint            type)
 839 {
 840         struct xfs_buf_log_item *bip = bp->b_fspriv;
 841
 842         ASSERT(type == XFS_BLF_UDQUOT_BUF ||
 843                type == XFS_BLF_PDQUOT_BUF ||
 844                type == XFS_BLF_GDQUOT_BUF);
 845
 846         bip->__bli_format.blf_flags |= type;
 847
 848         switch (type) {
 849         case XFS_BLF_UDQUOT_BUF:
 850                 type = XFS_BLFT_UDQUOT_BUF;
 851                 break;
 852         case XFS_BLF_PDQUOT_BUF:
 853                 type = XFS_BLFT_PDQUOT_BUF;
 854                 break;
 855         case XFS_BLF_GDQUOT_BUF:
 856                 type = XFS_BLFT_GDQUOT_BUF;
 857                 break;
 858         default:
 859                 type = XFS_BLFT_UNKNOWN_BUF;
 860                 break;
 861         }
 862
 863         xfs_trans_buf_set_type(tp, bp, type);
 864 }