fs/gfs2/lops.c

   1 /*
   2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   4  *
   5  * This copyrighted material is made available to anyone wishing to use,
   6  * modify, copy, or redistribute it subject to the terms and conditions
   7  * of the GNU General Public License version 2.
   8  */
   9
  10 #include <linux/sched.h>
  11 #include <linux/slab.h>
  12 #include <linux/spinlock.h>
  13 #include <linux/completion.h>
  14 #include <linux/buffer_head.h>
  15 #include <linux/mempool.h>
  16 #include <linux/gfs2_ondisk.h>
  17 #include <linux/bio.h>
  18 #include <linux/fs.h>
  19 #include <linux/list_sort.h>
  20
  21 #include "gfs2.h"
  22 #include "incore.h"
  23 #include "inode.h"
  24 #include "glock.h"
  25 #include "log.h"
  26 #include "lops.h"
  27 #include "meta_io.h"
  28 #include "recovery.h"
  29 #include "rgrp.h"
  30 #include "trans.h"
  31 #include "util.h"
  32 #include "trace_gfs2.h"
  33
  34 /**
  35  * gfs2_pin - Pin a buffer in memory
  36  * @sdp: The superblock
  37  * @bh: The buffer to be pinned
  38  *
  39  * The log lock must be held when calling this function
  40  */
  41 void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
  42 {
  43         struct gfs2_bufdata *bd;
  44
  45         BUG_ON(!current->journal_info);
  46
  47         clear_buffer_dirty(bh);
  48         if (test_set_buffer_pinned(bh))
  49                 gfs2_assert_withdraw(sdp, 0);
  50         if (!buffer_uptodate(bh))
  51                 gfs2_io_error_bh(sdp, bh);
  52         bd = bh->b_private;
  53         /* If this buffer is in the AIL and it has already been written
  54          * to in-place disk block, remove it from the AIL.
  55          */
  56         spin_lock(&sdp->sd_ail_lock);
  57         if (bd->bd_tr)
  58                 list_move(&bd->bd_ail_st_list, &bd->bd_tr->tr_ail2_list);
  59         spin_unlock(&sdp->sd_ail_lock);
  60         get_bh(bh);
  61         atomic_inc(&sdp->sd_log_pinned);
  62         trace_gfs2_pin(bd, 1);
  63 }
  64
  65 static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
  66 {
  67         return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
  68 }
  69
  70 static void maybe_release_space(struct gfs2_bufdata *bd)
  71 {
  72         struct gfs2_glock *gl = bd->bd_gl;
  73         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
  74         struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
  75         unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
  76         struct gfs2_bitmap *bi = rgd->rd_bits + index;
  77
  78         if (bi->bi_clone == NULL)
  79                 return;
  80         if (sdp->sd_args.ar_discard)
  81                 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
  82         memcpy(bi->bi_clone + bi->bi_offset,
  83                bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
  84         clear_bit(GBF_FULL, &bi->bi_flags);
  85         rgd->rd_free_clone = rgd->rd_free;
  86         rgd->rd_extfail_pt = rgd->rd_free;
  87 }
  88
  89 /**
  90  * gfs2_unpin - Unpin a buffer
  91  * @sdp: the filesystem the buffer belongs to
  92  * @bh: The buffer to unpin
  93  * @ai:
  94  * @flags: The inode dirty flags
  95  *
  96  */
  97
  98 static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
  99                        struct gfs2_trans *tr)
 100 {
 101         struct gfs2_bufdata *bd = bh->b_private;
 102
 103         BUG_ON(!buffer_uptodate(bh));
 104         BUG_ON(!buffer_pinned(bh));
 105
 106         lock_buffer(bh);
 107         mark_buffer_dirty(bh);
 108         clear_buffer_pinned(bh);
 109
 110         if (buffer_is_rgrp(bd))
 111                 maybe_release_space(bd);
 112
 113         spin_lock(&sdp->sd_ail_lock);
 114         if (bd->bd_tr) {
 115                 list_del(&bd->bd_ail_st_list);
 116                 brelse(bh);
 117         } else {
 118                 struct gfs2_glock *gl = bd->bd_gl;
 119                 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
 120                 atomic_inc(&gl->gl_ail_count);
 121         }
 122         bd->bd_tr = tr;
 123         list_add(&bd->bd_ail_st_list, &tr->tr_ail1_list);
 124         spin_unlock(&sdp->sd_ail_lock);
 125
 126         clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
 127         trace_gfs2_pin(bd, 0);
 128         unlock_buffer(bh);
 129         atomic_dec(&sdp->sd_log_pinned);
 130 }
 131
 132 static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
 133 {
 134         BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
 135                (sdp->sd_log_flush_head != sdp->sd_log_head));
 136
 137         if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks)
 138                 sdp->sd_log_flush_head = 0;
 139 }
 140
 141 static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
 142 {
 143         unsigned int lbn = sdp->sd_log_flush_head;
 144         struct gfs2_journal_extent *je;
 145         u64 block;
 146
 147         list_for_each_entry(je, &sdp->sd_jdesc->extent_list, list) {
 148                 if ((lbn >= je->lblock) && (lbn < (je->lblock + je->blocks))) {
 149                         block = je->dblock + lbn - je->lblock;
 150                         gfs2_log_incr_head(sdp);
 151                         return block;
 152                 }
 153         }
 154
 155         return -1;
 156 }
 157
 158 /**
 159  * gfs2_end_log_write_bh - end log write of pagecache data with buffers
 160  * @sdp: The superblock
 161  * @bvec: The bio_vec
 162  * @error: The i/o status
 163  *
 164  * This finds the relavent buffers and unlocks then and sets the
 165  * error flag according to the status of the i/o request. This is
 166  * used when the log is writing data which has an in-place version
 167  * that is pinned in the pagecache.
 168  */
 169
 170 static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
 171                                   blk_status_t error)
 172 {
 173         struct buffer_head *bh, *next;
 174         struct page *page = bvec->bv_page;
 175         unsigned size;
 176
 177         bh = page_buffers(page);
 178         size = bvec->bv_len;
 179         while (bh_offset(bh) < bvec->bv_offset)
 180                 bh = bh->b_this_page;
 181         do {
 182                 if (error)
 183                         mark_buffer_write_io_error(bh);
 184                 unlock_buffer(bh);
 185                 next = bh->b_this_page;
 186                 size -= bh->b_size;
 187                 brelse(bh);
 188                 bh = next;
 189         } while(bh && size);
 190 }
 191
 192 /**
 193  * gfs2_end_log_write - end of i/o to the log
 194  * @bio: The bio
 195  * @error: Status of i/o request
 196  *
 197  * Each bio_vec contains either data from the pagecache or data
 198  * relating to the log itself. Here we iterate over the bio_vec
 199  * array, processing both kinds of data.
 200  *
 201  */
 202
 203 static void gfs2_end_log_write(struct bio *bio)
 204 {
 205         struct gfs2_sbd *sdp = bio->bi_private;
 206         struct bio_vec *bvec;
 207         struct page *page;
 208         int i;
 209
 210         if (bio->bi_status)
 211                 fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
 212
 213         bio_for_each_segment_all(bvec, bio, i) {
 214                 page = bvec->bv_page;
 215                 if (page_has_buffers(page))
 216                         gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
 217                 else
 218                         mempool_free(page, gfs2_page_pool);
 219         }
 220
 221         bio_put(bio);
 222         if (atomic_dec_and_test(&sdp->sd_log_in_flight))
 223                 wake_up(&sdp->sd_log_flush_wait);
 224 }
 225
 226 /**
 227  * gfs2_log_flush_bio - Submit any pending log bio
 228  * @sdp: The superblock
 229  * @op: REQ_OP
 230  * @op_flags: req_flag_bits
 231  *
 232  * Submit any pending part-built or full bio to the block device. If
 233  * there is no pending bio, then this is a no-op.
 234  */
 235
 236 void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int op, int op_flags)
 237 {
 238         if (sdp->sd_log_bio) {
 239                 atomic_inc(&sdp->sd_log_in_flight);
 240                 bio_set_op_attrs(sdp->sd_log_bio, op, op_flags);
 241                 submit_bio(sdp->sd_log_bio);
 242                 sdp->sd_log_bio = NULL;
 243         }
 244 }
 245
 246 /**
 247  * gfs2_log_alloc_bio - Allocate a new bio for log writing
 248  * @sdp: The superblock
 249  * @blkno: The next device block number we want to write to
 250  *
 251  * This should never be called when there is a cached bio in the
 252  * super block. When it returns, there will be a cached bio in the
 253  * super block which will have as many bio_vecs as the device is
 254  * happy to handle.
 255  *
 256  * Returns: Newly allocated bio
 257  */
 258
 259 static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno)
 260 {
 261         struct super_block *sb = sdp->sd_vfs;
 262         struct bio *bio;
 263
 264         BUG_ON(sdp->sd_log_bio);
 265
 266         bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
 267         bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
 268         bio->bi_bdev = sb->s_bdev;
 269         bio->bi_end_io = gfs2_end_log_write;
 270         bio->bi_private = sdp;
 271
 272         sdp->sd_log_bio = bio;
 273
 274         return bio;
 275 }
 276
 277 /**
 278  * gfs2_log_get_bio - Get cached log bio, or allocate a new one
 279  * @sdp: The superblock
 280  * @blkno: The device block number we want to write to
 281  *
 282  * If there is a cached bio, then if the next block number is sequential
 283  * with the previous one, return it, otherwise flush the bio to the
 284  * device. If there is not a cached bio, or we just flushed it, then
 285  * allocate a new one.
 286  *
 287  * Returns: The bio to use for log writes
 288  */
 289
 290 static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
 291 {
 292         struct bio *bio = sdp->sd_log_bio;
 293         u64 nblk;
 294
 295         if (bio) {
 296                 nblk = bio_end_sector(bio);
 297                 nblk >>= sdp->sd_fsb2bb_shift;
 298                 if (blkno == nblk)
 299                         return bio;
 300                 gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0);
 301         }
 302
 303         return gfs2_log_alloc_bio(sdp, blkno);
 304 }
 305
 306
 307 /**
 308  * gfs2_log_write - write to log
 309  * @sdp: the filesystem
 310  * @page: the page to write
 311  * @size: the size of the data to write
 312  * @offset: the offset within the page
 313  *
 314  * Try and add the page segment to the current bio. If that fails,
 315  * submit the current bio to the device and create a new one, and
 316  * then add the page segment to that.
 317  */
 318
 319 static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
 320                            unsigned size, unsigned offset)
 321 {
 322         u64 blkno = gfs2_log_bmap(sdp);
 323         struct bio *bio;
 324         int ret;
 325
 326         bio = gfs2_log_get_bio(sdp, blkno);
 327         ret = bio_add_page(bio, page, size, offset);
 328         if (ret == 0) {
 329                 gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0);
 330                 bio = gfs2_log_alloc_bio(sdp, blkno);
 331                 ret = bio_add_page(bio, page, size, offset);
 332                 WARN_ON(ret == 0);
 333         }
 334 }
 335
 336 /**
 337  * gfs2_log_write_bh - write a buffer's content to the log
 338  * @sdp: The super block
 339  * @bh: The buffer pointing to the in-place location
 340  *
 341  * This writes the content of the buffer to the next available location
 342  * in the log. The buffer will be unlocked once the i/o to the log has
 343  * completed.
 344  */
 345
 346 static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
 347 {
 348         gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh));
 349 }
 350
 351 /**
 352  * gfs2_log_write_page - write one block stored in a page, into the log
 353  * @sdp: The superblock
 354  * @page: The struct page
 355  *
 356  * This writes the first block-sized part of the page into the log. Note
 357  * that the page must have been allocated from the gfs2_page_pool mempool
 358  * and that after this has been called, ownership has been transferred and
 359  * the page may be freed at any time.
 360  */
 361
 362 void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
 363 {
 364         struct super_block *sb = sdp->sd_vfs;
 365         gfs2_log_write(sdp, page, sb->s_blocksize, 0);
 366 }
 367
 368 static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
 369                                       u32 ld_length, u32 ld_data1)
 370 {
 371         struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
 372         struct gfs2_log_descriptor *ld = page_address(page);
 373         clear_page(ld);
 374         ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 375         ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
 376         ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
 377         ld->ld_type = cpu_to_be32(ld_type);
 378         ld->ld_length = cpu_to_be32(ld_length);
 379         ld->ld_data1 = cpu_to_be32(ld_data1);
 380         ld->ld_data2 = 0;
 381         return page;
 382 }
 383
 384 static void gfs2_check_magic(struct buffer_head *bh)
 385 {
 386         void *kaddr;
 387         __be32 *ptr;
 388
 389         clear_buffer_escaped(bh);
 390         kaddr = kmap_atomic(bh->b_page);
 391         ptr = kaddr + bh_offset(bh);
 392         if (*ptr == cpu_to_be32(GFS2_MAGIC))
 393                 set_buffer_escaped(bh);
 394         kunmap_atomic(kaddr);
 395 }
 396
 397 static int blocknr_cmp(void *priv, struct list_head *a, struct list_head *b)
 398 {
 399         struct gfs2_bufdata *bda, *bdb;
 400
 401         bda = list_entry(a, struct gfs2_bufdata, bd_list);
 402         bdb = list_entry(b, struct gfs2_bufdata, bd_list);
 403
 404         if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
 405                 return -1;
 406         if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
 407                 return 1;
 408         return 0;
 409 }
 410
 411 static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
 412                                 unsigned int total, struct list_head *blist,
 413                                 bool is_databuf)
 414 {
 415         struct gfs2_log_descriptor *ld;
 416         struct gfs2_bufdata *bd1 = NULL, *bd2;
 417         struct page *page;
 418         unsigned int num;
 419         unsigned n;
 420         __be64 *ptr;
 421
 422         gfs2_log_lock(sdp);
 423         list_sort(NULL, blist, blocknr_cmp);
 424         bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list);
 425         while(total) {
 426                 num = total;
 427                 if (total > limit)
 428                         num = limit;
 429                 gfs2_log_unlock(sdp);
 430                 page = gfs2_get_log_desc(sdp,
 431                                          is_databuf ? GFS2_LOG_DESC_JDATA :
 432                                          GFS2_LOG_DESC_METADATA, num + 1, num);
 433                 ld = page_address(page);
 434                 gfs2_log_lock(sdp);
 435                 ptr = (__be64 *)(ld + 1);
 436
 437                 n = 0;
 438                 list_for_each_entry_continue(bd1, blist, bd_list) {
 439                         *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
 440                         if (is_databuf) {
 441                                 gfs2_check_magic(bd1->bd_bh);
 442                                 *ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0);
 443                         }
 444                         if (++n >= num)
 445                                 break;
 446                 }
 447
 448                 gfs2_log_unlock(sdp);
 449                 gfs2_log_write_page(sdp, page);
 450                 gfs2_log_lock(sdp);
 451
 452                 n = 0;
 453                 list_for_each_entry_continue(bd2, blist, bd_list) {
 454                         get_bh(bd2->bd_bh);
 455                         gfs2_log_unlock(sdp);
 456                         lock_buffer(bd2->bd_bh);
 457
 458                         if (buffer_escaped(bd2->bd_bh)) {
 459                                 void *kaddr;
 460                                 page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
 461                                 ptr = page_address(page);
 462                                 kaddr = kmap_atomic(bd2->bd_bh->b_page);
 463                                 memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
 464                                        bd2->bd_bh->b_size);
 465                                 kunmap_atomic(kaddr);
 466                                 *(__be32 *)ptr = 0;
 467                                 clear_buffer_escaped(bd2->bd_bh);
 468                                 unlock_buffer(bd2->bd_bh);
 469                                 brelse(bd2->bd_bh);
 470                                 gfs2_log_write_page(sdp, page);
 471                         } else {
 472                                 gfs2_log_write_bh(sdp, bd2->bd_bh);
 473                         }
 474                         gfs2_log_lock(sdp);
 475                         if (++n >= num)
 476                                 break;
 477                 }
 478
 479                 BUG_ON(total < num);
 480                 total -= num;
 481         }
 482         gfs2_log_unlock(sdp);
 483 }
 484
 485 static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 486 {
 487         unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */
 488         unsigned int nbuf;
 489         if (tr == NULL)
 490                 return;
 491         nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
 492         gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0);
 493 }
 494
 495 static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 496 {
 497         struct list_head *head;
 498         struct gfs2_bufdata *bd;
 499
 500         if (tr == NULL)
 501                 return;
 502
 503         head = &tr->tr_buf;
 504         while (!list_empty(head)) {
 505                 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
 506                 list_del_init(&bd->bd_list);
 507                 gfs2_unpin(sdp, bd->bd_bh, tr);
 508         }
 509 }
 510
 511 static void buf_lo_before_scan(struct gfs2_jdesc *jd,
 512                                struct gfs2_log_header_host *head, int pass)
 513 {
 514         if (pass != 0)
 515                 return;
 516
 517         jd->jd_found_blocks = 0;
 518         jd->jd_replayed_blocks = 0;
 519 }
 520
 521 static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
 522                                 struct gfs2_log_descriptor *ld, __be64 *ptr,
 523                                 int pass)
 524 {
 525         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 526         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 527         struct gfs2_glock *gl = ip->i_gl;
 528         unsigned int blks = be32_to_cpu(ld->ld_data1);
 529         struct buffer_head *bh_log, *bh_ip;
 530         u64 blkno;
 531         int error = 0;
 532
 533         if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
 534                 return 0;
 535
 536         gfs2_replay_incr_blk(jd, &start);
 537
 538         for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
 539                 blkno = be64_to_cpu(*ptr++);
 540
 541                 jd->jd_found_blocks++;
 542
 543                 if (gfs2_revoke_check(jd, blkno, start))
 544                         continue;
 545
 546                 error = gfs2_replay_read_block(jd, start, &bh_log);
 547                 if (error)
 548                         return error;
 549
 550                 bh_ip = gfs2_meta_new(gl, blkno);
 551                 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
 552
 553                 if (gfs2_meta_check(sdp, bh_ip))
 554                         error = -EIO;
 555                 else
 556                         mark_buffer_dirty(bh_ip);
 557
 558                 brelse(bh_log);
 559                 brelse(bh_ip);
 560
 561                 if (error)
 562                         break;
 563
 564                 jd->jd_replayed_blocks++;
 565         }
 566
 567         return error;
 568 }
 569
 570 /**
 571  * gfs2_meta_sync - Sync all buffers associated with a glock
 572  * @gl: The glock
 573  *
 574  */
 575
 576 static void gfs2_meta_sync(struct gfs2_glock *gl)
 577 {
 578         struct address_space *mapping = gfs2_glock2aspace(gl);
 579         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 580         int error;
 581
 582         if (mapping == NULL)
 583                 mapping = &sdp->sd_aspace;
 584
 585         filemap_fdatawrite(mapping);
 586         error = filemap_fdatawait(mapping);
 587
 588         if (error)
 589                 gfs2_io_error(gl->gl_name.ln_sbd);
 590 }
 591
 592 static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
 593 {
 594         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 595         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 596
 597         if (error) {
 598                 gfs2_meta_sync(ip->i_gl);
 599                 return;
 600         }
 601         if (pass != 1)
 602                 return;
 603
 604         gfs2_meta_sync(ip->i_gl);
 605
 606         fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
 607                 jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
 608 }
 609
 610 static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 611 {
 612         struct gfs2_meta_header *mh;
 613         unsigned int offset;
 614         struct list_head *head = &sdp->sd_log_le_revoke;
 615         struct gfs2_bufdata *bd;
 616         struct page *page;
 617         unsigned int length;
 618
 619         gfs2_write_revokes(sdp);
 620         if (!sdp->sd_log_num_revoke)
 621                 return;
 622
 623         length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
 624         page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
 625         offset = sizeof(struct gfs2_log_descriptor);
 626
 627         list_for_each_entry(bd, head, bd_list) {
 628                 sdp->sd_log_num_revoke--;
 629
 630                 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
 631
 632                         gfs2_log_write_page(sdp, page);
 633                         page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
 634                         mh = page_address(page);
 635                         clear_page(mh);
 636                         mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
 637                         mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
 638                         mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
 639                         offset = sizeof(struct gfs2_meta_header);
 640                 }
 641
 642                 *(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
 643                 offset += sizeof(u64);
 644         }
 645         gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
 646
 647         gfs2_log_write_page(sdp, page);
 648 }
 649
 650 static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 651 {
 652         struct list_head *head = &sdp->sd_log_le_revoke;
 653         struct gfs2_bufdata *bd;
 654         struct gfs2_glock *gl;
 655
 656         while (!list_empty(head)) {
 657                 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
 658                 list_del_init(&bd->bd_list);
 659                 gl = bd->bd_gl;
 660                 atomic_dec(&gl->gl_revokes);
 661                 clear_bit(GLF_LFLUSH, &gl->gl_flags);
 662                 kmem_cache_free(gfs2_bufdata_cachep, bd);
 663         }
 664 }
 665
 666 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
 667                                   struct gfs2_log_header_host *head, int pass)
 668 {
 669         if (pass != 0)
 670                 return;
 671
 672         jd->jd_found_revokes = 0;
 673         jd->jd_replay_tail = head->lh_tail;
 674 }
 675
 676 static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
 677                                    struct gfs2_log_descriptor *ld, __be64 *ptr,
 678                                    int pass)
 679 {
 680         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 681         unsigned int blks = be32_to_cpu(ld->ld_length);
 682         unsigned int revokes = be32_to_cpu(ld->ld_data1);
 683         struct buffer_head *bh;
 684         unsigned int offset;
 685         u64 blkno;
 686         int first = 1;
 687         int error;
 688
 689         if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
 690                 return 0;
 691
 692         offset = sizeof(struct gfs2_log_descriptor);
 693
 694         for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
 695                 error = gfs2_replay_read_block(jd, start, &bh);
 696                 if (error)
 697                         return error;
 698
 699                 if (!first)
 700                         gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
 701
 702                 while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
 703                         blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
 704
 705                         error = gfs2_revoke_add(jd, blkno, start);
 706                         if (error < 0) {
 707                                 brelse(bh);
 708                                 return error;
 709                         }
 710                         else if (error)
 711                                 jd->jd_found_revokes++;
 712
 713                         if (!--revokes)
 714                                 break;
 715                         offset += sizeof(u64);
 716                 }
 717
 718                 brelse(bh);
 719                 offset = sizeof(struct gfs2_meta_header);
 720                 first = 0;
 721         }
 722
 723         return 0;
 724 }
 725
 726 static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
 727 {
 728         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 729
 730         if (error) {
 731                 gfs2_revoke_clean(jd);
 732                 return;
 733         }
 734         if (pass != 1)
 735                 return;
 736
 737         fs_info(sdp, "jid=%u: Found %u revoke tags\n",
 738                 jd->jd_jid, jd->jd_found_revokes);
 739
 740         gfs2_revoke_clean(jd);
 741 }
 742
 743 /**
 744  * databuf_lo_before_commit - Scan the data buffers, writing as we go
 745  *
 746  */
 747
 748 static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 749 {
 750         unsigned int limit = databuf_limit(sdp);
 751         unsigned int nbuf;
 752         if (tr == NULL)
 753                 return;
 754         nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
 755         gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1);
 756 }
 757
 758 static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
 759                                     struct gfs2_log_descriptor *ld,
 760                                     __be64 *ptr, int pass)
 761 {
 762         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 763         struct gfs2_glock *gl = ip->i_gl;
 764         unsigned int blks = be32_to_cpu(ld->ld_data1);
 765         struct buffer_head *bh_log, *bh_ip;
 766         u64 blkno;
 767         u64 esc;
 768         int error = 0;
 769
 770         if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
 771                 return 0;
 772
 773         gfs2_replay_incr_blk(jd, &start);
 774         for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
 775                 blkno = be64_to_cpu(*ptr++);
 776                 esc = be64_to_cpu(*ptr++);
 777
 778                 jd->jd_found_blocks++;
 779
 780                 if (gfs2_revoke_check(jd, blkno, start))
 781                         continue;
 782
 783                 error = gfs2_replay_read_block(jd, start, &bh_log);
 784                 if (error)
 785                         return error;
 786
 787                 bh_ip = gfs2_meta_new(gl, blkno);
 788                 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
 789
 790                 /* Unescape */
 791                 if (esc) {
 792                         __be32 *eptr = (__be32 *)bh_ip->b_data;
 793                         *eptr = cpu_to_be32(GFS2_MAGIC);
 794                 }
 795                 mark_buffer_dirty(bh_ip);
 796
 797                 brelse(bh_log);
 798                 brelse(bh_ip);
 799
 800                 jd->jd_replayed_blocks++;
 801         }
 802
 803         return error;
 804 }
 805
 806 /* FIXME: sort out accounting for log blocks etc. */
 807
 808 static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
 809 {
 810         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 811         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 812
 813         if (error) {
 814                 gfs2_meta_sync(ip->i_gl);
 815                 return;
 816         }
 817         if (pass != 1)
 818                 return;
 819
 820         /* data sync? */
 821         gfs2_meta_sync(ip->i_gl);
 822
 823         fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
 824                 jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
 825 }
 826
 827 static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 828 {
 829         struct list_head *head;
 830         struct gfs2_bufdata *bd;
 831
 832         if (tr == NULL)
 833                 return;
 834
 835         head = &tr->tr_databuf;
 836         while (!list_empty(head)) {
 837                 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
 838                 list_del_init(&bd->bd_list);
 839                 gfs2_unpin(sdp, bd->bd_bh, tr);
 840         }
 841 }
 842
 843
 844 const struct gfs2_log_operations gfs2_buf_lops = {
 845         .lo_before_commit = buf_lo_before_commit,
 846         .lo_after_commit = buf_lo_after_commit,
 847         .lo_before_scan = buf_lo_before_scan,
 848         .lo_scan_elements = buf_lo_scan_elements,
 849         .lo_after_scan = buf_lo_after_scan,
 850         .lo_name = "buf",
 851 };
 852
 853 const struct gfs2_log_operations gfs2_revoke_lops = {
 854         .lo_before_commit = revoke_lo_before_commit,
 855         .lo_after_commit = revoke_lo_after_commit,
 856         .lo_before_scan = revoke_lo_before_scan,
 857         .lo_scan_elements = revoke_lo_scan_elements,
 858         .lo_after_scan = revoke_lo_after_scan,
 859         .lo_name = "revoke",
 860 };
 861
 862 const struct gfs2_log_operations gfs2_databuf_lops = {
 863         .lo_before_commit = databuf_lo_before_commit,
 864         .lo_after_commit = databuf_lo_after_commit,
 865         .lo_scan_elements = databuf_lo_scan_elements,
 866         .lo_after_scan = databuf_lo_after_scan,
 867         .lo_name = "databuf",
 868 };
 869
 870 const struct gfs2_log_operations *gfs2_log_ops[] = {
 871         &gfs2_databuf_lops,
 872         &gfs2_buf_lops,
 873         &gfs2_revoke_lops,
 874         NULL,
 875 };
 876