fs/gfs2/lops.c

   1 /*
   2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   4  *
   5  * This copyrighted material is made available to anyone wishing to use,
   6  * modify, copy, or redistribute it subject to the terms and conditions
   7  * of the GNU General Public License version 2.
   8  */
   9
  10 #include <linux/sched.h>
  11 #include <linux/slab.h>
  12 #include <linux/spinlock.h>
  13 #include <linux/completion.h>
  14 #include <linux/buffer_head.h>
  15 #include <linux/mempool.h>
  16 #include <linux/gfs2_ondisk.h>
  17 #include <linux/bio.h>
  18 #include <linux/fs.h>
  19 #include <linux/list_sort.h>
  20
  21 #include "gfs2.h"
  22 #include "incore.h"
  23 #include "inode.h"
  24 #include "glock.h"
  25 #include "log.h"
  26 #include "lops.h"
  27 #include "meta_io.h"
  28 #include "recovery.h"
  29 #include "rgrp.h"
  30 #include "trans.h"
  31 #include "util.h"
  32 #include "trace_gfs2.h"
  33
  34 /**
  35  * gfs2_pin - Pin a buffer in memory
  36  * @sdp: The superblock
  37  * @bh: The buffer to be pinned
  38  *
  39  * The log lock must be held when calling this function
  40  */
  41 void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
  42 {
  43         struct gfs2_bufdata *bd;
  44
  45         BUG_ON(!current->journal_info);
  46
  47         clear_buffer_dirty(bh);
  48         if (test_set_buffer_pinned(bh))
  49                 gfs2_assert_withdraw(sdp, 0);
  50         if (!buffer_uptodate(bh))
  51                 gfs2_io_error_bh(sdp, bh);
  52         bd = bh->b_private;
  53         /* If this buffer is in the AIL and it has already been written
  54          * to in-place disk block, remove it from the AIL.
  55          */
  56         spin_lock(&sdp->sd_ail_lock);
  57         if (bd->bd_tr)
  58                 list_move(&bd->bd_ail_st_list, &bd->bd_tr->tr_ail2_list);
  59         spin_unlock(&sdp->sd_ail_lock);
  60         get_bh(bh);
  61         atomic_inc(&sdp->sd_log_pinned);
  62         trace_gfs2_pin(bd, 1);
  63 }
  64
  65 static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
  66 {
  67         return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
  68 }
  69
  70 static void maybe_release_space(struct gfs2_bufdata *bd)
  71 {
  72         struct gfs2_glock *gl = bd->bd_gl;
  73         struct gfs2_sbd *sdp = gl->gl_sbd;
  74         struct gfs2_rgrpd *rgd = gl->gl_object;
  75         unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
  76         struct gfs2_bitmap *bi = rgd->rd_bits + index;
  77
  78         if (bi->bi_clone == 0)
  79                 return;
  80         if (sdp->sd_args.ar_discard)
  81                 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
  82         memcpy(bi->bi_clone + bi->bi_offset,
  83                bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
  84         clear_bit(GBF_FULL, &bi->bi_flags);
  85         rgd->rd_free_clone = rgd->rd_free;
  86         rgd->rd_extfail_pt = rgd->rd_free;
  87 }
  88
  89 /**
  90  * gfs2_unpin - Unpin a buffer
  91  * @sdp: the filesystem the buffer belongs to
  92  * @bh: The buffer to unpin
  93  * @ai:
  94  * @flags: The inode dirty flags
  95  *
  96  */
  97
  98 static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
  99                        struct gfs2_trans *tr)
 100 {
 101         struct gfs2_bufdata *bd = bh->b_private;
 102
 103         BUG_ON(!buffer_uptodate(bh));
 104         BUG_ON(!buffer_pinned(bh));
 105
 106         lock_buffer(bh);
 107         mark_buffer_dirty(bh);
 108         clear_buffer_pinned(bh);
 109
 110         if (buffer_is_rgrp(bd))
 111                 maybe_release_space(bd);
 112
 113         spin_lock(&sdp->sd_ail_lock);
 114         if (bd->bd_tr) {
 115                 list_del(&bd->bd_ail_st_list);
 116                 brelse(bh);
 117         } else {
 118                 struct gfs2_glock *gl = bd->bd_gl;
 119                 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
 120                 atomic_inc(&gl->gl_ail_count);
 121         }
 122         bd->bd_tr = tr;
 123         list_add(&bd->bd_ail_st_list, &tr->tr_ail1_list);
 124         spin_unlock(&sdp->sd_ail_lock);
 125
 126         clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
 127         trace_gfs2_pin(bd, 0);
 128         unlock_buffer(bh);
 129         atomic_dec(&sdp->sd_log_pinned);
 130 }
 131
 132 static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
 133 {
 134         BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
 135                (sdp->sd_log_flush_head != sdp->sd_log_head));
 136
 137         if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
 138                 sdp->sd_log_flush_head = 0;
 139                 sdp->sd_log_flush_wrapped = 1;
 140         }
 141 }
 142
 143 static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
 144 {
 145         unsigned int lbn = sdp->sd_log_flush_head;
 146         struct gfs2_journal_extent *je;
 147         u64 block;
 148
 149         list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) {
 150                 if (lbn >= je->lblock && lbn < je->lblock + je->blocks) {
 151                         block = je->dblock + lbn - je->lblock;
 152                         gfs2_log_incr_head(sdp);
 153                         return block;
 154                 }
 155         }
 156
 157         return -1;
 158 }
 159
 160 /**
 161  * gfs2_end_log_write_bh - end log write of pagecache data with buffers
 162  * @sdp: The superblock
 163  * @bvec: The bio_vec
 164  * @error: The i/o status
 165  *
 166  * This finds the relavent buffers and unlocks then and sets the
 167  * error flag according to the status of the i/o request. This is
 168  * used when the log is writing data which has an in-place version
 169  * that is pinned in the pagecache.
 170  */
 171
 172 static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
 173                                   int error)
 174 {
 175         struct buffer_head *bh, *next;
 176         struct page *page = bvec->bv_page;
 177         unsigned size;
 178
 179         bh = page_buffers(page);
 180         size = bvec->bv_len;
 181         while (bh_offset(bh) < bvec->bv_offset)
 182                 bh = bh->b_this_page;
 183         do {
 184                 if (error)
 185                         set_buffer_write_io_error(bh);
 186                 unlock_buffer(bh);
 187                 next = bh->b_this_page;
 188                 size -= bh->b_size;
 189                 brelse(bh);
 190                 bh = next;
 191         } while(bh && size);
 192 }
 193
 194 /**
 195  * gfs2_end_log_write - end of i/o to the log
 196  * @bio: The bio
 197  * @error: Status of i/o request
 198  *
 199  * Each bio_vec contains either data from the pagecache or data
 200  * relating to the log itself. Here we iterate over the bio_vec
 201  * array, processing both kinds of data.
 202  *
 203  */
 204
 205 static void gfs2_end_log_write(struct bio *bio, int error)
 206 {
 207         struct gfs2_sbd *sdp = bio->bi_private;
 208         struct bio_vec *bvec;
 209         struct page *page;
 210         int i;
 211
 212         if (error) {
 213                 sdp->sd_log_error = error;
 214                 fs_err(sdp, "Error %d writing to log\n", error);
 215         }
 216
 217         bio_for_each_segment_all(bvec, bio, i) {
 218                 page = bvec->bv_page;
 219                 if (page_has_buffers(page))
 220                         gfs2_end_log_write_bh(sdp, bvec, error);
 221                 else
 222                         mempool_free(page, gfs2_page_pool);
 223         }
 224
 225         bio_put(bio);
 226         if (atomic_dec_and_test(&sdp->sd_log_in_flight))
 227                 wake_up(&sdp->sd_log_flush_wait);
 228 }
 229
 230 /**
 231  * gfs2_log_flush_bio - Submit any pending log bio
 232  * @sdp: The superblock
 233  * @rw: The rw flags
 234  *
 235  * Submit any pending part-built or full bio to the block device. If
 236  * there is no pending bio, then this is a no-op.
 237  */
 238
 239 void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw)
 240 {
 241         if (sdp->sd_log_bio) {
 242                 atomic_inc(&sdp->sd_log_in_flight);
 243                 submit_bio(rw, sdp->sd_log_bio);
 244                 sdp->sd_log_bio = NULL;
 245         }
 246 }
 247
 248 /**
 249  * gfs2_log_alloc_bio - Allocate a new bio for log writing
 250  * @sdp: The superblock
 251  * @blkno: The next device block number we want to write to
 252  *
 253  * This should never be called when there is a cached bio in the
 254  * super block. When it returns, there will be a cached bio in the
 255  * super block which will have as many bio_vecs as the device is
 256  * happy to handle.
 257  *
 258  * Returns: Newly allocated bio
 259  */
 260
 261 static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno)
 262 {
 263         struct super_block *sb = sdp->sd_vfs;
 264         unsigned nrvecs = bio_get_nr_vecs(sb->s_bdev);
 265         struct bio *bio;
 266
 267         BUG_ON(sdp->sd_log_bio);
 268
 269         while (1) {
 270                 bio = bio_alloc(GFP_NOIO, nrvecs);
 271                 if (likely(bio))
 272                         break;
 273                 nrvecs = max(nrvecs/2, 1U);
 274         }
 275
 276         bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
 277         bio->bi_bdev = sb->s_bdev;
 278         bio->bi_end_io = gfs2_end_log_write;
 279         bio->bi_private = sdp;
 280
 281         sdp->sd_log_bio = bio;
 282
 283         return bio;
 284 }
 285
 286 /**
 287  * gfs2_log_get_bio - Get cached log bio, or allocate a new one
 288  * @sdp: The superblock
 289  * @blkno: The device block number we want to write to
 290  *
 291  * If there is a cached bio, then if the next block number is sequential
 292  * with the previous one, return it, otherwise flush the bio to the
 293  * device. If there is not a cached bio, or we just flushed it, then
 294  * allocate a new one.
 295  *
 296  * Returns: The bio to use for log writes
 297  */
 298
 299 static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
 300 {
 301         struct bio *bio = sdp->sd_log_bio;
 302         u64 nblk;
 303
 304         if (bio) {
 305                 nblk = bio_end_sector(bio);
 306                 nblk >>= sdp->sd_fsb2bb_shift;
 307                 if (blkno == nblk)
 308                         return bio;
 309                 gfs2_log_flush_bio(sdp, WRITE);
 310         }
 311
 312         return gfs2_log_alloc_bio(sdp, blkno);
 313 }
 314
 315
 316 /**
 317  * gfs2_log_write - write to log
 318  * @sdp: the filesystem
 319  * @page: the page to write
 320  * @size: the size of the data to write
 321  * @offset: the offset within the page
 322  *
 323  * Try and add the page segment to the current bio. If that fails,
 324  * submit the current bio to the device and create a new one, and
 325  * then add the page segment to that.
 326  */
 327
 328 static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
 329                            unsigned size, unsigned offset)
 330 {
 331         u64 blkno = gfs2_log_bmap(sdp);
 332         struct bio *bio;
 333         int ret;
 334
 335         bio = gfs2_log_get_bio(sdp, blkno);
 336         ret = bio_add_page(bio, page, size, offset);
 337         if (ret == 0) {
 338                 gfs2_log_flush_bio(sdp, WRITE);
 339                 bio = gfs2_log_alloc_bio(sdp, blkno);
 340                 ret = bio_add_page(bio, page, size, offset);
 341                 WARN_ON(ret == 0);
 342         }
 343 }
 344
 345 /**
 346  * gfs2_log_write_bh - write a buffer's content to the log
 347  * @sdp: The super block
 348  * @bh: The buffer pointing to the in-place location
 349  *
 350  * This writes the content of the buffer to the next available location
 351  * in the log. The buffer will be unlocked once the i/o to the log has
 352  * completed.
 353  */
 354
 355 static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
 356 {
 357         gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh));
 358 }
 359
 360 /**
 361  * gfs2_log_write_page - write one block stored in a page, into the log
 362  * @sdp: The superblock
 363  * @page: The struct page
 364  *
 365  * This writes the first block-sized part of the page into the log. Note
 366  * that the page must have been allocated from the gfs2_page_pool mempool
 367  * and that after this has been called, ownership has been transferred and
 368  * the page may be freed at any time.
 369  */
 370
 371 void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
 372 {
 373         struct super_block *sb = sdp->sd_vfs;
 374         gfs2_log_write(sdp, page, sb->s_blocksize, 0);
 375 }
 376
 377 static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
 378                                       u32 ld_length, u32 ld_data1)
 379 {
 380         struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
 381         struct gfs2_log_descriptor *ld = page_address(page);
 382         clear_page(ld);
 383         ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 384         ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
 385         ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
 386         ld->ld_type = cpu_to_be32(ld_type);
 387         ld->ld_length = cpu_to_be32(ld_length);
 388         ld->ld_data1 = cpu_to_be32(ld_data1);
 389         ld->ld_data2 = 0;
 390         return page;
 391 }
 392
 393 static void gfs2_check_magic(struct buffer_head *bh)
 394 {
 395         void *kaddr;
 396         __be32 *ptr;
 397
 398         clear_buffer_escaped(bh);
 399         kaddr = kmap_atomic(bh->b_page);
 400         ptr = kaddr + bh_offset(bh);
 401         if (*ptr == cpu_to_be32(GFS2_MAGIC))
 402                 set_buffer_escaped(bh);
 403         kunmap_atomic(kaddr);
 404 }
 405
 406 static int blocknr_cmp(void *priv, struct list_head *a, struct list_head *b)
 407 {
 408         struct gfs2_bufdata *bda, *bdb;
 409
 410         bda = list_entry(a, struct gfs2_bufdata, bd_list);
 411         bdb = list_entry(b, struct gfs2_bufdata, bd_list);
 412
 413         if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
 414                 return -1;
 415         if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
 416                 return 1;
 417         return 0;
 418 }
 419
 420 static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
 421                                 unsigned int total, struct list_head *blist,
 422                                 bool is_databuf)
 423 {
 424         struct gfs2_log_descriptor *ld;
 425         struct gfs2_bufdata *bd1 = NULL, *bd2;
 426         struct page *page;
 427         unsigned int num;
 428         unsigned n;
 429         __be64 *ptr;
 430
 431         gfs2_log_lock(sdp);
 432         list_sort(NULL, blist, blocknr_cmp);
 433         bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list);
 434         while(total) {
 435                 num = total;
 436                 if (total > limit)
 437                         num = limit;
 438                 gfs2_log_unlock(sdp);
 439                 page = gfs2_get_log_desc(sdp,
 440                                          is_databuf ? GFS2_LOG_DESC_JDATA :
 441                                          GFS2_LOG_DESC_METADATA, num + 1, num);
 442                 ld = page_address(page);
 443                 gfs2_log_lock(sdp);
 444                 ptr = (__be64 *)(ld + 1);
 445
 446                 n = 0;
 447                 list_for_each_entry_continue(bd1, blist, bd_list) {
 448                         *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
 449                         if (is_databuf) {
 450                                 gfs2_check_magic(bd1->bd_bh);
 451                                 *ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0);
 452                         }
 453                         if (++n >= num)
 454                                 break;
 455                 }
 456
 457                 gfs2_log_unlock(sdp);
 458                 gfs2_log_write_page(sdp, page);
 459                 gfs2_log_lock(sdp);
 460
 461                 n = 0;
 462                 list_for_each_entry_continue(bd2, blist, bd_list) {
 463                         get_bh(bd2->bd_bh);
 464                         gfs2_log_unlock(sdp);
 465                         lock_buffer(bd2->bd_bh);
 466
 467                         if (buffer_escaped(bd2->bd_bh)) {
 468                                 void *kaddr;
 469                                 page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
 470                                 ptr = page_address(page);
 471                                 kaddr = kmap_atomic(bd2->bd_bh->b_page);
 472                                 memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
 473                                        bd2->bd_bh->b_size);
 474                                 kunmap_atomic(kaddr);
 475                                 *(__be32 *)ptr = 0;
 476                                 clear_buffer_escaped(bd2->bd_bh);
 477                                 unlock_buffer(bd2->bd_bh);
 478                                 brelse(bd2->bd_bh);
 479                                 gfs2_log_write_page(sdp, page);
 480                         } else {
 481                                 gfs2_log_write_bh(sdp, bd2->bd_bh);
 482                         }
 483                         gfs2_log_lock(sdp);
 484                         if (++n >= num)
 485                                 break;
 486                 }
 487
 488                 BUG_ON(total < num);
 489                 total -= num;
 490         }
 491         gfs2_log_unlock(sdp);
 492 }
 493
 494 static void buf_lo_before_commit(struct gfs2_sbd *sdp)
 495 {
 496         unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */
 497
 498         gfs2_before_commit(sdp, limit, sdp->sd_log_num_buf,
 499                            &sdp->sd_log_le_buf, 0);
 500 }
 501
 502 static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 503 {
 504         struct list_head *head = &sdp->sd_log_le_buf;
 505         struct gfs2_bufdata *bd;
 506
 507         if (tr == NULL) {
 508                 gfs2_assert(sdp, list_empty(head));
 509                 return;
 510         }
 511
 512         while (!list_empty(head)) {
 513                 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
 514                 list_del_init(&bd->bd_list);
 515                 sdp->sd_log_num_buf--;
 516
 517                 gfs2_unpin(sdp, bd->bd_bh, tr);
 518         }
 519         gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
 520 }
 521
 522 static void buf_lo_before_scan(struct gfs2_jdesc *jd,
 523                                struct gfs2_log_header_host *head, int pass)
 524 {
 525         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 526
 527         if (pass != 0)
 528                 return;
 529
 530         sdp->sd_found_blocks = 0;
 531         sdp->sd_replayed_blocks = 0;
 532 }
 533
 534 static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
 535                                 struct gfs2_log_descriptor *ld, __be64 *ptr,
 536                                 int pass)
 537 {
 538         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 539         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 540         struct gfs2_glock *gl = ip->i_gl;
 541         unsigned int blks = be32_to_cpu(ld->ld_data1);
 542         struct buffer_head *bh_log, *bh_ip;
 543         u64 blkno;
 544         int error = 0;
 545
 546         if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
 547                 return 0;
 548
 549         gfs2_replay_incr_blk(sdp, &start);
 550
 551         for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
 552                 blkno = be64_to_cpu(*ptr++);
 553
 554                 sdp->sd_found_blocks++;
 555
 556                 if (gfs2_revoke_check(sdp, blkno, start))
 557                         continue;
 558
 559                 error = gfs2_replay_read_block(jd, start, &bh_log);
 560                 if (error)
 561                         return error;
 562
 563                 bh_ip = gfs2_meta_new(gl, blkno);
 564                 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
 565
 566                 if (gfs2_meta_check(sdp, bh_ip))
 567                         error = -EIO;
 568                 else
 569                         mark_buffer_dirty(bh_ip);
 570
 571                 brelse(bh_log);
 572                 brelse(bh_ip);
 573
 574                 if (error)
 575                         break;
 576
 577                 sdp->sd_replayed_blocks++;
 578         }
 579
 580         return error;
 581 }
 582
 583 /**
 584  * gfs2_meta_sync - Sync all buffers associated with a glock
 585  * @gl: The glock
 586  *
 587  */
 588
 589 static void gfs2_meta_sync(struct gfs2_glock *gl)
 590 {
 591         struct address_space *mapping = gfs2_glock2aspace(gl);
 592         struct gfs2_sbd *sdp = gl->gl_sbd;
 593         int error;
 594
 595         if (mapping == NULL)
 596                 mapping = &sdp->sd_aspace;
 597
 598         filemap_fdatawrite(mapping);
 599         error = filemap_fdatawait(mapping);
 600
 601         if (error)
 602                 gfs2_io_error(gl->gl_sbd);
 603 }
 604
 605 static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
 606 {
 607         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 608         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 609
 610         if (error) {
 611                 gfs2_meta_sync(ip->i_gl);
 612                 return;
 613         }
 614         if (pass != 1)
 615                 return;
 616
 617         gfs2_meta_sync(ip->i_gl);
 618
 619         fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
 620                 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
 621 }
 622
 623 static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 624 {
 625         struct gfs2_meta_header *mh;
 626         unsigned int offset;
 627         struct list_head *head = &sdp->sd_log_le_revoke;
 628         struct gfs2_bufdata *bd;
 629         struct page *page;
 630         unsigned int length;
 631
 632         gfs2_write_revokes(sdp);
 633         if (!sdp->sd_log_num_revoke)
 634                 return;
 635
 636         length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
 637         page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
 638         offset = sizeof(struct gfs2_log_descriptor);
 639
 640         list_for_each_entry(bd, head, bd_list) {
 641                 sdp->sd_log_num_revoke--;
 642
 643                 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
 644
 645                         gfs2_log_write_page(sdp, page);
 646                         page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
 647                         mh = page_address(page);
 648                         clear_page(mh);
 649                         mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
 650                         mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
 651                         mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
 652                         offset = sizeof(struct gfs2_meta_header);
 653                 }
 654
 655                 *(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
 656                 offset += sizeof(u64);
 657         }
 658         gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
 659
 660         gfs2_log_write_page(sdp, page);
 661 }
 662
 663 static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 664 {
 665         struct list_head *head = &sdp->sd_log_le_revoke;
 666         struct gfs2_bufdata *bd;
 667         struct gfs2_glock *gl;
 668
 669         while (!list_empty(head)) {
 670                 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
 671                 list_del_init(&bd->bd_list);
 672                 gl = bd->bd_gl;
 673                 atomic_dec(&gl->gl_revokes);
 674                 clear_bit(GLF_LFLUSH, &gl->gl_flags);
 675                 kmem_cache_free(gfs2_bufdata_cachep, bd);
 676         }
 677 }
 678
 679 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
 680                                   struct gfs2_log_header_host *head, int pass)
 681 {
 682         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 683
 684         if (pass != 0)
 685                 return;
 686
 687         sdp->sd_found_revokes = 0;
 688         sdp->sd_replay_tail = head->lh_tail;
 689 }
 690
 691 static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
 692                                    struct gfs2_log_descriptor *ld, __be64 *ptr,
 693                                    int pass)
 694 {
 695         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 696         unsigned int blks = be32_to_cpu(ld->ld_length);
 697         unsigned int revokes = be32_to_cpu(ld->ld_data1);
 698         struct buffer_head *bh;
 699         unsigned int offset;
 700         u64 blkno;
 701         int first = 1;
 702         int error;
 703
 704         if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
 705                 return 0;
 706
 707         offset = sizeof(struct gfs2_log_descriptor);
 708
 709         for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
 710                 error = gfs2_replay_read_block(jd, start, &bh);
 711                 if (error)
 712                         return error;
 713
 714                 if (!first)
 715                         gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
 716
 717                 while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
 718                         blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
 719
 720                         error = gfs2_revoke_add(sdp, blkno, start);
 721                         if (error < 0) {
 722                                 brelse(bh);
 723                                 return error;
 724                         }
 725                         else if (error)
 726                                 sdp->sd_found_revokes++;
 727
 728                         if (!--revokes)
 729                                 break;
 730                         offset += sizeof(u64);
 731                 }
 732
 733                 brelse(bh);
 734                 offset = sizeof(struct gfs2_meta_header);
 735                 first = 0;
 736         }
 737
 738         return 0;
 739 }
 740
 741 static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
 742 {
 743         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 744
 745         if (error) {
 746                 gfs2_revoke_clean(sdp);
 747                 return;
 748         }
 749         if (pass != 1)
 750                 return;
 751
 752         fs_info(sdp, "jid=%u: Found %u revoke tags\n",
 753                 jd->jd_jid, sdp->sd_found_revokes);
 754
 755         gfs2_revoke_clean(sdp);
 756 }
 757
 758 /**
 759  * databuf_lo_before_commit - Scan the data buffers, writing as we go
 760  *
 761  */
 762
 763 static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 764 {
 765         unsigned int limit = buf_limit(sdp) / 2;
 766
 767         gfs2_before_commit(sdp, limit, sdp->sd_log_num_databuf,
 768                            &sdp->sd_log_le_databuf, 1);
 769 }
 770
 771 static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
 772                                     struct gfs2_log_descriptor *ld,
 773                                     __be64 *ptr, int pass)
 774 {
 775         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 776         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 777         struct gfs2_glock *gl = ip->i_gl;
 778         unsigned int blks = be32_to_cpu(ld->ld_data1);
 779         struct buffer_head *bh_log, *bh_ip;
 780         u64 blkno;
 781         u64 esc;
 782         int error = 0;
 783
 784         if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
 785                 return 0;
 786
 787         gfs2_replay_incr_blk(sdp, &start);
 788         for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
 789                 blkno = be64_to_cpu(*ptr++);
 790                 esc = be64_to_cpu(*ptr++);
 791
 792                 sdp->sd_found_blocks++;
 793
 794                 if (gfs2_revoke_check(sdp, blkno, start))
 795                         continue;
 796
 797                 error = gfs2_replay_read_block(jd, start, &bh_log);
 798                 if (error)
 799                         return error;
 800
 801                 bh_ip = gfs2_meta_new(gl, blkno);
 802                 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
 803
 804                 /* Unescape */
 805                 if (esc) {
 806                         __be32 *eptr = (__be32 *)bh_ip->b_data;
 807                         *eptr = cpu_to_be32(GFS2_MAGIC);
 808                 }
 809                 mark_buffer_dirty(bh_ip);
 810
 811                 brelse(bh_log);
 812                 brelse(bh_ip);
 813
 814                 sdp->sd_replayed_blocks++;
 815         }
 816
 817         return error;
 818 }
 819
 820 /* FIXME: sort out accounting for log blocks etc. */
 821
 822 static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
 823 {
 824         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 825         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 826
 827         if (error) {
 828                 gfs2_meta_sync(ip->i_gl);
 829                 return;
 830         }
 831         if (pass != 1)
 832                 return;
 833
 834         /* data sync? */
 835         gfs2_meta_sync(ip->i_gl);
 836
 837         fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
 838                 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
 839 }
 840
 841 static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 842 {
 843         struct list_head *head = &sdp->sd_log_le_databuf;
 844         struct gfs2_bufdata *bd;
 845
 846         if (tr == NULL) {
 847                 gfs2_assert(sdp, list_empty(head));
 848                 return;
 849         }
 850
 851         while (!list_empty(head)) {
 852                 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
 853                 list_del_init(&bd->bd_list);
 854                 sdp->sd_log_num_databuf--;
 855                 gfs2_unpin(sdp, bd->bd_bh, tr);
 856         }
 857         gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
 858 }
 859
 860
 861 const struct gfs2_log_operations gfs2_buf_lops = {
 862         .lo_before_commit = buf_lo_before_commit,
 863         .lo_after_commit = buf_lo_after_commit,
 864         .lo_before_scan = buf_lo_before_scan,
 865         .lo_scan_elements = buf_lo_scan_elements,
 866         .lo_after_scan = buf_lo_after_scan,
 867         .lo_name = "buf",
 868 };
 869
 870 const struct gfs2_log_operations gfs2_revoke_lops = {
 871         .lo_before_commit = revoke_lo_before_commit,
 872         .lo_after_commit = revoke_lo_after_commit,
 873         .lo_before_scan = revoke_lo_before_scan,
 874         .lo_scan_elements = revoke_lo_scan_elements,
 875         .lo_after_scan = revoke_lo_after_scan,
 876         .lo_name = "revoke",
 877 };
 878
 879 const struct gfs2_log_operations gfs2_databuf_lops = {
 880         .lo_before_commit = databuf_lo_before_commit,
 881         .lo_after_commit = databuf_lo_after_commit,
 882         .lo_scan_elements = databuf_lo_scan_elements,
 883         .lo_after_scan = databuf_lo_after_scan,
 884         .lo_name = "databuf",
 885 };
 886
 887 const struct gfs2_log_operations *gfs2_log_ops[] = {
 888         &gfs2_databuf_lops,
 889         &gfs2_buf_lops,
 890         &gfs2_revoke_lops,
 891         NULL,
 892 };
 893