]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - fs/gfs2/lops.c
GFS2: Clean up log write code path
[mirror_ubuntu-artful-kernel.git] / fs / gfs2 / lops.c
CommitLineData
b3b94faa
DT
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3a8a9a10 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
b3b94faa
DT
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
e9fc2aa0 7 * of the GNU General Public License version 2.
b3b94faa
DT
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
75ca61c1 15#include <linux/mempool.h>
5c676f6d 16#include <linux/gfs2_ondisk.h>
c969f58c
SW
17#include <linux/bio.h>
18#include <linux/fs.h>
b3b94faa
DT
19
20#include "gfs2.h"
5c676f6d 21#include "incore.h"
2332c443 22#include "inode.h"
b3b94faa
DT
23#include "glock.h"
24#include "log.h"
25#include "lops.h"
26#include "meta_io.h"
27#include "recovery.h"
28#include "rgrp.h"
29#include "trans.h"
5c676f6d 30#include "util.h"
63997775 31#include "trace_gfs2.h"
b3b94faa 32
9b9107a5
SW
33/**
34 * gfs2_pin - Pin a buffer in memory
35 * @sdp: The superblock
36 * @bh: The buffer to be pinned
37 *
38 * The log lock must be held when calling this function
39 */
40static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
41{
42 struct gfs2_bufdata *bd;
43
29687a2a 44 BUG_ON(!current->journal_info);
9b9107a5
SW
45
46 clear_buffer_dirty(bh);
47 if (test_set_buffer_pinned(bh))
48 gfs2_assert_withdraw(sdp, 0);
49 if (!buffer_uptodate(bh))
50 gfs2_io_error_bh(sdp, bh);
51 bd = bh->b_private;
52 /* If this buffer is in the AIL and it has already been written
53 * to in-place disk block, remove it from the AIL.
54 */
c618e87a 55 spin_lock(&sdp->sd_ail_lock);
9b9107a5
SW
56 if (bd->bd_ail)
57 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
c618e87a 58 spin_unlock(&sdp->sd_ail_lock);
9b9107a5 59 get_bh(bh);
5e687eac 60 atomic_inc(&sdp->sd_log_pinned);
63997775 61 trace_gfs2_pin(bd, 1);
9b9107a5
SW
62}
63
7c9ca621
BP
64static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
65{
66 return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
67}
68
69static void maybe_release_space(struct gfs2_bufdata *bd)
70{
71 struct gfs2_glock *gl = bd->bd_gl;
72 struct gfs2_sbd *sdp = gl->gl_sbd;
73 struct gfs2_rgrpd *rgd = gl->gl_object;
74 unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
75 struct gfs2_bitmap *bi = rgd->rd_bits + index;
76
77 if (bi->bi_clone == 0)
78 return;
79 if (sdp->sd_args.ar_discard)
66fc061b 80 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
7c9ca621
BP
81 memcpy(bi->bi_clone + bi->bi_offset,
82 bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
83 clear_bit(GBF_FULL, &bi->bi_flags);
84 rgd->rd_free_clone = rgd->rd_free;
85}
86
9b9107a5
SW
87/**
88 * gfs2_unpin - Unpin a buffer
89 * @sdp: the filesystem the buffer belongs to
90 * @bh: The buffer to unpin
91 * @ai:
29687a2a 92 * @flags: The inode dirty flags
9b9107a5
SW
93 *
94 */
95
96static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
97 struct gfs2_ail *ai)
98{
99 struct gfs2_bufdata *bd = bh->b_private;
100
29687a2a
SW
101 BUG_ON(!buffer_uptodate(bh));
102 BUG_ON(!buffer_pinned(bh));
9b9107a5
SW
103
104 lock_buffer(bh);
105 mark_buffer_dirty(bh);
106 clear_buffer_pinned(bh);
107
7c9ca621
BP
108 if (buffer_is_rgrp(bd))
109 maybe_release_space(bd);
110
d6a079e8 111 spin_lock(&sdp->sd_ail_lock);
9b9107a5
SW
112 if (bd->bd_ail) {
113 list_del(&bd->bd_ail_st_list);
114 brelse(bh);
115 } else {
116 struct gfs2_glock *gl = bd->bd_gl;
117 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
118 atomic_inc(&gl->gl_ail_count);
119 }
120 bd->bd_ail = ai;
121 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
d6a079e8
DC
122 spin_unlock(&sdp->sd_ail_lock);
123
29687a2a 124 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
63997775 125 trace_gfs2_pin(bd, 0);
9b9107a5 126 unlock_buffer(bh);
5e687eac 127 atomic_dec(&sdp->sd_log_pinned);
9b9107a5
SW
128}
129
e8c92ed7 130static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
16615be1 131{
e8c92ed7
SW
132 BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
133 (sdp->sd_log_flush_head != sdp->sd_log_head));
134
135 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
136 sdp->sd_log_flush_head = 0;
137 sdp->sd_log_flush_wrapped = 1;
138 }
16615be1
SW
139}
140
e8c92ed7 141static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
16615be1 142{
e8c92ed7
SW
143 unsigned int lbn = sdp->sd_log_flush_head;
144 struct gfs2_journal_extent *je;
145 u64 block;
146
147 list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) {
148 if (lbn >= je->lblock && lbn < je->lblock + je->blocks) {
149 block = je->dblock + lbn - je->lblock;
150 gfs2_log_incr_head(sdp);
151 return block;
152 }
153 }
154
155 return -1;
16615be1
SW
156}
157
e8c92ed7
SW
158/**
159 * gfs2_end_log_write_bh - end log write of pagecache data with buffers
160 * @sdp: The superblock
161 * @bvec: The bio_vec
162 * @error: The i/o status
163 *
164 * This finds the relavent buffers and unlocks then and sets the
165 * error flag according to the status of the i/o request. This is
166 * used when the log is writing data which has an in-place version
167 * that is pinned in the pagecache.
168 */
169
170static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
171 int error)
16615be1 172{
e8c92ed7
SW
173 struct buffer_head *bh, *next;
174 struct page *page = bvec->bv_page;
175 unsigned size;
176
177 bh = page_buffers(page);
178 size = bvec->bv_len;
179 while (bh_offset(bh) < bvec->bv_offset)
180 bh = bh->b_this_page;
181 do {
182 if (error)
183 set_buffer_write_io_error(bh);
184 unlock_buffer(bh);
185 next = bh->b_this_page;
186 size -= bh->b_size;
187 brelse(bh);
188 bh = next;
189 } while(bh && size);
16615be1
SW
190}
191
47ac5537 192/**
e8c92ed7
SW
193 * gfs2_end_log_write - end of i/o to the log
194 * @bio: The bio
195 * @error: Status of i/o request
196 *
197 * Each bio_vec contains either data from the pagecache or data
198 * relating to the log itself. Here we iterate over the bio_vec
199 * array, processing both kinds of data.
47ac5537
SW
200 *
201 */
202
e8c92ed7 203static void gfs2_end_log_write(struct bio *bio, int error)
47ac5537 204{
e8c92ed7
SW
205 struct gfs2_sbd *sdp = bio->bi_private;
206 struct bio_vec *bvec;
207 struct page *page;
208 int i;
209
210 if (error) {
211 sdp->sd_log_error = error;
212 fs_err(sdp, "Error %d writing to log\n", error);
213 }
214
215 bio_for_each_segment(bvec, bio, i) {
216 page = bvec->bv_page;
217 if (page_has_buffers(page))
218 gfs2_end_log_write_bh(sdp, bvec, error);
219 else
220 mempool_free(page, gfs2_page_pool);
221 }
47ac5537 222
e8c92ed7 223 bio_put(bio);
47ac5537
SW
224 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
225 wake_up(&sdp->sd_log_flush_wait);
226}
227
228/**
e8c92ed7
SW
229 * gfs2_log_flush_bio - Submit any pending log bio
230 * @sdp: The superblock
231 * @rw: The rw flags
47ac5537 232 *
e8c92ed7
SW
233 * Submit any pending part-built or full bio to the block device. If
234 * there is no pending bio, then this is a no-op.
47ac5537
SW
235 */
236
e8c92ed7 237void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw)
47ac5537 238{
e8c92ed7
SW
239 if (sdp->sd_log_bio) {
240 atomic_inc(&sdp->sd_log_in_flight);
241 submit_bio(rw, sdp->sd_log_bio);
242 sdp->sd_log_bio = NULL;
243 }
244}
47ac5537 245
e8c92ed7
SW
246/**
247 * gfs2_log_alloc_bio - Allocate a new bio for log writing
248 * @sdp: The superblock
249 * @blkno: The next device block number we want to write to
250 *
251 * This should never be called when there is a cached bio in the
252 * super block. When it returns, there will be a cached bio in the
253 * super block which will have as many bio_vecs as the device is
254 * happy to handle.
255 *
256 * Returns: Newly allocated bio
257 */
258
259static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno)
260{
261 struct super_block *sb = sdp->sd_vfs;
262 unsigned nrvecs = bio_get_nr_vecs(sb->s_bdev);
263 struct bio *bio;
264
265 BUG_ON(sdp->sd_log_bio);
266
267 while (1) {
268 bio = bio_alloc(GFP_NOIO, nrvecs);
269 if (likely(bio))
270 break;
271 nrvecs = max(nrvecs/2, 1U);
272 }
47ac5537 273
e8c92ed7
SW
274 bio->bi_sector = blkno * (sb->s_blocksize >> 9);
275 bio->bi_bdev = sb->s_bdev;
276 bio->bi_end_io = gfs2_end_log_write;
277 bio->bi_private = sdp;
278
279 sdp->sd_log_bio = bio;
280
281 return bio;
47ac5537
SW
282}
283
284/**
e8c92ed7
SW
285 * gfs2_log_get_bio - Get cached log bio, or allocate a new one
286 * @sdp: The superblock
287 * @blkno: The device block number we want to write to
288 *
289 * If there is a cached bio, then if the next block number is sequential
290 * with the previous one, return it, otherwise flush the bio to the
291 * device. If there is not a cached bio, or we just flushed it, then
292 * allocate a new one.
47ac5537 293 *
e8c92ed7 294 * Returns: The bio to use for log writes
47ac5537
SW
295 */
296
e8c92ed7 297static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
47ac5537 298{
e8c92ed7
SW
299 struct bio *bio = sdp->sd_log_bio;
300 u64 nblk;
301
302 if (bio) {
303 nblk = bio->bi_sector + bio_sectors(bio);
304 nblk >>= sdp->sd_fsb2bb_shift;
305 if (blkno == nblk)
306 return bio;
307 gfs2_log_flush_bio(sdp, WRITE);
308 }
309
310 return gfs2_log_alloc_bio(sdp, blkno);
47ac5537
SW
311}
312
e8c92ed7 313
47ac5537 314/**
e8c92ed7 315 * gfs2_log_write - write to log
47ac5537 316 * @sdp: the filesystem
e8c92ed7
SW
317 * @page: the page to write
318 * @size: the size of the data to write
319 * @offset: the offset within the page
47ac5537 320 *
e8c92ed7
SW
321 * Try and add the page segment to the current bio. If that fails,
322 * submit the current bio to the device and create a new one, and
323 * then add the page segment to that.
47ac5537
SW
324 */
325
e8c92ed7
SW
326static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
327 unsigned size, unsigned offset)
47ac5537 328{
e8c92ed7
SW
329 u64 blkno = gfs2_log_bmap(sdp);
330 struct bio *bio;
331 int ret;
332
333 bio = gfs2_log_get_bio(sdp, blkno);
334 ret = bio_add_page(bio, page, size, offset);
335 if (ret == 0) {
336 gfs2_log_flush_bio(sdp, WRITE);
337 bio = gfs2_log_alloc_bio(sdp, blkno);
338 ret = bio_add_page(bio, page, size, offset);
339 WARN_ON(ret == 0);
340 }
341}
47ac5537 342
e8c92ed7
SW
343/**
344 * gfs2_log_write_bh - write a buffer's content to the log
345 * @sdp: The super block
346 * @bh: The buffer pointing to the in-place location
347 *
348 * This writes the content of the buffer to the next available location
349 * in the log. The buffer will be unlocked once the i/o to the log has
350 * completed.
351 */
352
353static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
354{
355 gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh));
356}
47ac5537 357
e8c92ed7
SW
358/**
359 * gfs2_log_write_page - write one block stored in a page, into the log
360 * @sdp: The superblock
361 * @page: The struct page
362 *
363 * This writes the first block-sized part of the page into the log. Note
364 * that the page must have been allocated from the gfs2_page_pool mempool
365 * and that after this has been called, ownership has been transferred and
366 * the page may be freed at any time.
367 */
47ac5537 368
e8c92ed7
SW
369void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
370{
371 struct super_block *sb = sdp->sd_vfs;
372 gfs2_log_write(sdp, page, sb->s_blocksize, 0);
47ac5537 373}
16615be1 374
e8c92ed7 375static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
16615be1 376{
e8c92ed7
SW
377 void *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
378 struct gfs2_log_descriptor *ld = page_address(page);
379 clear_page(ld);
16615be1
SW
380 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
381 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
382 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
383 ld->ld_type = cpu_to_be32(ld_type);
384 ld->ld_length = 0;
385 ld->ld_data1 = 0;
386 ld->ld_data2 = 0;
e8c92ed7 387 return page;
16615be1
SW
388}
389
b3b94faa
DT
390static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
391{
392 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
0ab7d13f 393 struct gfs2_meta_header *mh;
b3b94faa
DT
394 struct gfs2_trans *tr;
395
9b9107a5 396 lock_buffer(bd->bd_bh);
8bd95727 397 gfs2_log_lock(sdp);
9b9107a5
SW
398 if (!list_empty(&bd->bd_list_tr))
399 goto out;
5c676f6d 400 tr = current->journal_info;
b3b94faa
DT
401 tr->tr_touched = 1;
402 tr->tr_num_buf++;
403 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
b3b94faa 404 if (!list_empty(&le->le_list))
9b9107a5 405 goto out;
2bcd610d
SW
406 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
407 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
b3b94faa 408 gfs2_meta_check(sdp, bd->bd_bh);
a98ab220 409 gfs2_pin(sdp, bd->bd_bh);
0ab7d13f
SW
410 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
411 mh->__pad0 = cpu_to_be64(0);
412 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
b3b94faa
DT
413 sdp->sd_log_num_buf++;
414 list_add(&le->le_list, &sdp->sd_log_le_buf);
b3b94faa 415 tr->tr_num_buf_new++;
9b9107a5
SW
416out:
417 gfs2_log_unlock(sdp);
418 unlock_buffer(bd->bd_bh);
b3b94faa
DT
419}
420
b3b94faa
DT
421static void buf_lo_before_commit(struct gfs2_sbd *sdp)
422{
b3b94faa
DT
423 struct gfs2_log_descriptor *ld;
424 struct gfs2_bufdata *bd1 = NULL, *bd2;
e8c92ed7 425 struct page *page;
905d2aef 426 unsigned int total;
b3b94faa
DT
427 unsigned int limit;
428 unsigned int num;
429 unsigned n;
430 __be64 *ptr;
431
2332c443 432 limit = buf_limit(sdp);
b3b94faa
DT
433 /* for 4k blocks, limit = 503 */
434
905d2aef
BP
435 gfs2_log_lock(sdp);
436 total = sdp->sd_log_num_buf;
b3b94faa
DT
437 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
438 while(total) {
439 num = total;
440 if (total > limit)
441 num = limit;
905d2aef 442 gfs2_log_unlock(sdp);
e8c92ed7
SW
443 page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA);
444 ld = page_address(page);
905d2aef 445 gfs2_log_lock(sdp);
e8c92ed7 446 ptr = (__be64 *)(ld + 1);
b3b94faa
DT
447 ld->ld_length = cpu_to_be32(num + 1);
448 ld->ld_data1 = cpu_to_be32(num);
b3b94faa
DT
449
450 n = 0;
568f4c96
SW
451 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
452 bd_le.le_list) {
b3b94faa
DT
453 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
454 if (++n >= num)
455 break;
456 }
457
905d2aef 458 gfs2_log_unlock(sdp);
e8c92ed7 459 gfs2_log_write_page(sdp, page);
905d2aef 460 gfs2_log_lock(sdp);
b3b94faa
DT
461
462 n = 0;
568f4c96
SW
463 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
464 bd_le.le_list) {
16615be1 465 get_bh(bd2->bd_bh);
905d2aef 466 gfs2_log_unlock(sdp);
16615be1 467 lock_buffer(bd2->bd_bh);
e8c92ed7 468 gfs2_log_write_bh(sdp, bd2->bd_bh);
905d2aef 469 gfs2_log_lock(sdp);
b3b94faa
DT
470 if (++n >= num)
471 break;
472 }
473
905d2aef 474 BUG_ON(total < num);
b3b94faa
DT
475 total -= num;
476 }
905d2aef 477 gfs2_log_unlock(sdp);
b3b94faa
DT
478}
479
480static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
481{
482 struct list_head *head = &sdp->sd_log_le_buf;
483 struct gfs2_bufdata *bd;
484
485 while (!list_empty(head)) {
486 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
487 list_del_init(&bd->bd_le.le_list);
488 sdp->sd_log_num_buf--;
489
a98ab220 490 gfs2_unpin(sdp, bd->bd_bh, ai);
b3b94faa
DT
491 }
492 gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
493}
494
495static void buf_lo_before_scan(struct gfs2_jdesc *jd,
55167622 496 struct gfs2_log_header_host *head, int pass)
b3b94faa 497{
feaa7bba 498 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
b3b94faa
DT
499
500 if (pass != 0)
501 return;
502
503 sdp->sd_found_blocks = 0;
504 sdp->sd_replayed_blocks = 0;
505}
506
507static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
508 struct gfs2_log_descriptor *ld, __be64 *ptr,
509 int pass)
510{
feaa7bba
SW
511 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
512 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
5c676f6d 513 struct gfs2_glock *gl = ip->i_gl;
b3b94faa
DT
514 unsigned int blks = be32_to_cpu(ld->ld_data1);
515 struct buffer_head *bh_log, *bh_ip;
cd915493 516 u64 blkno;
b3b94faa
DT
517 int error = 0;
518
519 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
520 return 0;
521
522 gfs2_replay_incr_blk(sdp, &start);
523
524 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
525 blkno = be64_to_cpu(*ptr++);
526
527 sdp->sd_found_blocks++;
528
529 if (gfs2_revoke_check(sdp, blkno, start))
530 continue;
531
532 error = gfs2_replay_read_block(jd, start, &bh_log);
82ffa516
SW
533 if (error)
534 return error;
b3b94faa
DT
535
536 bh_ip = gfs2_meta_new(gl, blkno);
537 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
538
539 if (gfs2_meta_check(sdp, bh_ip))
540 error = -EIO;
541 else
542 mark_buffer_dirty(bh_ip);
543
544 brelse(bh_log);
545 brelse(bh_ip);
546
547 if (error)
548 break;
549
550 sdp->sd_replayed_blocks++;
551 }
552
553 return error;
554}
555
556static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
557{
feaa7bba
SW
558 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
559 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
b3b94faa
DT
560
561 if (error) {
7276b3b0 562 gfs2_meta_sync(ip->i_gl);
b3b94faa
DT
563 return;
564 }
565 if (pass != 1)
566 return;
567
7276b3b0 568 gfs2_meta_sync(ip->i_gl);
b3b94faa
DT
569
570 fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
571 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
572}
573
574static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
575{
f42ab085
SW
576 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
577 struct gfs2_glock *gl = bd->bd_gl;
b3b94faa
DT
578 struct gfs2_trans *tr;
579
5c676f6d 580 tr = current->journal_info;
b3b94faa
DT
581 tr->tr_touched = 1;
582 tr->tr_num_revoke++;
b3b94faa 583 sdp->sd_log_num_revoke++;
f42ab085
SW
584 atomic_inc(&gl->gl_revokes);
585 set_bit(GLF_LFLUSH, &gl->gl_flags);
b3b94faa 586 list_add(&le->le_list, &sdp->sd_log_le_revoke);
b3b94faa
DT
587}
588
589static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
590{
591 struct gfs2_log_descriptor *ld;
592 struct gfs2_meta_header *mh;
b3b94faa
DT
593 unsigned int offset;
594 struct list_head *head = &sdp->sd_log_le_revoke;
82e86087 595 struct gfs2_bufdata *bd;
e8c92ed7 596 struct page *page;
b3b94faa
DT
597
598 if (!sdp->sd_log_num_revoke)
599 return;
600
e8c92ed7
SW
601 page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE);
602 ld = page_address(page);
568f4c96 603 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
cd915493 604 sizeof(u64)));
b3b94faa 605 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
b3b94faa
DT
606 offset = sizeof(struct gfs2_log_descriptor);
607
f42ab085 608 list_for_each_entry(bd, head, bd_le.le_list) {
b3b94faa
DT
609 sdp->sd_log_num_revoke--;
610
cd915493 611 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
b3b94faa 612
e8c92ed7
SW
613 gfs2_log_write_page(sdp, page);
614 page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
615 mh = page_address(page);
616 clear_page(mh);
b3b94faa 617 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
e3167ded
SW
618 mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
619 mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
b3b94faa
DT
620 offset = sizeof(struct gfs2_meta_header);
621 }
622
e8c92ed7 623 *(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
cd915493 624 offset += sizeof(u64);
b3b94faa
DT
625 }
626 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
627
e8c92ed7 628 gfs2_log_write_page(sdp, page);
b3b94faa
DT
629}
630
f42ab085
SW
631static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
632{
633 struct list_head *head = &sdp->sd_log_le_revoke;
634 struct gfs2_bufdata *bd;
635 struct gfs2_glock *gl;
636
637 while (!list_empty(head)) {
638 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
639 list_del_init(&bd->bd_le.le_list);
640 gl = bd->bd_gl;
641 atomic_dec(&gl->gl_revokes);
642 clear_bit(GLF_LFLUSH, &gl->gl_flags);
643 kmem_cache_free(gfs2_bufdata_cachep, bd);
644 }
645}
646
b3b94faa 647static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
55167622 648 struct gfs2_log_header_host *head, int pass)
b3b94faa 649{
feaa7bba 650 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
b3b94faa
DT
651
652 if (pass != 0)
653 return;
654
655 sdp->sd_found_revokes = 0;
656 sdp->sd_replay_tail = head->lh_tail;
657}
658
659static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
660 struct gfs2_log_descriptor *ld, __be64 *ptr,
661 int pass)
662{
feaa7bba 663 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
b3b94faa
DT
664 unsigned int blks = be32_to_cpu(ld->ld_length);
665 unsigned int revokes = be32_to_cpu(ld->ld_data1);
666 struct buffer_head *bh;
667 unsigned int offset;
cd915493 668 u64 blkno;
b3b94faa
DT
669 int first = 1;
670 int error;
671
672 if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
673 return 0;
674
675 offset = sizeof(struct gfs2_log_descriptor);
676
677 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
678 error = gfs2_replay_read_block(jd, start, &bh);
679 if (error)
680 return error;
681
682 if (!first)
683 gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
684
cd915493 685 while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
b3b94faa
DT
686 blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
687
688 error = gfs2_revoke_add(sdp, blkno, start);
3ad62e87
BP
689 if (error < 0) {
690 brelse(bh);
b3b94faa 691 return error;
3ad62e87 692 }
b3b94faa
DT
693 else if (error)
694 sdp->sd_found_revokes++;
695
696 if (!--revokes)
697 break;
cd915493 698 offset += sizeof(u64);
b3b94faa
DT
699 }
700
701 brelse(bh);
702 offset = sizeof(struct gfs2_meta_header);
703 first = 0;
704 }
705
706 return 0;
707}
708
709static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
710{
feaa7bba 711 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
b3b94faa
DT
712
713 if (error) {
714 gfs2_revoke_clean(sdp);
715 return;
716 }
717 if (pass != 1)
718 return;
719
720 fs_info(sdp, "jid=%u: Found %u revoke tags\n",
721 jd->jd_jid, sdp->sd_found_revokes);
722
723 gfs2_revoke_clean(sdp);
724}
725
18ec7d5c
SW
726/**
727 * databuf_lo_add - Add a databuf to the transaction.
728 *
729 * This is used in two distinct cases:
730 * i) In ordered write mode
731 * We put the data buffer on a list so that we can ensure that its
732 * synced to disk at the right time
733 * ii) In journaled data mode
734 * We need to journal the data block in the same way as metadata in
735 * the functions above. The difference is that here we have a tag
736 * which is two __be64's being the block number (as per meta data)
737 * and a flag which says whether the data block needs escaping or
738 * not. This means we need a new log entry for each 251 or so data
739 * blocks, which isn't an enormous overhead but twice as much as
740 * for normal metadata blocks.
741 */
b3b94faa
DT
742static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
743{
18ec7d5c 744 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
5c676f6d 745 struct gfs2_trans *tr = current->journal_info;
18ec7d5c 746 struct address_space *mapping = bd->bd_bh->b_page->mapping;
feaa7bba 747 struct gfs2_inode *ip = GFS2_I(mapping->host);
b3b94faa 748
9b9107a5 749 lock_buffer(bd->bd_bh);
8bd95727 750 gfs2_log_lock(sdp);
9ff8ec32
SW
751 if (tr) {
752 if (!list_empty(&bd->bd_list_tr))
753 goto out;
754 tr->tr_touched = 1;
755 if (gfs2_is_jdata(ip)) {
756 tr->tr_num_buf++;
757 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
758 }
773ed1a0 759 }
2332c443 760 if (!list_empty(&le->le_list))
9b9107a5 761 goto out;
2332c443 762
2bcd610d
SW
763 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
764 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
2332c443 765 if (gfs2_is_jdata(ip)) {
2332c443
RP
766 gfs2_pin(sdp, bd->bd_bh);
767 tr->tr_num_databuf_new++;
d7b616e2 768 sdp->sd_log_num_databuf++;
e5884636 769 list_add_tail(&le->le_list, &sdp->sd_log_le_databuf);
d7b616e2 770 } else {
e5884636 771 list_add_tail(&le->le_list, &sdp->sd_log_le_ordered);
9b9107a5 772 }
9b9107a5 773out:
b3b94faa 774 gfs2_log_unlock(sdp);
9b9107a5 775 unlock_buffer(bd->bd_bh);
b3b94faa
DT
776}
777
16615be1 778static void gfs2_check_magic(struct buffer_head *bh)
18ec7d5c 779{
18ec7d5c
SW
780 void *kaddr;
781 __be32 *ptr;
18ec7d5c 782
16615be1 783 clear_buffer_escaped(bh);
d9349285 784 kaddr = kmap_atomic(bh->b_page);
18ec7d5c
SW
785 ptr = kaddr + bh_offset(bh);
786 if (*ptr == cpu_to_be32(GFS2_MAGIC))
16615be1 787 set_buffer_escaped(bh);
d9349285 788 kunmap_atomic(kaddr);
18ec7d5c
SW
789}
790
e8c92ed7
SW
791static void gfs2_write_blocks(struct gfs2_sbd *sdp,
792 struct gfs2_log_descriptor *ld,
793 struct page *page,
16615be1
SW
794 struct list_head *list, struct list_head *done,
795 unsigned int n)
b3b94faa 796{
16615be1
SW
797 struct gfs2_bufdata *bd;
798 __be64 *ptr;
d7b616e2 799
e8c92ed7 800 if (!ld)
16615be1 801 return;
b3b94faa 802
16615be1
SW
803 ld->ld_length = cpu_to_be32(n + 1);
804 ld->ld_data1 = cpu_to_be32(n);
e8c92ed7 805 ptr = (__force __be64 *)(ld + 1);
16615be1 806
e8c92ed7 807 gfs2_log_write_page(sdp, page);
f55ab26a 808 gfs2_log_lock(sdp);
e8c92ed7 809 while (!list_empty(list)) {
16615be1
SW
810 bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
811 list_move_tail(&bd->bd_le.le_list, done);
812 get_bh(bd->bd_bh);
f55ab26a 813 gfs2_log_unlock(sdp);
16615be1
SW
814 lock_buffer(bd->bd_bh);
815 if (buffer_escaped(bd->bd_bh)) {
816 void *kaddr;
e8c92ed7
SW
817 page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
818 ptr = page_address(page);
d9349285 819 kaddr = kmap_atomic(bd->bd_bh->b_page);
e8c92ed7
SW
820 memcpy(ptr, kaddr + bh_offset(bd->bd_bh),
821 bd->bd_bh->b_size);
d9349285 822 kunmap_atomic(kaddr);
e8c92ed7 823 *(__be32 *)ptr = 0;
16615be1
SW
824 clear_buffer_escaped(bd->bd_bh);
825 unlock_buffer(bd->bd_bh);
826 brelse(bd->bd_bh);
e8c92ed7 827 gfs2_log_write_page(sdp, page);
16615be1 828 } else {
e8c92ed7 829 gfs2_log_write_bh(sdp, bd->bd_bh);
18ec7d5c 830 }
e8c92ed7 831 n--;
f55ab26a 832 gfs2_log_lock(sdp);
16615be1
SW
833 }
834 gfs2_log_unlock(sdp);
e8c92ed7 835 BUG_ON(n != 0);
16615be1
SW
836}
837
838/**
839 * databuf_lo_before_commit - Scan the data buffers, writing as we go
840 *
841 */
842
843static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
844{
845 struct gfs2_bufdata *bd = NULL;
e8c92ed7
SW
846 struct gfs2_log_descriptor *ld = NULL;
847 struct page *page = NULL;
16615be1
SW
848 unsigned int n = 0;
849 __be64 *ptr = NULL, *end = NULL;
850 LIST_HEAD(processed);
851 LIST_HEAD(in_progress);
852
853 gfs2_log_lock(sdp);
854 while (!list_empty(&sdp->sd_log_le_databuf)) {
855 if (ptr == end) {
f55ab26a 856 gfs2_log_unlock(sdp);
e8c92ed7 857 gfs2_write_blocks(sdp, ld, page, &in_progress, &processed, n);
16615be1 858 n = 0;
e8c92ed7
SW
859 page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA);
860 ld = page_address(page);
861 ptr = (__force __be64 *)(ld + 1);
862 end = (__force __be64 *)(page_address(page) + sdp->sd_vfs->s_blocksize);
863 end--;
f55ab26a 864 gfs2_log_lock(sdp);
16615be1 865 continue;
18ec7d5c 866 }
16615be1
SW
867 bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list);
868 list_move_tail(&bd->bd_le.le_list, &in_progress);
869 gfs2_check_magic(bd->bd_bh);
870 *ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr);
e8c92ed7 871 *ptr++ = cpu_to_be64(buffer_escaped(bd->bd_bh) ? 1 : 0);
16615be1 872 n++;
b3b94faa 873 }
f55ab26a 874 gfs2_log_unlock(sdp);
e8c92ed7 875 gfs2_write_blocks(sdp, ld, page, &in_progress, &processed, n);
16615be1
SW
876 gfs2_log_lock(sdp);
877 list_splice(&processed, &sdp->sd_log_le_databuf);
878 gfs2_log_unlock(sdp);
18ec7d5c
SW
879}
880
881static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
882 struct gfs2_log_descriptor *ld,
883 __be64 *ptr, int pass)
884{
feaa7bba
SW
885 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
886 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
5c676f6d 887 struct gfs2_glock *gl = ip->i_gl;
18ec7d5c
SW
888 unsigned int blks = be32_to_cpu(ld->ld_data1);
889 struct buffer_head *bh_log, *bh_ip;
cd915493
SW
890 u64 blkno;
891 u64 esc;
18ec7d5c
SW
892 int error = 0;
893
894 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
895 return 0;
896
897 gfs2_replay_incr_blk(sdp, &start);
898 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
899 blkno = be64_to_cpu(*ptr++);
900 esc = be64_to_cpu(*ptr++);
901
902 sdp->sd_found_blocks++;
903
904 if (gfs2_revoke_check(sdp, blkno, start))
905 continue;
906
907 error = gfs2_replay_read_block(jd, start, &bh_log);
908 if (error)
909 return error;
910
911 bh_ip = gfs2_meta_new(gl, blkno);
912 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
913
914 /* Unescape */
915 if (esc) {
916 __be32 *eptr = (__be32 *)bh_ip->b_data;
917 *eptr = cpu_to_be32(GFS2_MAGIC);
918 }
919 mark_buffer_dirty(bh_ip);
920
921 brelse(bh_log);
922 brelse(bh_ip);
18ec7d5c
SW
923
924 sdp->sd_replayed_blocks++;
925 }
926
927 return error;
928}
929
930/* FIXME: sort out accounting for log blocks etc. */
931
932static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
933{
feaa7bba
SW
934 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
935 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
18ec7d5c
SW
936
937 if (error) {
7276b3b0 938 gfs2_meta_sync(ip->i_gl);
18ec7d5c
SW
939 return;
940 }
941 if (pass != 1)
942 return;
943
944 /* data sync? */
7276b3b0 945 gfs2_meta_sync(ip->i_gl);
18ec7d5c
SW
946
947 fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
948 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
949}
950
951static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
952{
953 struct list_head *head = &sdp->sd_log_le_databuf;
954 struct gfs2_bufdata *bd;
955
956 while (!list_empty(head)) {
957 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
b8e1aabf 958 list_del_init(&bd->bd_le.le_list);
18ec7d5c 959 sdp->sd_log_num_databuf--;
18ec7d5c 960 gfs2_unpin(sdp, bd->bd_bh, ai);
18ec7d5c 961 }
b3b94faa
DT
962 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
963}
964
18ec7d5c 965
b09e593d 966const struct gfs2_log_operations gfs2_buf_lops = {
b3b94faa 967 .lo_add = buf_lo_add,
b3b94faa
DT
968 .lo_before_commit = buf_lo_before_commit,
969 .lo_after_commit = buf_lo_after_commit,
970 .lo_before_scan = buf_lo_before_scan,
971 .lo_scan_elements = buf_lo_scan_elements,
972 .lo_after_scan = buf_lo_after_scan,
ea67eedb 973 .lo_name = "buf",
b3b94faa
DT
974};
975
b09e593d 976const struct gfs2_log_operations gfs2_revoke_lops = {
b3b94faa
DT
977 .lo_add = revoke_lo_add,
978 .lo_before_commit = revoke_lo_before_commit,
f42ab085 979 .lo_after_commit = revoke_lo_after_commit,
b3b94faa
DT
980 .lo_before_scan = revoke_lo_before_scan,
981 .lo_scan_elements = revoke_lo_scan_elements,
982 .lo_after_scan = revoke_lo_after_scan,
ea67eedb 983 .lo_name = "revoke",
b3b94faa
DT
984};
985
b09e593d 986const struct gfs2_log_operations gfs2_rg_lops = {
ea67eedb 987 .lo_name = "rg",
b3b94faa
DT
988};
989
b09e593d 990const struct gfs2_log_operations gfs2_databuf_lops = {
b3b94faa
DT
991 .lo_add = databuf_lo_add,
992 .lo_before_commit = databuf_lo_before_commit,
18ec7d5c
SW
993 .lo_after_commit = databuf_lo_after_commit,
994 .lo_scan_elements = databuf_lo_scan_elements,
995 .lo_after_scan = databuf_lo_after_scan,
ea67eedb 996 .lo_name = "databuf",
b3b94faa
DT
997};
998
b09e593d 999const struct gfs2_log_operations *gfs2_log_ops[] = {
16615be1 1000 &gfs2_databuf_lops,
b3b94faa 1001 &gfs2_buf_lops,
b3b94faa 1002 &gfs2_rg_lops,
16615be1 1003 &gfs2_revoke_lops,
ea67eedb 1004 NULL,
b3b94faa
DT
1005};
1006