]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/gfs2/bmap.c
Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
[mirror_ubuntu-jammy-kernel.git] / fs / gfs2 / bmap.c
CommitLineData
7336d0e6 1// SPDX-License-Identifier: GPL-2.0-only
b3b94faa
DT
2/*
3 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3a8a9a10 4 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
b3b94faa
DT
5 */
6
b3b94faa
DT
7#include <linux/spinlock.h>
8#include <linux/completion.h>
9#include <linux/buffer_head.h>
64dd153c 10#include <linux/blkdev.h>
5c676f6d 11#include <linux/gfs2_ondisk.h>
71b86f56 12#include <linux/crc32.h>
3974320c 13#include <linux/iomap.h>
98583b3e 14#include <linux/ktime.h>
b3b94faa
DT
15
16#include "gfs2.h"
5c676f6d 17#include "incore.h"
b3b94faa
DT
18#include "bmap.h"
19#include "glock.h"
20#include "inode.h"
b3b94faa 21#include "meta_io.h"
b3b94faa
DT
22#include "quota.h"
23#include "rgrp.h"
45138990 24#include "log.h"
4c16c36a 25#include "super.h"
b3b94faa 26#include "trans.h"
18ec7d5c 27#include "dir.h"
5c676f6d 28#include "util.h"
64bc06bb 29#include "aops.h"
63997775 30#include "trace_gfs2.h"
b3b94faa
DT
31
/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	/* One buffer per tree level; mp_bh[0] is the dinode buffer. */
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	/* Pointer-slot index within the buffer at each level. */
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};
42
64bc06bb
AG
43static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
44
f25ef0c1
SW
45/**
46 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
47 * @ip: the inode
48 * @dibh: the dinode buffer
49 * @block: the block number that was allocated
ff8f33c8 50 * @page: The (optional) page. This is looked up if @page is NULL
f25ef0c1
SW
51 *
52 * Returns: errno
53 */
54
static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
{
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;
	int release = 0;	/* set if we looked the page up ourselves */

	/* Only page index 0 can hold stuffed data; look it up if needed. */
	if (!page || page->index) {
		page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
		if (!page)
			return -ENOMEM;
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);
		u64 dsize = i_size_read(inode);

		/* Copy no more than the stuffed data area of the dinode. */
		if (dsize > gfs2_max_stuffed_size(ip))
			dsize = gfs2_max_stuffed_size(ip);

		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, BIT(inode->i_blkbits),
				     BIT(BH_Uptodate));

	bh = page_buffers(page);

	/* Point the page's buffer at the newly allocated data block. */
	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	/* Journaled data goes through the transaction; otherwise the buffer
	   is dirtied and the inode tracked on the ordered-write list. */
	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
	else {
		mark_buffer_dirty(bh);
		gfs2_ordered_add_inode(ip);
	}

	if (release) {
		unlock_page(page);
		put_page(page);
	}

	return 0;
}
107
b3b94faa
DT
108/**
109 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
110 * @ip: The GFS2 inode to unstuff
ff8f33c8 111 * @page: The (optional) page. This is looked up if the @page is NULL
b3b94faa
DT
112 *
113 * This routine unstuffs a dinode and returns it to a "normal" state such
114 * that the height can be grown in the traditional way.
115 *
116 * Returns: errno
117 */
118
int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	/* An empty file stays empty; only nonzero sizes need a data block. */
	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			/* Directory data is journaled as metadata. */
			gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		/* First pointer slot after the dinode header -> new block. */
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	/* The inode is now a height-1 tree, in memory and on disk. */
	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}
177
b3b94faa
DT
178
179/**
180 * find_metapath - Find path through the metadata tree
9b8c81d1 181 * @sdp: The superblock
b3b94faa 182 * @block: The disk block to look up
07e23d68 183 * @mp: The metapath to return the result in
9b8c81d1 184 * @height: The pre-calculated height of the metadata tree
b3b94faa
DT
185 *
186 * This routine returns a struct metapath structure that defines a path
187 * through the metadata of inode "ip" to get to block "block".
188 *
189 * Example:
190 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
191 * filesystem with a blocksize of 4096.
192 *
193 * find_metapath() would return a struct metapath structure set to:
07e23d68 194 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
b3b94faa
DT
195 *
196 * That means that in order to get to the block containing the byte at
197 * offset 101342453, we would load the indirect block pointed to by pointer
198 * 0 in the dinode. We would then load the indirect block pointed to by
199 * pointer 48 in that indirect block. We would then load the data block
200 * pointed to by pointer 165 in that indirect block.
201 *
202 * ----------------------------------------
203 * | Dinode | |
204 * | | 4|
205 * | |0 1 2 3 4 5 9|
206 * | | 6|
207 * ----------------------------------------
208 * |
209 * |
210 * V
211 * ----------------------------------------
212 * | Indirect Block |
213 * | 5|
214 * | 4 4 4 4 4 5 5 1|
215 * |0 5 6 7 8 9 0 1 2|
216 * ----------------------------------------
217 * |
218 * |
219 * V
220 * ----------------------------------------
221 * | Indirect Block |
222 * | 1 1 1 1 1 5|
223 * | 6 6 6 6 6 1|
224 * |0 3 4 5 6 7 2|
225 * ----------------------------------------
226 * |
227 * |
228 * V
229 * ----------------------------------------
230 * | Data block containing offset |
231 * | 101342453 |
232 * | |
233 * | |
234 * ----------------------------------------
235 *
236 */
237
9b8c81d1
SW
static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
			  struct metapath *mp, unsigned int height)
{
	unsigned int i;

	mp->mp_fheight = height;
	/* Work bottom-up: each do_div() stores the pointer index for level i
	   and reduces @block to its position within the next level up. */
	for (i = height; i--;)
		mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
}
247
5af4e7a0 248static inline unsigned int metapath_branch_start(const struct metapath *mp)
9b8c81d1 249{
5af4e7a0
BM
250 if (mp->mp_list[0] == 0)
251 return 2;
252 return 1;
9b8c81d1
SW
253}
254
d552a2b9 255/**
20cdc193 256 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
d552a2b9
BP
257 * @height: The metadata height (0 = dinode)
258 * @mp: The metapath
259 */
static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
{
	struct buffer_head *bh = mp->mp_bh[height];
	/* Level 0 is the dinode, which has a larger on-disk header than
	   an indirect block. */
	if (height == 0)
		return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
	return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
}
267
b3b94faa
DT
268/**
269 * metapointer - Return pointer to start of metadata in a buffer
b3b94faa
DT
270 * @height: The metadata height (0 = dinode)
271 * @mp: The metapath
272 *
273 * Return a pointer to the block number of the next height of the metadata
274 * tree given a buffer containing the pointer to the current height of the
275 * metadata tree.
276 */
277
9b8c81d1 278static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
b3b94faa 279{
d552a2b9
BP
280 __be64 *p = metaptr1(height, mp);
281 return p + mp->mp_list[height];
b3b94faa
DT
282}
283
7841b9f0
AG
/* Return the first address past the end of the pointer area at @height. */
static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
{
	const struct buffer_head *bh = mp->mp_bh[height];
	return (const __be64 *)(bh->b_data + bh->b_size);
}
289
/* Copy @mp into @clone, taking an extra reference on each held buffer
 * so the clone can be released independently via release_metapath(). */
static void clone_metapath(struct metapath *clone, struct metapath *mp)
{
	unsigned int hgt;

	*clone = *mp;
	for (hgt = 0; hgt < mp->mp_aheight; hgt++)
		get_bh(clone->mp_bh[hgt]);
}
298
/*
 * gfs2_metapath_ra - issue read-ahead for the blocks referenced by the
 * metadata pointers in [start, end).  Pointers that are zero (holes) or
 * whose buffers are already locked/up-to-date are skipped.
 */
static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
	const __be64 *t;

	for (t = start; t < end; t++) {
		struct buffer_head *rabh;

		if (!*t)	/* unallocated pointer - nothing to read */
			continue;

		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
		if (trylock_buffer(rabh)) {
			if (!buffer_uptodate(rabh)) {
				rabh->b_end_io = end_buffer_read_sync;
				submit_bh(REQ_OP_READ,
					  REQ_RAHEAD | REQ_META | REQ_PRIO,
					  rabh);
				/* buffer released by the completion handler */
				continue;
			}
			unlock_buffer(rabh);
		}
		brelse(rabh);
	}
}
323
e8b43fe0
AG
/*
 * __fillup_metapath - read indirect buffers along the path from height @x
 * up to (but not including) height @h, stopping early at the first
 * unallocated (zero) pointer.  Sets mp->mp_aheight to the height reached.
 *
 * Returns: 0 on success or errno from the metadata read
 */
static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
			     unsigned int x, unsigned int h)
{
	for (; x < h; x++) {
		__be64 *ptr = metapointer(x, mp);
		u64 dblock = be64_to_cpu(*ptr);
		int ret;

		if (!dblock)	/* hole: the tree stops here */
			break;
		ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
		if (ret)
			return ret;
	}
	mp->mp_aheight = x + 1;
	return 0;
}
341
b3b94faa 342/**
9b8c81d1
SW
343 * lookup_metapath - Walk the metadata tree to a specific point
344 * @ip: The inode
b3b94faa 345 * @mp: The metapath
b3b94faa 346 *
9b8c81d1
SW
347 * Assumes that the inode's buffer has already been looked up and
348 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
349 * by find_metapath().
350 *
351 * If this function encounters part of the tree which has not been
352 * allocated, it returns the current height of the tree at the point
353 * at which it found the unallocated block. Blocks which are found are
354 * added to the mp->mp_bh[] list.
b3b94faa 355 *
e8b43fe0 356 * Returns: error
b3b94faa
DT
357 */
358
static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	/* Fill buffers from the dinode (height 0) down to the lowest level. */
	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}
363
d552a2b9
BP
364/**
365 * fillup_metapath - fill up buffers for the metadata path to a specific height
366 * @ip: The inode
367 * @mp: The metapath
368 * @h: The height to which it should be mapped
369 *
370 * Similar to lookup_metapath, but does lookups for a range of heights
371 *
c3ce5aa9 372 * Returns: error or the number of buffers filled
d552a2b9
BP
373 */
374
static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int x = 0;
	int ret;

	if (h) {
		/* find the first buffer we need to look up. */
		for (x = h - 1; x > 0; x--) {
			if (mp->mp_bh[x])
				break;
		}
	}
	ret = __fillup_metapath(ip, mp, x, h);
	if (ret)
		return ret;
	/* Number of buffers this call actually filled in. */
	return mp->mp_aheight - x - 1;
}
392
a27a0c9b
AG
393static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
394{
395 sector_t factor = 1, block = 0;
396 int hgt;
397
398 for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) {
399 if (hgt < mp->mp_aheight)
400 block += mp->mp_list[hgt] * factor;
401 factor *= sdp->sd_inptrs;
402 }
403 return block;
404}
405
64bc06bb 406static void release_metapath(struct metapath *mp)
dbac6710
SW
407{
408 int i;
409
9b8c81d1
SW
410 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
411 if (mp->mp_bh[i] == NULL)
412 break;
413 brelse(mp->mp_bh[i]);
64bc06bb 414 mp->mp_bh[i] = NULL;
9b8c81d1 415 }
11707ea0
SW
416}
417
30cbf189
SW
418/**
419 * gfs2_extent_length - Returns length of an extent of blocks
bcfe9413
AG
420 * @bh: The metadata block
421 * @ptr: Current position in @bh
422 * @limit: Max extent length to return
30cbf189
SW
423 * @eob: Set to 1 if we hit "end of block"
424 *
30cbf189
SW
425 * Returns: The length of the extent (minimum of one block)
426 */
427
static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
{
	const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
	const __be64 *first = ptr;
	u64 d = be64_to_cpu(*ptr);	/* first physical block of the extent */

	/* NOTE(review): @limit is not referenced in this body; the scan is
	   bounded only by the end of the metadata block. */
	*eob = 0;
	do {
		ptr++;
		if (ptr >= end)
			break;
		d++;
	} while(be64_to_cpu(*ptr) == d);	/* still contiguous on disk? */
	if (ptr >= end)
		*eob = 1;
	return ptr - first;
}
445
a27a0c9b
AG
/* Result codes a gfs2_metadata_walker may return for each indirect block. */
enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };

/*
 * gfs2_metadata_walker - walk an indirect block
 * @mp: Metapath to indirect block
 * @ptrs: Number of pointers to look at
 *
 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 * indirect block to follow.
 */
typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
						   unsigned int ptrs);
7841b9f0 458
a27a0c9b
AG
459/*
460 * gfs2_walk_metadata - walk a tree of indirect blocks
461 * @inode: The inode
462 * @mp: Starting point of walk
463 * @max_len: Maximum number of blocks to walk
464 * @walker: Called during the walk
465 *
466 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
467 * past the end of metadata, and a negative error code otherwise.
468 */
7841b9f0 469
a27a0c9b
AG
static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
		u64 max_len, gfs2_metadata_walker walker)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 factor = 1;		/* logical blocks spanned per pointer at hgt */
	unsigned int hgt;
	int ret;

	/*
	 * The walk starts in the lowest allocated indirect block, which may be
	 * before the position indicated by @mp.  Adjust @max_len accordingly
	 * to avoid a short walk.
	 */
	for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
		max_len += mp->mp_list[hgt] * factor;
		mp->mp_list[hgt] = 0;
		factor *= sdp->sd_inptrs;
	}

	for (;;) {
		u16 start = mp->mp_list[hgt];
		enum walker_status status;
		unsigned int ptrs;
		u64 len;

		/* Walk indirect block. */
		ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
		len = ptrs * factor;
		if (len > max_len)
			/* Clamp to the number of pointers covering max_len. */
			ptrs = DIV_ROUND_UP_ULL(max_len, factor);
		status = walker(mp, ptrs);
		switch (status) {
		case WALK_STOP:
			return 1;
		case WALK_FOLLOW:
			/* The walker moved mp_list[hgt]; recompute how far
			   we actually advanced before descending. */
			BUG_ON(mp->mp_aheight == mp->mp_fheight);
			ptrs = mp->mp_list[hgt] - start;
			len = ptrs * factor;
			break;
		case WALK_CONTINUE:
			break;
		}
		if (len >= max_len)
			break;
		max_len -= len;
		if (status == WALK_FOLLOW)
			goto fill_up_metapath;

lower_metapath:
		/* Decrease height of metapath. */
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		mp->mp_list[hgt] = 0;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		if (mp->mp_list[hgt] >= sdp->sd_inptrs) {
			if (!hgt)
				break;
			goto lower_metapath;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			return ret;
		hgt += ret;
		/* Shrink factor by one tree level per buffer filled in. */
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	return 0;
}
549
a27a0c9b
AG
/*
 * gfs2_hole_walker - metadata walker used by gfs2_hole_size
 *
 * Scans @ptrs pointer slots at the current height.  On the first non-zero
 * (allocated) pointer: stop the walk if already at full height, otherwise
 * descend into that subtree (WALK_FOLLOW).  All-zero means keep walking.
 */
static enum walker_status gfs2_hole_walker(struct metapath *mp,
					   unsigned int ptrs)
{
	const __be64 *start, *ptr, *end;
	unsigned int hgt;

	hgt = mp->mp_aheight - 1;
	start = metapointer(hgt, mp);
	end = start + ptrs;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr) {
			/* Record where the allocated pointer was found. */
			mp->mp_list[hgt] += ptr - start;
			if (mp->mp_aheight == mp->mp_fheight)
				return WALK_STOP;
			return WALK_FOLLOW;
		}
	}
	return WALK_CONTINUE;
}
570
571/**
572 * gfs2_hole_size - figure out the size of a hole
573 * @inode: The inode
574 * @lblock: The logical starting block number
575 * @len: How far to look (in blocks)
576 * @mp: The metapath at lblock
577 * @iomap: The iomap to store the hole size in
578 *
579 * This function modifies @mp.
580 *
581 * Returns: errno on error
582 */
static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
			  struct metapath *mp, struct iomap *iomap)
{
	struct metapath clone;
	u64 hole_size;
	int ret;

	/* Walk a clone so the caller's metapath is left untouched. */
	clone_metapath(&clone, mp);
	ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker);
	if (ret < 0)
		goto out;

	if (ret == 1)
		/* Walk stopped at an allocated block: the hole ends there. */
		hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock;
	else
		/* No allocated block within @len: the whole range is a hole. */
		hole_size = len;
	iomap->length = hole_size << inode->i_blkbits;
	ret = 0;

out:
	release_metapath(&clone);
	return ret;
}
606
9b8c81d1
SW
/*
 * gfs2_indirect_init - initialise a freshly allocated indirect block
 * @mp: The metapath
 * @gl: The inode glock
 * @i: Height of the new block in the tree (must be >= 1)
 * @offset: Pointer slot in the parent buffer that receives @bn
 * @bn: Disk block number of the new indirect block
 *
 * Creates the buffer for the new block, journals it, stamps it as an
 * indirect metadata block and zeroes its pointer area, then writes @bn
 * into slot @offset of the parent buffer (mp->mp_bh[i - 1]).
 *
 * Returns: address of the parent pointer slot that was written
 */
static inline __be64 *gfs2_indirect_init(struct metapath *mp,
					 struct gfs2_glock *gl, unsigned int i,
					 unsigned offset, u64 bn)
{
	__be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
				 ((i > 1) ? sizeof(struct gfs2_meta_header) :
				  sizeof(struct gfs2_dinode)));
	BUG_ON(i < 1);
	BUG_ON(mp->mp_bh[i] != NULL);
	mp->mp_bh[i] = gfs2_meta_new(gl, bn);
	gfs2_trans_add_meta(gl, mp->mp_bh[i]);
	gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
	ptr += offset;
	*ptr = cpu_to_be64(bn);
	return ptr;
}
624
/* Stages of the allocation state machine in gfs2_iomap_alloc. */
enum alloc_state {
	ALLOC_DATA = 0,		/* allocate data blocks only */
	ALLOC_GROW_DEPTH = 1,	/* add indirect blocks below the existing tree */
	ALLOC_GROW_HEIGHT = 2,	/* increase the height of the tree first */
	/* ALLOC_UNSTUFF = 3, TBD and rather complicated */
};
631
632/**
628e366d 633 * gfs2_iomap_alloc - Build a metadata tree of the requested height
9b8c81d1 634 * @inode: The GFS2 inode
628e366d 635 * @iomap: The iomap structure
5f8bd444 636 * @mp: The metapath, with proper height information calculated
9b8c81d1
SW
637 *
638 * In this routine we may have to alloc:
639 * i) Indirect blocks to grow the metadata tree height
640 * ii) Indirect blocks to fill in lower part of the metadata tree
641 * iii) Data blocks
642 *
64bc06bb
AG
643 * This function is called after gfs2_iomap_get, which works out the
644 * total number of blocks which we need via gfs2_alloc_size.
645 *
646 * We then do the actual allocation asking for an extent at a time (if
647 * enough contiguous free blocks are available, there will only be one
648 * allocation request per call) and uses the state machine to initialise
649 * the blocks in order.
9b8c81d1 650 *
628e366d
AG
651 * Right now, this function will allocate at most one indirect block
652 * worth of data -- with a default block size of 4K, that's slightly
653 * less than 2M. If this limitation is ever removed to allow huge
654 * allocations, we would probably still want to limit the iomap size we
655 * return to avoid stalling other tasks during huge writes; the next
656 * iomap iteration would then find the blocks already allocated.
657 *
9b8c81d1
SW
658 * Returns: errno on error
659 */
660
static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			    struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	size_t dblks = iomap->length >> inode->i_blkbits;	/* data blocks wanted */
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	int ret;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);
	BUG_ON(dblks < 1);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	down_write(&ip->i_rw_mutex);

	if (mp->mp_fheight == mp->mp_aheight) {
		/* Bottom indirect block exists */
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		/* Ask for everything still outstanding in one extent. */
		n = blks - alloced;
		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (ret)
			goto out;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_remove_revoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				/* Save the dinode's first pointer; it moves
				   down to the bottom of the new branch. */
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				/* Move the old top-of-tree pointers down into
				   the lowest new indirect block. */
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				for(i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
			/* fall through - To branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
			/* fall through - To tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->type = IOMAP_MAPPED;
	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, dibh->b_data);
out:
	up_write(&ip->i_rw_mutex);
	return ret;
}
782
7ee66c03
CH
783#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
784
64bc06bb
AG
785/**
786 * gfs2_alloc_size - Compute the maximum allocation size
787 * @inode: The inode
788 * @mp: The metapath
789 * @size: Requested size in blocks
790 *
791 * Compute the maximum size of the next allocation at @mp.
792 *
793 * Returns: size in blocks
794 */
static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *first, *ptr, *end;

	/*
	 * For writes to stuffed files, this function is called twice via
	 * gfs2_iomap_get, before and after unstuffing. The size we return the
	 * first time needs to be large enough to get the reservation and
	 * allocation sizes right. The size we return the second time must
	 * be exact or else gfs2_iomap_alloc won't do the right thing.
	 */

	if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
		/* Cap at the pointers remaining in the lowest-level block. */
		unsigned int maxsize = mp->mp_fheight > 1 ?
			sdp->sd_inptrs : sdp->sd_diptrs;
		maxsize -= mp->mp_list[mp->mp_fheight - 1];
		if (size > maxsize)
			size = maxsize;
		return size;
	}

	first = metapointer(ip->i_height - 1, mp);
	end = metaend(ip->i_height - 1, mp);
	if (end - first > size)
		end = first + size;
	/* Allocation must stop at the first already-allocated pointer. */
	for (ptr = first; ptr < end; ptr++) {
		if (*ptr)
			break;
	}
	return ptr - first;
}
828
829/**
628e366d 830 * gfs2_iomap_get - Map blocks from an inode to disk blocks
3974320c
BP
831 * @inode: The inode
832 * @pos: Starting position in bytes
833 * @length: Length to map, in bytes
834 * @flags: iomap flags
835 * @iomap: The iomap structure
628e366d 836 * @mp: The metapath
3974320c
BP
837 *
838 * Returns: errno
839 */
628e366d
AG
static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			  unsigned flags, struct iomap *iomap,
			  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t size = i_size_read(inode);
	__be64 *ptr;
	sector_t lblock;
	sector_t lblock_stop;
	int ret;
	int eob;	/* set if the extent reaches the end of its block */
	u64 len;
	struct buffer_head *dibh = NULL, *bh;
	u8 height;

	if (!length)
		return -EINVAL;

	down_read(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		goto unlock;
	/* Caller releases this via release_metapath(). */
	mp->mp_bh[0] = dibh;

	if (gfs2_is_stuffed(ip)) {
		if (flags & IOMAP_WRITE) {
			loff_t max_size = gfs2_max_stuffed_size(ip);

			/* Write won't fit inline: fall through to unstuff. */
			if (pos + length > max_size)
				goto unstuff;
			iomap->length = max_size;
		} else {
			if (pos >= size) {
				if (flags & IOMAP_REPORT) {
					ret = -ENOENT;
					goto unlock;
				} else {
					/* report a hole */
					iomap->offset = pos;
					iomap->length = length;
					goto do_alloc;
				}
			}
			iomap->length = size;
		}
		/* Inline data lives right after the dinode header. */
		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
			      sizeof(struct gfs2_dinode);
		iomap->type = IOMAP_INLINE;
		iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
		goto out;
	}

unstuff:
	lblock = pos >> inode->i_blkbits;
	iomap->offset = lblock << inode->i_blkbits;
	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
	len = lblock_stop - lblock + 1;
	iomap->length = len << inode->i_blkbits;

	/* Minimum tree height needed to address lblock. */
	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, mp);
	if (ret)
		goto unlock;

	if (mp->mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, mp);
	if (*ptr == 0)
		goto do_alloc;

	bh = mp->mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh, ptr, len, &eob);

	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_MAPPED;
	iomap->flags |= IOMAP_F_MERGED;
	if (eob)
		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
	iomap->bdev = inode->i_sb->s_bdev;
unlock:
	up_read(&ip->i_rw_mutex);
	return ret;

do_alloc:
	/* No mapping exists at this position: report or size the hole. */
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	if (flags & IOMAP_REPORT) {
		if (pos >= size)
			ret = -ENOENT;
		else if (height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
		else
			iomap->length = size - pos;
	} else if (flags & IOMAP_WRITE) {
		u64 alloc_size;

		if (flags & IOMAP_DIRECT)
			goto out;	/* (see gfs2_file_direct_write) */

		len = gfs2_alloc_size(inode, mp, len);
		alloc_size = len << inode->i_blkbits;
		if (alloc_size < iomap->length)
			iomap->length = alloc_size;
	} else {
		if (pos < size && height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
	}
	goto out;
}
961
7c70b896
BP
962/**
963 * gfs2_lblk_to_dblk - convert logical block to disk block
964 * @inode: the inode of the file we're mapping
965 * @lblock: the block relative to the start of the file
966 * @dblock: the returned dblock, if no error
967 *
968 * This function maps a single block from a file logical block (relative to
969 * the start of the file) to a file system absolute block using iomap.
970 *
971 * Returns: the absolute file system block, or an error
972 */
int gfs2_lblk_to_dblk(struct inode *inode, u32 lblock, u64 *dblock)
{
	struct iomap iomap = { };
	struct metapath mp = { .mp_aheight = 1, };
	loff_t pos = (loff_t)lblock << inode->i_blkbits;
	int ret;

	/* Map exactly one block; flags == 0 means no allocation. */
	ret = gfs2_iomap_get(inode, pos, i_blocksize(inode), 0, &iomap, &mp);
	release_metapath(&mp);
	if (ret == 0)
		*dblock = iomap.addr >> inode->i_blkbits;

	return ret;
}
987
64bc06bb
AG
/*
 * gfs2_write_lock - take the inode glock exclusively for a write.
 * For the rindex inode, the statfs inode's glock is also taken.
 *
 * Returns: 0 on success, errno on failure (all holders cleaned up)
 */
static int gfs2_write_lock(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int error;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
	error = gfs2_glock_nq(&ip->i_gh);
	if (error)
		goto out_uninit;
	if (&ip->i_inode == sdp->sd_rindex) {
		struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

		error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
					   GL_NOCACHE, &m_ip->i_gh);
		if (error)
			goto out_unlock;
	}
	return 0;

out_unlock:
	gfs2_glock_dq(&ip->i_gh);
out_uninit:
	gfs2_holder_uninit(&ip->i_gh);
	return error;
}
1014
/*
 * gfs2_write_unlock - release the glocks taken by gfs2_write_lock
 * @inode: the inode that was locked
 *
 * Drops the statfs inode glock first (only held for the rindex inode),
 * then the inode glock itself — the reverse of the acquisition order.
 */
static void gfs2_write_unlock(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (&ip->i_inode == sdp->sd_rindex) {
		struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

		gfs2_glock_dq_uninit(&m_ip->i_gh);
	}
	gfs2_glock_dq_uninit(&ip->i_gh);
}
1027
d0a22a4b
AG
1028static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
1029 unsigned len, struct iomap *iomap)
1030{
2741b672 1031 unsigned int blockmask = i_blocksize(inode) - 1;
d0a22a4b 1032 struct gfs2_sbd *sdp = GFS2_SB(inode);
2741b672 1033 unsigned int blocks;
d0a22a4b 1034
2741b672
AG
1035 blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
1036 return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
d0a22a4b
AG
1037}
1038
df0db3ec
AG
/*
 * gfs2_iomap_page_done - finish the per-page transaction started by
 * gfs2_iomap_page_prepare
 * @inode: the inode being written
 * @pos: starting file offset of the copied data
 * @copied: number of bytes actually copied
 * @page: the page written to, or NULL
 * @iomap: the mapping that was used
 *
 * For journaled data, adds the written buffers to the transaction; marks
 * the inode dirty if the transaction added any new buffers, then ends the
 * transaction.
 */
static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
				 unsigned copied, struct page *page,
				 struct iomap *iomap)
{
	struct gfs2_trans *tr = current->journal_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (page && !gfs2_is_stuffed(ip))
		gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);

	if (tr->tr_num_buf_new)
		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);

	gfs2_trans_end(sdp);
}
1055
/*
 * Per-page hooks used by the iomap buffered-write path: a transaction is
 * begun before each page is written and ended once the copy is done.
 */
static const struct iomap_page_ops gfs2_iomap_page_ops = {
	.page_prepare = gfs2_iomap_page_prepare,
	.page_done = gfs2_iomap_page_done,
};
1060
64bc06bb
AG
/*
 * gfs2_iomap_begin_write - prepare a mapping for a buffered write
 * @inode: the inode being written
 * @pos: starting file offset
 * @length: length of the write
 * @flags: iomap flags
 * @iomap: the mapping to fill in / allocate for
 * @mp: the metapath from the preceding gfs2_iomap_get
 *
 * If the write extends a stuffed (inline) inode beyond what fits in the
 * dinode, or lands in a hole, this reserves quota and rgrp space, starts a
 * transaction, unstuffs and/or allocates blocks, and cleans everything up
 * again on failure (including punching out blocks allocated by a partially
 * failed gfs2_iomap_alloc).
 *
 * Returns: 0 on success or a negative errno.
 */
static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
				  loff_t length, unsigned flags,
				  struct iomap *iomap,
				  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	bool unstuff;
	int ret;

	unstuff = gfs2_is_stuffed(ip) &&
		  pos + length > gfs2_max_stuffed_size(ip);

	if (unstuff || iomap->type == IOMAP_HOLE) {
		unsigned int data_blocks, ind_blocks;
		struct gfs2_alloc_parms ap = {};
		unsigned int rblocks;
		struct gfs2_trans *tr;

		gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
				       &ind_blocks);
		ap.target = data_blocks + ind_blocks;
		ret = gfs2_quota_lock_check(ip, &ap);
		if (ret)
			return ret;

		ret = gfs2_inplace_reserve(ip, &ap);
		if (ret)
			goto out_qunlock;

		/* Journal reservation: dinode + metadata, plus data for
		   jdata inodes, statfs/quota when anything is allocated,
		   and rgrp bitmap blocks. */
		rblocks = RES_DINODE + ind_blocks;
		if (gfs2_is_jdata(ip))
			rblocks += data_blocks;
		if (ind_blocks || data_blocks)
			rblocks += RES_STATFS + RES_QUOTA;
		if (inode == sdp->sd_rindex)
			rblocks += 2 * RES_STATFS;
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);

		ret = gfs2_trans_begin(sdp, rblocks,
				       iomap->length >> inode->i_blkbits);
		if (ret)
			goto out_trans_fail;

		if (unstuff) {
			ret = gfs2_unstuff_dinode(ip, NULL);
			if (ret)
				goto out_trans_end;
			/* The metapath refers to the stuffed layout; redo
			   the lookup against the unstuffed tree. */
			release_metapath(mp);
			ret = gfs2_iomap_get(inode, iomap->offset,
					     iomap->length, flags, iomap, mp);
			if (ret)
				goto out_trans_end;
		}

		if (iomap->type == IOMAP_HOLE) {
			ret = gfs2_iomap_alloc(inode, iomap, mp);
			if (ret) {
				gfs2_trans_end(sdp);
				gfs2_inplace_release(ip);
				/* Undo any blocks the failed allocation
				   managed to attach. */
				punch_hole(ip, iomap->offset, iomap->length);
				goto out_qunlock;
			}
		}

		tr = current->journal_info;
		if (tr->tr_num_buf_new)
			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);

		gfs2_trans_end(sdp);
	}

	/* Stuffed and jdata writes need per-page transactions. */
	if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
		iomap->page_ops = &gfs2_iomap_page_ops;
	return 0;

out_trans_end:
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
	return ret;
}
1145
34aad20b
AG
1146static inline bool gfs2_iomap_need_write_lock(unsigned flags)
1147{
1148 return (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT);
1149}
1150
/*
 * gfs2_iomap_begin - iomap_ops->iomap_begin for gfs2
 * @inode: the inode
 * @pos: starting file offset
 * @length: length of the mapping request
 * @flags: IOMAP_* operation flags
 * @iomap: mapping to fill in
 * @srcmap: source mapping (unused here)
 *
 * Looks up the existing mapping, then for buffered writes and zeroing of
 * allocated ranges hands off to gfs2_iomap_begin_write.  Direct writes to
 * holes or stuffed files return -ENOTBLK so the caller falls back to
 * buffered I/O.
 *
 * Returns: 0 on success or a negative errno.
 */
static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap,
			    struct iomap *srcmap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	iomap->flags |= IOMAP_F_BUFFER_HEAD;

	trace_gfs2_iomap_start(ip, pos, length, flags);
	if (gfs2_iomap_need_write_lock(flags)) {
		ret = gfs2_write_lock(inode);
		if (ret)
			goto out;
	}

	ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
	if (ret)
		goto out_unlock;

	switch(flags & (IOMAP_WRITE | IOMAP_ZERO)) {
	case IOMAP_WRITE:
		if (flags & IOMAP_DIRECT) {
			/*
			 * Silently fall back to buffered I/O for stuffed files
			 * or if we've got a hole (see gfs2_file_direct_write).
			 */
			if (iomap->type != IOMAP_MAPPED)
				ret = -ENOTBLK;
			goto out_unlock;
		}
		break;
	case IOMAP_ZERO:
		if (iomap->type == IOMAP_HOLE)
			goto out_unlock;
		break;
	default:
		goto out_unlock;
	}

	ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);

out_unlock:
	/* On success the lock is kept and released in gfs2_iomap_end. */
	if (ret && gfs2_iomap_need_write_lock(flags))
		gfs2_write_unlock(inode);
	release_metapath(&mp);
out:
	trace_gfs2_iomap_end(ip, iomap, ret);
	return ret;
}
1202
64bc06bb
AG
/*
 * gfs2_iomap_end - iomap_ops->iomap_end for gfs2
 * @inode: the inode
 * @pos: starting file offset of the operation
 * @length: length that was mapped
 * @written: number of bytes actually written
 * @flags: IOMAP_* operation flags
 * @iomap: the mapping that was used
 *
 * Completes a buffered write or zeroing operation started in
 * gfs2_iomap_begin: adds the inode to the ordered-write list, releases the
 * block reservation, frees any freshly allocated blocks past what was
 * actually written, unlocks quota, and drops the inode glock taken in
 * gfs2_iomap_begin.
 */
static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
			  ssize_t written, unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	/* Mirror the case selection in gfs2_iomap_begin: only operations
	   that reached gfs2_iomap_begin_write need completion work. */
	switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) {
	case IOMAP_WRITE:
		if (flags & IOMAP_DIRECT)
			return 0;
		break;
	case IOMAP_ZERO:
		if (iomap->type == IOMAP_HOLE)
			return 0;
		break;
	default:
		return 0;
	}

	if (!gfs2_is_stuffed(ip))
		gfs2_ordered_add_inode(ip);

	if (inode == sdp->sd_rindex)
		adjust_fs_space(inode);

	gfs2_inplace_release(ip);

	if (length != written && (iomap->flags & IOMAP_F_NEW)) {
		/* Deallocate blocks that were just allocated. */
		loff_t blockmask = i_blocksize(inode) - 1;
		loff_t end = (pos + length) & ~blockmask;

		pos = (pos + written + blockmask) & ~blockmask;
		if (pos < end) {
			truncate_pagecache_range(inode, pos, end - 1);
			punch_hole(ip, pos, end - pos);
		}
	}

	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
		gfs2_quota_unlock(ip);

	if (unlikely(!written))
		goto out_unlock;

	if (iomap->flags & IOMAP_F_SIZE_CHANGED)
		mark_inode_dirty(inode);
	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);

out_unlock:
	if (gfs2_iomap_need_write_lock(flags))
		gfs2_write_unlock(inode);
	return 0;
}
1257
628e366d
AG
1258const struct iomap_ops gfs2_iomap_ops = {
1259 .iomap_begin = gfs2_iomap_begin,
64bc06bb 1260 .iomap_end = gfs2_iomap_end,
628e366d
AG
1261};
1262
3974320c 1263/**
d39d18e0 1264 * gfs2_block_map - Map one or more blocks of an inode to a disk block
3974320c
BP
1265 * @inode: The inode
1266 * @lblock: The logical block number
1267 * @bh_map: The bh to be mapped
1268 * @create: True if its ok to alloc blocks to satify the request
1269 *
d39d18e0
AG
1270 * The size of the requested mapping is defined in bh_map->b_size.
1271 *
1272 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
1273 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
1274 * bh_map->b_size to indicate the size of the mapping when @lblock and
1275 * successive blocks are mapped, up to the requested size.
1276 *
1277 * Sets buffer_boundary() if a read of metadata will be required
1278 * before the next block can be mapped. Sets buffer_new() if new
1279 * blocks were allocated.
3974320c
BP
1280 *
1281 * Returns: errno
1282 */
1283
1284int gfs2_block_map(struct inode *inode, sector_t lblock,
1285 struct buffer_head *bh_map, int create)
1286{
1287 struct gfs2_inode *ip = GFS2_I(inode);
628e366d
AG
1288 loff_t pos = (loff_t)lblock << inode->i_blkbits;
1289 loff_t length = bh_map->b_size;
1290 struct metapath mp = { .mp_aheight = 1, };
1291 struct iomap iomap = { };
1292 int ret;
3974320c
BP
1293
1294 clear_buffer_mapped(bh_map);
1295 clear_buffer_new(bh_map);
1296 clear_buffer_boundary(bh_map);
1297 trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
1298
628e366d
AG
1299 if (create) {
1300 ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
1301 if (!ret && iomap.type == IOMAP_HOLE)
bb4cb25d 1302 ret = gfs2_iomap_alloc(inode, &iomap, &mp);
628e366d
AG
1303 release_metapath(&mp);
1304 } else {
1305 ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
1306 release_metapath(&mp);
3974320c 1307 }
628e366d
AG
1308 if (ret)
1309 goto out;
3974320c
BP
1310
1311 if (iomap.length > bh_map->b_size) {
1312 iomap.length = bh_map->b_size;
7ee66c03 1313 iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
5f8bd444 1314 }
3974320c
BP
1315 if (iomap.addr != IOMAP_NULL_ADDR)
1316 map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
1317 bh_map->b_size = iomap.length;
7ee66c03 1318 if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
3974320c
BP
1319 set_buffer_boundary(bh_map);
1320 if (iomap.flags & IOMAP_F_NEW)
1321 set_buffer_new(bh_map);
1322
1323out:
1324 trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
1325 return ret;
fd88de56
SW
1326}
1327
941e6d7d
SW
1328/*
1329 * Deprecated: do not use in new code
1330 */
fd88de56
SW
1331int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
1332{
23591256 1333 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
7a6bbacb 1334 int ret;
fd88de56
SW
1335 int create = *new;
1336
1337 BUG_ON(!extlen);
1338 BUG_ON(!dblock);
1339 BUG_ON(!new);
1340
47a9a527 1341 bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
e9e1ef2b 1342 ret = gfs2_block_map(inode, lblock, &bh, create);
7a6bbacb
SW
1343 *extlen = bh.b_size >> inode->i_blkbits;
1344 *dblock = bh.b_blocknr;
1345 if (buffer_new(&bh))
1346 *new = 1;
1347 else
1348 *new = 0;
1349 return ret;
b3b94faa
DT
1350}
1351
bdba0d5e
AG
/*
 * gfs2_block_zero_range - zero a partial block of a file
 * @inode: the inode
 * @from: starting file offset
 * @length: number of bytes to zero
 *
 * Thin wrapper around iomap_zero_range using gfs2's iomap operations.
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
}
1357
c62baf65
FF
1358#define GFS2_JTRUNC_REVOKES 8192
1359
fa731fc4
SW
/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */

static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	/* Largest truncate step that fits GFS2_JTRUNC_REVOKES revokes. */
	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	u64 chunk;
	int error;

	while (oldsize != newsize) {
		struct gfs2_trans *tr;
		unsigned int offs;

		chunk = oldsize - newsize;
		if (chunk > max_chunk)
			chunk = max_chunk;

		/* Keep each step page-aligned after an unaligned first cut. */
		offs = oldsize & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache(inode, oldsize - chunk);
		oldsize -= chunk;

		/* Only cycle the transaction if this step touched it. */
		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}

	return 0;
}
1405
/*
 * trunc_start - shrink a file to @newsize (first phase of truncation)
 * @inode: the inode being truncated
 * @newsize: the new, smaller size
 *
 * Updates the on-disk size, zeroes the tail of the last remaining block
 * (or clears the dinode tail for stuffed inodes), sets the TRUNC_IN_PROG
 * flag so an interrupted truncate can be resumed, and truncates the page
 * cache.  The actual block deallocation happens elsewhere.
 *
 * Returns: 0 on success or a negative errno.
 */
static int trunc_start(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = NULL;
	int journaled = gfs2_is_jdata(ip);
	u64 oldsize = inode->i_size;
	int error;

	if (journaled)
		error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip)) {
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	} else {
		unsigned int blocksize = i_blocksize(inode);
		unsigned int offs = newsize & (blocksize - 1);
		if (offs) {
			/* Zero the tail of the new last block. */
			error = gfs2_block_zero_range(inode, newsize,
						      blocksize - offs);
			if (error)
				goto out;
		}
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
	}

	i_size_write(inode, newsize);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_dinode_out(ip, dibh->b_data);

	if (journaled)
		error = gfs2_journaled_truncate(inode, oldsize, newsize);
	else
		truncate_pagecache(inode, newsize);

out:
	brelse(dibh);
	/* gfs2_journaled_truncate may already have ended the transaction. */
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}
1457
628e366d
AG
1458int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
1459 struct iomap *iomap)
1460{
1461 struct metapath mp = { .mp_aheight = 1, };
1462 int ret;
1463
1464 ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
1465 if (!ret && iomap->type == IOMAP_HOLE)
bb4cb25d 1466 ret = gfs2_iomap_alloc(inode, iomap, &mp);
628e366d
AG
1467 release_metapath(&mp);
1468 return ret;
1469}
1470
d552a2b9
BP
/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all. However, we do it one rgrp at a time. If this
 * block has references to multiple rgrps, we break it into individual
 * transactions. This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power-outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      struct buffer_head *bh, __be64 *start, __be64 *end,
			      bool meta, u32 *btotal)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr;
	__be64 *p;
	int blks_outside_rgrp;
	u64 bn, bstart, isize_blks;
	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
	int ret = 0;
	bool buf_in_tr = false; /* buffer was added to transaction */

more_rgrps:
	rgd = NULL;
	if (gfs2_holder_initialized(rd_gh)) {
		/* Resume with the rgrp whose glock we already hold. */
		rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
		gfs2_assert_withdraw(sdp,
			     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
	}
	blks_outside_rgrp = 0;
	bstart = 0;
	blen = 0;

	for (p = start; p < end; p++) {
		if (!*p)
			continue;
		bn = be64_to_cpu(*p);

		if (rgd) {
			if (!rgrp_contains_block(rgd, bn)) {
				blks_outside_rgrp++;
				continue;
			}
		} else {
			rgd = gfs2_blk2rgrpd(sdp, bn, true);
			if (unlikely(!rgd)) {
				ret = -EIO;
				goto out;
			}
			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						 0, rd_gh);
			if (ret)
				goto out;

			/* Must be done with the rgrp glock held: */
			if (gfs2_rs_active(&ip->i_res) &&
			    rgd == ip->i_res.rs_rbm.rgd)
				gfs2_rs_deltree(&ip->i_res);
		}

		/* The size of our transactions will be unknown until we
		   actually process all the metadata blocks that relate to
		   the rgrp. So we estimate. We know it can't be more than
		   the dinode's i_blocks and we don't want to exceed the
		   journal flush threshold, sd_log_thresh2. */
		if (current->journal_info == NULL) {
			unsigned int jblocks_rqsted, revokes;

			jblocks_rqsted = rgd->rd_length + RES_DINODE +
				RES_INDIRECT;
			isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
			if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
				jblocks_rqsted +=
					atomic_read(&sdp->sd_log_thresh2);
			else
				jblocks_rqsted += isize_blks;
			revokes = jblocks_rqsted;
			if (meta)
				revokes += end - start;
			else if (ip->i_depth)
				revokes += sdp->sd_inptrs;
			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
			if (ret)
				goto out_unlock;
			down_write(&ip->i_rw_mutex);
		}
		/* check if we will exceed the transaction blocks requested */
		tr = current->journal_info;
		if (tr->tr_num_buf_new + RES_STATFS +
		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
			/* We set blks_outside_rgrp to ensure the loop will
			   be repeated for the same rgrp, but with a new
			   transaction. */
			blks_outside_rgrp++;
			/* This next part is tricky. If the buffer was added
			   to the transaction, we've already set some block
			   pointers to 0, so we better follow through and free
			   them, or we will introduce corruption (so break).
			   This may be impossible, or at least rare, but I
			   decided to cover the case regardless.

			   If the buffer was not added to the transaction
			   (this call), doing so would exceed our transaction
			   size, so we need to end the transaction and start a
			   new one (so goto). */

			if (buf_in_tr)
				break;
			goto out_unlock;
		}

		gfs2_trans_add_meta(ip->i_gl, bh);
		buf_in_tr = true;
		*p = 0;
		if (bstart + blen == bn) {
			/* Extend the current contiguous run. */
			blen++;
			continue;
		}
		if (bstart) {
			__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
			(*btotal) += blen;
			gfs2_add_inode_blocks(&ip->i_inode, -blen);
		}
		bstart = bn;
		blen = 1;
	}
	/* Free the final pending run, if any. */
	if (bstart) {
		__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
		(*btotal) += blen;
		gfs2_add_inode_blocks(&ip->i_inode, -blen);
	}
out_unlock:
	if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
					    outside the rgrp we just processed,
					    do it all over again. */
		if (current->journal_info) {
			struct buffer_head *dibh;

			ret = gfs2_meta_inode_buffer(ip, &dibh);
			if (ret)
				goto out;

			/* Every transaction boundary, we rewrite the dinode
			   to keep its di_blocks current in case of failure. */
			ip->i_inode.i_mtime = ip->i_inode.i_ctime =
				current_time(&ip->i_inode);
			gfs2_trans_add_meta(ip->i_gl, dibh);
			gfs2_dinode_out(ip, dibh->b_data);
			brelse(dibh);
			up_write(&ip->i_rw_mutex);
			gfs2_trans_end(sdp);
			buf_in_tr = false;
		}
		gfs2_glock_dq_uninit(rd_gh);
		cond_resched();
		goto more_rgrps;
	}
out:
	return ret;
}
1645
10d2cf94
AG
1646static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1647{
1648 if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1649 return false;
1650 return true;
1651}
1652
d552a2b9
BP
/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @sdp: the superblock (currently unused here)
 * @mp: starting metapath
 * @h: desired height to search
 * @end_list: metapath components of the end of the range, or NULL
 * @end_aligned: height up to which the end of the range is aligned
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 * On success, mp->mp_list[h] is advanced to the pointer found and the
 * component below it (if any) is reset to 0.
 *
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
			     unsigned int h,
			     __u16 *end_list, unsigned int end_aligned)
{
	struct buffer_head *bh = mp->mp_bh[h];
	__be64 *first, *ptr, *end;

	first = metaptr1(h, mp);
	ptr = first + mp->mp_list[h];
	end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
		/* Don't scan past the end of the deallocation range. */
		bool keep_end = h < end_aligned;
		end = first + end_list[h] + keep_end;
	}

	while (ptr < end) {
		if (*ptr) { /* if we have a non-null pointer */
			mp->mp_list[h] = ptr - first;
			h++;
			if (h < GFS2_MAX_META_HEIGHT)
				mp->mp_list[h] = 0;
			return true;
		}
		ptr++;
	}
	return false;
}
1689
/* States of the block-deallocation state machine in punch_hole(). */
enum dealloc_states {
	DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,    /* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,       /* process complete */
};
b3b94faa 1696
5cf26b1e
AG
/*
 * metapointer_range - compute the range of metadata pointers to process
 * @mp: the current metapath
 * @height: the height the pointers live at
 * @start_list: metapath components of the start of the hole
 * @start_aligned: height up to which the start is aligned
 * @end_list: metapath components of the end of the hole, or NULL
 * @end_aligned: height up to which the end is aligned
 * @start: result: first pointer to process
 * @end: result: one past the last pointer to process
 *
 * By default the range covers the whole buffer at @height; it is clipped
 * at either side when the metapath coincides with the start/end boundary
 * of the hole (keeping the boundary pointer itself when the boundary is
 * not aligned at this height).
 */
static inline void
metapointer_range(struct metapath *mp, int height,
		  __u16 *start_list, unsigned int start_aligned,
		  __u16 *end_list, unsigned int end_aligned,
		  __be64 **start, __be64 **end)
{
	struct buffer_head *bh = mp->mp_bh[height];
	__be64 *first;

	first = metaptr1(height, mp);
	*start = first;
	if (mp_eq_to_hgt(mp, start_list, height)) {
		bool keep_start = height < start_aligned;
		*start = first + start_list[height] + keep_start;
	}
	*end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
		bool keep_end = height < end_aligned;
		*end = first + end_list[height] + keep_end;
	}
}
1718
1719static inline bool walk_done(struct gfs2_sbd *sdp,
1720 struct metapath *mp, int height,
1721 __u16 *end_list, unsigned int end_aligned)
1722{
1723 __u16 end;
1724
1725 if (end_list) {
1726 bool keep_end = height < end_aligned;
1727 if (!mp_eq_to_hgt(mp, end_list, height))
1728 return false;
1729 end = end_list[height] + keep_end;
1730 } else
1731 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1732 return mp->mp_list[height] >= end;
5cf26b1e
AG
1733}
1734
d552a2b9 1735/**
10d2cf94 1736 * punch_hole - deallocate blocks in a file
d552a2b9 1737 * @ip: inode to truncate
10d2cf94
AG
1738 * @offset: the start of the hole
1739 * @length: the size of the hole (or 0 for truncate)
1740 *
1741 * Punch a hole into a file or truncate a file at a given position. This
1742 * function operates in whole blocks (@offset and @length are rounded
1743 * accordingly); partially filled blocks must be cleared otherwise.
d552a2b9 1744 *
10d2cf94
AG
1745 * This function works from the bottom up, and from the right to the left. In
1746 * other words, it strips off the highest layer (data) before stripping any of
1747 * the metadata. Doing it this way is best in case the operation is interrupted
1748 * by power failure, etc. The dinode is rewritten in every transaction to
1749 * guarantee integrity.
d552a2b9 1750 */
10d2cf94 1751static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
d552a2b9
BP
1752{
1753 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
bb491ce6 1754 u64 maxsize = sdp->sd_heightsize[ip->i_height];
10d2cf94 1755 struct metapath mp = {};
d552a2b9
BP
1756 struct buffer_head *dibh, *bh;
1757 struct gfs2_holder rd_gh;
cb7f0903 1758 unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
10d2cf94
AG
1759 u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1760 __u16 start_list[GFS2_MAX_META_HEIGHT];
1761 __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
4e56a641 1762 unsigned int start_aligned, uninitialized_var(end_aligned);
d552a2b9
BP
1763 unsigned int strip_h = ip->i_height - 1;
1764 u32 btotal = 0;
1765 int ret, state;
1766 int mp_h; /* metapath buffers are read in to this height */
d552a2b9 1767 u64 prev_bnr = 0;
5cf26b1e 1768 __be64 *start, *end;
b3b94faa 1769
bb491ce6
AG
1770 if (offset >= maxsize) {
1771 /*
1772 * The starting point lies beyond the allocated meta-data;
1773 * there are no blocks do deallocate.
1774 */
1775 return 0;
1776 }
1777
10d2cf94
AG
1778 /*
1779 * The start position of the hole is defined by lblock, start_list, and
1780 * start_aligned. The end position of the hole is defined by lend,
1781 * end_list, and end_aligned.
1782 *
1783 * start_aligned and end_aligned define down to which height the start
1784 * and end positions are aligned to the metadata tree (i.e., the
1785 * position is a multiple of the metadata granularity at the height
1786 * above). This determines at which heights additional meta pointers
1787 * needs to be preserved for the remaining data.
1788 */
b3b94faa 1789
10d2cf94 1790 if (length) {
10d2cf94
AG
1791 u64 end_offset = offset + length;
1792 u64 lend;
1793
1794 /*
1795 * Clip the end at the maximum file size for the given height:
1796 * that's how far the metadata goes; files bigger than that
1797 * will have additional layers of indirection.
1798 */
1799 if (end_offset > maxsize)
1800 end_offset = maxsize;
1801 lend = end_offset >> bsize_shift;
1802
1803 if (lblock >= lend)
1804 return 0;
1805
1806 find_metapath(sdp, lend, &mp, ip->i_height);
1807 end_list = __end_list;
1808 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1809
1810 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1811 if (end_list[mp_h])
1812 break;
1813 }
1814 end_aligned = mp_h;
1815 }
1816
1817 find_metapath(sdp, lblock, &mp, ip->i_height);
cb7f0903
AG
1818 memcpy(start_list, mp.mp_list, sizeof(start_list));
1819
cb7f0903
AG
1820 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1821 if (start_list[mp_h])
1822 break;
1823 }
1824 start_aligned = mp_h;
d552a2b9
BP
1825
1826 ret = gfs2_meta_inode_buffer(ip, &dibh);
1827 if (ret)
1828 return ret;
b3b94faa 1829
d552a2b9
BP
1830 mp.mp_bh[0] = dibh;
1831 ret = lookup_metapath(ip, &mp);
e8b43fe0
AG
1832 if (ret)
1833 goto out_metapath;
c3ce5aa9
AG
1834
1835 /* issue read-ahead on metadata */
5cf26b1e
AG
1836 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1837 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1838 end_list, end_aligned, &start, &end);
5cf26b1e
AG
1839 gfs2_metapath_ra(ip->i_gl, start, end);
1840 }
c3ce5aa9 1841
e8b43fe0 1842 if (mp.mp_aheight == ip->i_height)
d552a2b9
BP
1843 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1844 else
1845 state = DEALLOC_FILL_MP; /* deal with partial metapath */
b3b94faa 1846
d552a2b9
BP
1847 ret = gfs2_rindex_update(sdp);
1848 if (ret)
1849 goto out_metapath;
1850
1851 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1852 if (ret)
1853 goto out_metapath;
1854 gfs2_holder_mark_uninitialized(&rd_gh);
1855
1856 mp_h = strip_h;
1857
1858 while (state != DEALLOC_DONE) {
1859 switch (state) {
1860 /* Truncate a full metapath at the given strip height.
1861 * Note that strip_h == mp_h in order to be in this state. */
1862 case DEALLOC_MP_FULL:
d552a2b9
BP
1863 bh = mp.mp_bh[mp_h];
1864 gfs2_assert_withdraw(sdp, bh);
1865 if (gfs2_assert_withdraw(sdp,
1866 prev_bnr != bh->b_blocknr)) {
f29e62ee
BP
1867 fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u,"
1868 "s_h:%u, mp_h:%u\n",
d552a2b9
BP
1869 (unsigned long long)ip->i_no_addr,
1870 prev_bnr, ip->i_height, strip_h, mp_h);
1871 }
1872 prev_bnr = bh->b_blocknr;
cb7f0903 1873
5cf26b1e
AG
1874 if (gfs2_metatype_check(sdp, bh,
1875 (mp_h ? GFS2_METATYPE_IN :
1876 GFS2_METATYPE_DI))) {
1877 ret = -EIO;
1878 goto out;
1879 }
1880
10d2cf94
AG
1881 /*
1882 * Below, passing end_aligned as 0 gives us the
1883 * metapointer range excluding the end point: the end
1884 * point is the first metapath we must not deallocate!
1885 */
1886
5cf26b1e 1887 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1888 end_list, 0 /* end_aligned */,
5cf26b1e
AG
1889 &start, &end);
1890 ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1891 start, end,
1892 mp_h != ip->i_height - 1,
1893 &btotal);
cb7f0903 1894
d552a2b9
BP
1895 /* If we hit an error or just swept dinode buffer,
1896 just exit. */
1897 if (ret || !mp_h) {
1898 state = DEALLOC_DONE;
1899 break;
1900 }
1901 state = DEALLOC_MP_LOWER;
1902 break;
1903
1904 /* lower the metapath strip height */
1905 case DEALLOC_MP_LOWER:
1906 /* We're done with the current buffer, so release it,
1907 unless it's the dinode buffer. Then back up to the
1908 previous pointer. */
1909 if (mp_h) {
1910 brelse(mp.mp_bh[mp_h]);
1911 mp.mp_bh[mp_h] = NULL;
1912 }
1913 /* If we can't get any lower in height, we've stripped
1914 off all we can. Next step is to back up and start
1915 stripping the previous level of metadata. */
1916 if (mp_h == 0) {
1917 strip_h--;
cb7f0903 1918 memcpy(mp.mp_list, start_list, sizeof(start_list));
d552a2b9
BP
1919 mp_h = strip_h;
1920 state = DEALLOC_FILL_MP;
1921 break;
1922 }
1923 mp.mp_list[mp_h] = 0;
1924 mp_h--; /* search one metadata height down */
d552a2b9 1925 mp.mp_list[mp_h]++;
10d2cf94
AG
1926 if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1927 break;
d552a2b9
BP
1928 /* Here we've found a part of the metapath that is not
1929 * allocated. We need to search at that height for the
1930 * next non-null pointer. */
10d2cf94 1931 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
d552a2b9
BP
1932 state = DEALLOC_FILL_MP;
1933 mp_h++;
1934 }
1935 /* No more non-null pointers at this height. Back up
1936 to the previous height and try again. */
1937 break; /* loop around in the same state */
1938
1939 /* Fill the metapath with buffers to the given height. */
1940 case DEALLOC_FILL_MP:
1941 /* Fill the buffers out to the current height. */
1942 ret = fillup_metapath(ip, &mp, mp_h);
c3ce5aa9 1943 if (ret < 0)
d552a2b9 1944 goto out;
c3ce5aa9 1945
e7445ced
AG
1946 /* On the first pass, issue read-ahead on metadata. */
1947 if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
1948 unsigned int height = mp.mp_aheight - 1;
1949
1950 /* No read-ahead for data blocks. */
1951 if (mp.mp_aheight - 1 == strip_h)
1952 height--;
1953
1954 for (; height >= mp.mp_aheight - ret; height--) {
1955 metapointer_range(&mp, height,
5cf26b1e 1956 start_list, start_aligned,
10d2cf94 1957 end_list, end_aligned,
5cf26b1e
AG
1958 &start, &end);
1959 gfs2_metapath_ra(ip->i_gl, start, end);
1960 }
c3ce5aa9 1961 }
d552a2b9
BP
1962
1963 /* If buffers found for the entire strip height */
e8b43fe0 1964 if (mp.mp_aheight - 1 == strip_h) {
d552a2b9
BP
1965 state = DEALLOC_MP_FULL;
1966 break;
1967 }
e8b43fe0
AG
1968 if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1969 mp_h = mp.mp_aheight - 1;
d552a2b9
BP
1970
1971 /* If we find a non-null block pointer, crawl a bit
1972 higher up in the metapath and try again, otherwise
1973 we need to look lower for a new starting point. */
10d2cf94 1974 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
d552a2b9
BP
1975 mp_h++;
1976 else
1977 state = DEALLOC_MP_LOWER;
b3b94faa 1978 break;
d552a2b9 1979 }
b3b94faa
DT
1980 }
1981
d552a2b9
BP
1982 if (btotal) {
1983 if (current->journal_info == NULL) {
1984 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1985 RES_QUOTA, 0);
1986 if (ret)
1987 goto out;
1988 down_write(&ip->i_rw_mutex);
1989 }
1990 gfs2_statfs_change(sdp, 0, +btotal, 0);
1991 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1992 ip->i_inode.i_gid);
b32c8c76 1993 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
d552a2b9
BP
1994 gfs2_trans_add_meta(ip->i_gl, dibh);
1995 gfs2_dinode_out(ip, dibh->b_data);
1996 up_write(&ip->i_rw_mutex);
1997 gfs2_trans_end(sdp);
1998 }
b3b94faa 1999
d552a2b9
BP
2000out:
2001 if (gfs2_holder_initialized(&rd_gh))
2002 gfs2_glock_dq_uninit(&rd_gh);
2003 if (current->journal_info) {
2004 up_write(&ip->i_rw_mutex);
2005 gfs2_trans_end(sdp);
2006 cond_resched();
2007 }
2008 gfs2_quota_unhold(ip);
2009out_metapath:
2010 release_metapath(&mp);
2011 return ret;
b3b94faa
DT
2012}
2013
/*
 * trunc_end - finish a truncate
 * @ip: the inode being truncated
 *
 * Final step of a shrink: if the file is now empty, collapse the metadata
 * tree (height 0), reset the allocation goal to the dinode block, clear any
 * stale data following the dinode header, and drop the inode from the
 * ordered-write list.  In all cases the GFS2_DIF_TRUNC_IN_PROG flag is
 * cleared and the dinode is written back, so an interrupted truncate can
 * be detected and resumed (see gfs2_truncatei_resume).
 *
 * Returns: errno
 */
static int trunc_end(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	/* i_rw_mutex serializes changes to the metadata tree height. */
	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (!i_size_read(&ip->i_inode)) {
		/* File is now empty: no indirect blocks remain. */
		ip->i_height = 0;
		ip->i_goal = ip->i_no_addr;
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
		gfs2_ordered_del_inode(ip);
	}
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;

	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

out:
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}
2048
2049/**
2050 * do_shrink - make a file smaller
ff8f33c8 2051 * @inode: the inode
ff8f33c8 2052 * @newsize: the size to make the file
b3b94faa 2053 *
ff8f33c8
SW
2054 * Called with an exclusive lock on @inode. The @size must
2055 * be equal to or smaller than the current inode size.
b3b94faa
DT
2056 *
2057 * Returns: errno
2058 */
2059
8b5860a3 2060static int do_shrink(struct inode *inode, u64 newsize)
b3b94faa 2061{
ff8f33c8 2062 struct gfs2_inode *ip = GFS2_I(inode);
b3b94faa
DT
2063 int error;
2064
8b5860a3 2065 error = trunc_start(inode, newsize);
b3b94faa
DT
2066 if (error < 0)
2067 return error;
ff8f33c8 2068 if (gfs2_is_stuffed(ip))
b3b94faa
DT
2069 return 0;
2070
10d2cf94 2071 error = punch_hole(ip, newsize, 0);
ff8f33c8 2072 if (error == 0)
b3b94faa
DT
2073 error = trunc_end(ip);
2074
2075 return error;
2076}
2077
ff8f33c8 2078void gfs2_trim_blocks(struct inode *inode)
a13b8c5f 2079{
ff8f33c8
SW
2080 int ret;
2081
8b5860a3 2082 ret = do_shrink(inode, inode->i_size);
ff8f33c8
SW
2083 WARN_ON(ret != 0);
2084}
2085
2086/**
2087 * do_grow - Touch and update inode size
2088 * @inode: The inode
2089 * @size: The new size
2090 *
2091 * This function updates the timestamps on the inode and
2092 * may also increase the size of the inode. This function
2093 * must not be called with @size any smaller than the current
2094 * inode size.
2095 *
2096 * Although it is not strictly required to unstuff files here,
2097 * earlier versions of GFS2 have a bug in the stuffed file reading
2098 * code which will result in a buffer overrun if the size is larger
2099 * than the max stuffed file size. In order to prevent this from
25985edc 2100 * occurring, such files are unstuffed, but in other cases we can
ff8f33c8
SW
2101 * just update the inode size directly.
2102 *
2103 * Returns: 0 on success, or -ve on error
2104 */
2105
2106static int do_grow(struct inode *inode, u64 size)
2107{
2108 struct gfs2_inode *ip = GFS2_I(inode);
2109 struct gfs2_sbd *sdp = GFS2_SB(inode);
7b9cff46 2110 struct gfs2_alloc_parms ap = { .target = 1, };
a13b8c5f
WC
2111 struct buffer_head *dibh;
2112 int error;
2f7ee358 2113 int unstuff = 0;
a13b8c5f 2114
235628c5 2115 if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
b8fbf471 2116 error = gfs2_quota_lock_check(ip, &ap);
ff8f33c8 2117 if (error)
5407e242 2118 return error;
ff8f33c8 2119
7b9cff46 2120 error = gfs2_inplace_reserve(ip, &ap);
ff8f33c8
SW
2121 if (error)
2122 goto do_grow_qunlock;
2f7ee358 2123 unstuff = 1;
ff8f33c8
SW
2124 }
2125
a01aedfe 2126 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
bc020561
BP
2127 (unstuff &&
2128 gfs2_is_jdata(ip) ? RES_JDATA : 0) +
a01aedfe
BP
2129 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
2130 0 : RES_QUOTA), 0);
a13b8c5f 2131 if (error)
ff8f33c8 2132 goto do_grow_release;
a13b8c5f 2133
2f7ee358 2134 if (unstuff) {
ff8f33c8
SW
2135 error = gfs2_unstuff_dinode(ip, NULL);
2136 if (error)
2137 goto do_end_trans;
2138 }
a13b8c5f
WC
2139
2140 error = gfs2_meta_inode_buffer(ip, &dibh);
2141 if (error)
ff8f33c8 2142 goto do_end_trans;
a13b8c5f 2143
b473bc2d 2144 truncate_setsize(inode, size);
078cd827 2145 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
350a9b0a 2146 gfs2_trans_add_meta(ip->i_gl, dibh);
a13b8c5f
WC
2147 gfs2_dinode_out(ip, dibh->b_data);
2148 brelse(dibh);
2149
ff8f33c8 2150do_end_trans:
a13b8c5f 2151 gfs2_trans_end(sdp);
ff8f33c8 2152do_grow_release:
2f7ee358 2153 if (unstuff) {
ff8f33c8
SW
2154 gfs2_inplace_release(ip);
2155do_grow_qunlock:
2156 gfs2_quota_unlock(ip);
ff8f33c8 2157 }
a13b8c5f
WC
2158 return error;
2159}
2160
b3b94faa 2161/**
ff8f33c8
SW
2162 * gfs2_setattr_size - make a file a given size
2163 * @inode: the inode
2164 * @newsize: the size to make the file
b3b94faa 2165 *
ff8f33c8 2166 * The file size can grow, shrink, or stay the same size. This
3e7aafc3 2167 * is called holding i_rwsem and an exclusive glock on the inode
ff8f33c8 2168 * in question.
b3b94faa
DT
2169 *
2170 * Returns: errno
2171 */
2172
ff8f33c8 2173int gfs2_setattr_size(struct inode *inode, u64 newsize)
b3b94faa 2174{
af5c2697 2175 struct gfs2_inode *ip = GFS2_I(inode);
ff8f33c8 2176 int ret;
b3b94faa 2177
ff8f33c8 2178 BUG_ON(!S_ISREG(inode->i_mode));
b3b94faa 2179
ff8f33c8
SW
2180 ret = inode_newsize_ok(inode, newsize);
2181 if (ret)
2182 return ret;
b3b94faa 2183
562c72aa
CH
2184 inode_dio_wait(inode);
2185
2fba46a0 2186 ret = gfs2_qa_get(ip);
d2b47cfb 2187 if (ret)
2b3dcf35 2188 goto out;
d2b47cfb 2189
8b5860a3 2190 if (newsize >= inode->i_size) {
2b3dcf35
BP
2191 ret = do_grow(inode, newsize);
2192 goto out;
2193 }
ff8f33c8 2194
8b5860a3 2195 ret = do_shrink(inode, newsize);
2b3dcf35 2196out:
1595548f
AG
2197 gfs2_rs_delete(ip, NULL);
2198 gfs2_qa_put(ip);
2b3dcf35 2199 return ret;
b3b94faa
DT
2200}
2201
2202int gfs2_truncatei_resume(struct gfs2_inode *ip)
2203{
2204 int error;
10d2cf94 2205 error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
b3b94faa
DT
2206 if (!error)
2207 error = trunc_end(ip);
2208 return error;
2209}
2210
/*
 * gfs2_file_dealloc - deallocate all of a file's blocks
 * @ip: the inode
 *
 * Punches a hole starting at offset 0; a zero length here matches the
 * other truncate callers and appears to mean "to end of file".
 *
 * Returns: errno
 */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	return punch_hole(ip, 0, 0);
}
2215
b50f227b
SW
2216/**
2217 * gfs2_free_journal_extents - Free cached journal bmap info
2218 * @jd: The journal
2219 *
2220 */
2221
2222void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
2223{
2224 struct gfs2_journal_extent *jext;
2225
2226 while(!list_empty(&jd->extent_list)) {
969183bc 2227 jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list);
b50f227b
SW
2228 list_del(&jext->list);
2229 kfree(jext);
2230 }
2231}
2232
2233/**
2234 * gfs2_add_jextent - Add or merge a new extent to extent cache
2235 * @jd: The journal descriptor
2236 * @lblock: The logical block at start of new extent
c62baf65 2237 * @dblock: The physical block at start of new extent
b50f227b
SW
2238 * @blocks: Size of extent in fs blocks
2239 *
2240 * Returns: 0 on success or -ENOMEM
2241 */
2242
2243static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
2244{
2245 struct gfs2_journal_extent *jext;
2246
2247 if (!list_empty(&jd->extent_list)) {
969183bc 2248 jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list);
b50f227b
SW
2249 if ((jext->dblock + jext->blocks) == dblock) {
2250 jext->blocks += blocks;
2251 return 0;
2252 }
2253 }
2254
2255 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
2256 if (jext == NULL)
2257 return -ENOMEM;
2258 jext->dblock = dblock;
2259 jext->lblock = lblock;
2260 jext->blocks = blocks;
2261 list_add_tail(&jext->list, &jd->extent_list);
2262 jd->nr_extents++;
2263 return 0;
2264}
2265
2266/**
2267 * gfs2_map_journal_extents - Cache journal bmap info
2268 * @sdp: The super block
2269 * @jd: The journal to map
2270 *
2271 * Create a reusable "extent" mapping from all logical
2272 * blocks to all physical blocks for the given journal. This will save
2273 * us time when writing journal blocks. Most journals will have only one
2274 * extent that maps all their logical blocks. That's because gfs2.mkfs
2275 * arranges the journal blocks sequentially to maximize performance.
2276 * So the extent would map the first block for the entire file length.
2277 * However, gfs2_jadd can happen while file activity is happening, so
2278 * those journals may not be sequential. Less likely is the case where
2279 * the users created their own journals by mounting the metafs and
2280 * laying it out. But it's still possible. These journals might have
2281 * several extents.
2282 *
2283 * Returns: 0 on success, or error on failure
2284 */
2285
2286int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
2287{
2288 u64 lblock = 0;
2289 u64 lblock_stop;
2290 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
2291 struct buffer_head bh;
2292 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
2293 u64 size;
2294 int rc;
98583b3e 2295 ktime_t start, end;
b50f227b 2296
98583b3e 2297 start = ktime_get();
b50f227b
SW
2298 lblock_stop = i_size_read(jd->jd_inode) >> shift;
2299 size = (lblock_stop - lblock) << shift;
2300 jd->nr_extents = 0;
2301 WARN_ON(!list_empty(&jd->extent_list));
2302
2303 do {
2304 bh.b_state = 0;
2305 bh.b_blocknr = 0;
2306 bh.b_size = size;
2307 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
2308 if (rc || !buffer_mapped(&bh))
2309 goto fail;
2310 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
2311 if (rc)
2312 goto fail;
2313 size -= bh.b_size;
2314 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2315 } while(size > 0);
2316
98583b3e
AD
2317 end = ktime_get();
2318 fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid,
2319 jd->nr_extents, ktime_ms_delta(end, start));
b50f227b
SW
2320 return 0;
2321
2322fail:
2323 fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
2324 rc, jd->jd_jid,
2325 (unsigned long long)(i_size_read(jd->jd_inode) - size),
2326 jd->nr_extents);
2327 fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
2328 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
2329 bh.b_state, (unsigned long long)bh.b_size);
2330 gfs2_free_journal_extents(jd);
2331 return rc;
2332}
2333
b3b94faa
DT
2334/**
2335 * gfs2_write_alloc_required - figure out if a write will require an allocation
2336 * @ip: the file being written to
2337 * @offset: the offset to write to
2338 * @len: the number of bytes being written
b3b94faa 2339 *
461cb419 2340 * Returns: 1 if an alloc is required, 0 otherwise
b3b94faa
DT
2341 */
2342
cd915493 2343int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
461cb419 2344 unsigned int len)
b3b94faa 2345{
feaa7bba 2346 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
941e6d7d
SW
2347 struct buffer_head bh;
2348 unsigned int shift;
2349 u64 lblock, lblock_stop, size;
7ed122e4 2350 u64 end_of_file;
b3b94faa 2351
b3b94faa
DT
2352 if (!len)
2353 return 0;
2354
2355 if (gfs2_is_stuffed(ip)) {
235628c5 2356 if (offset + len > gfs2_max_stuffed_size(ip))
461cb419 2357 return 1;
b3b94faa
DT
2358 return 0;
2359 }
2360
941e6d7d 2361 shift = sdp->sd_sb.sb_bsize_shift;
7ed122e4 2362 BUG_ON(gfs2_is_dir(ip));
a2e0f799 2363 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
7ed122e4
SW
2364 lblock = offset >> shift;
2365 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
77612578 2366 if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
461cb419 2367 return 1;
b3b94faa 2368
941e6d7d
SW
2369 size = (lblock_stop - lblock) << shift;
2370 do {
2371 bh.b_state = 0;
2372 bh.b_size = size;
2373 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
2374 if (!buffer_mapped(&bh))
461cb419 2375 return 1;
941e6d7d
SW
2376 size -= bh.b_size;
2377 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2378 } while(size > 0);
b3b94faa
DT
2379
2380 return 0;
2381}
2382
4e56a641
AG
2383static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2384{
2385 struct gfs2_inode *ip = GFS2_I(inode);
2386 struct buffer_head *dibh;
2387 int error;
2388
2389 if (offset >= inode->i_size)
2390 return 0;
2391 if (offset + length > inode->i_size)
2392 length = inode->i_size - offset;
2393
2394 error = gfs2_meta_inode_buffer(ip, &dibh);
2395 if (error)
2396 return error;
2397 gfs2_trans_add_meta(ip->i_gl, dibh);
2398 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2399 length);
2400 brelse(dibh);
2401 return 0;
2402}
2403
/*
 * gfs2_journaled_truncate_range - truncate page cache in journal-sized chunks
 * @inode: the inode
 * @offset: start of the range
 * @length: length of the range
 *
 * For journaled-data files, truncating pages generates journal revokes.
 * Truncate the page cache piecewise, ending and restarting the transaction
 * between chunks, so a single transaction never exceeds
 * GFS2_JTRUNC_REVOKES revokes.  Called with a transaction already open
 * (current->journal_info set).
 *
 * Returns: errno
 */
static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
					 loff_t length)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	int error;

	while (length) {
		struct gfs2_trans *tr;
		loff_t chunk;
		unsigned int offs;

		chunk = length;
		if (chunk > max_chunk)
			chunk = max_chunk;

		/* After an unaligned first piece, keep chunks page-aligned. */
		offs = offset & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache_range(inode, offset, chunk);
		offset += chunk;
		length -= chunk;

		/* If this chunk didn't dirty the transaction, reuse it. */
		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		/* Flush the revokes and start a fresh transaction. */
		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}
	return 0;
}
2439
/*
 * __gfs2_punch_hole - punch a hole in a file
 * @file: the file
 * @offset: start of the hole
 * @length: length of the hole
 *
 * Zeroes the partial blocks at each end of the range (or the in-dinode
 * data for a stuffed file), truncates the page cache over the range, and
 * then deallocates whole blocks via punch_hole().  Called with the inode
 * glock held by the higher-level fallocate path.
 *
 * Returns: errno
 */
int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int blocksize = i_blocksize(inode);
	loff_t start, end;
	int error;

	/* Flush dirty pages over the affected blocks before changing them. */
	start = round_down(offset, blocksize);
	end = round_up(offset + length, blocksize) - 1;
	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;

	/* jdata needs room for revokes; ordered mode only touches the dinode. */
	if (gfs2_is_jdata(ip))
		error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
					 GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	if (gfs2_is_stuffed(ip)) {
		error = stuffed_zero_range(inode, offset, length);
		if (error)
			goto out;
	} else {
		unsigned int start_off, end_len;

		/* Partial-block pieces at the head and tail of the range. */
		start_off = offset & (blocksize - 1);
		end_len = (offset + length) & (blocksize - 1);
		if (start_off) {
			unsigned int len = length;
			if (length > blocksize - start_off)
				len = blocksize - start_off;
			error = gfs2_block_zero_range(inode, offset, len);
			if (error)
				goto out;
			/* Head and tail share one block: tail already done. */
			if (start_off + length < blocksize)
				end_len = 0;
		}
		if (end_len) {
			error = gfs2_block_zero_range(inode,
				offset + length - end_len, end_len);
			if (error)
				goto out;
		}
	}

	if (gfs2_is_jdata(ip)) {
		BUG_ON(!current->journal_info);
		gfs2_journaled_truncate_range(inode, offset, length);
	} else
		truncate_pagecache_range(inode, offset, offset + length - 1);

	file_update_time(file);
	mark_inode_dirty(inode);

	if (current->journal_info)
		gfs2_trans_end(sdp);

	/* Whole-block deallocation; stuffed files have no blocks to free. */
	if (!gfs2_is_stuffed(ip))
		error = punch_hole(ip, offset, length);

out:
	/* A transaction may still be open only on the error paths above. */
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}