]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/gfs2/bmap.c
GFS2: gfs2_free_extlen can return an extent that is too long
[mirror_ubuntu-jammy-kernel.git] / fs / gfs2 / bmap.c
CommitLineData
b3b94faa
DT
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3a8a9a10 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
b3b94faa
DT
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
e9fc2aa0 7 * of the GNU General Public License version 2.
b3b94faa
DT
8 */
9
b3b94faa
DT
10#include <linux/spinlock.h>
11#include <linux/completion.h>
12#include <linux/buffer_head.h>
64dd153c 13#include <linux/blkdev.h>
5c676f6d 14#include <linux/gfs2_ondisk.h>
71b86f56 15#include <linux/crc32.h>
3974320c 16#include <linux/iomap.h>
b3b94faa
DT
17
18#include "gfs2.h"
5c676f6d 19#include "incore.h"
b3b94faa
DT
20#include "bmap.h"
21#include "glock.h"
22#include "inode.h"
b3b94faa 23#include "meta_io.h"
b3b94faa
DT
24#include "quota.h"
25#include "rgrp.h"
45138990 26#include "log.h"
4c16c36a 27#include "super.h"
b3b94faa 28#include "trans.h"
18ec7d5c 29#include "dir.h"
5c676f6d 30#include "util.h"
63997775 31#include "trace_gfs2.h"
b3b94faa
DT
32
33/* This doesn't need to be that large as max 64 bit pointers in a 4k
34 * block is 512, so __u16 is fine for that. It saves stack space to
35 * keep it small.
36 */
37struct metapath {
dbac6710 38 struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
b3b94faa 39 __u16 mp_list[GFS2_MAX_META_HEIGHT];
5f8bd444
BP
40 int mp_fheight; /* find_metapath height */
41 int mp_aheight; /* actual height (lookup height) */
b3b94faa
DT
42};
43
f25ef0c1
SW
44/**
45 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
46 * @ip: the inode
47 * @dibh: the dinode buffer
48 * @block: the block number that was allocated
ff8f33c8 49 * @page: The (optional) page. This is looked up if @page is NULL
f25ef0c1
SW
50 *
51 * Returns: errno
52 */
53
54static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
cd915493 55 u64 block, struct page *page)
f25ef0c1 56{
f25ef0c1
SW
57 struct inode *inode = &ip->i_inode;
58 struct buffer_head *bh;
59 int release = 0;
60
61 if (!page || page->index) {
220cca2a 62 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
f25ef0c1
SW
63 if (!page)
64 return -ENOMEM;
65 release = 1;
66 }
67
68 if (!PageUptodate(page)) {
69 void *kaddr = kmap(page);
602c89d2
SW
70 u64 dsize = i_size_read(inode);
71
235628c5
AG
72 if (dsize > gfs2_max_stuffed_size(ip))
73 dsize = gfs2_max_stuffed_size(ip);
f25ef0c1 74
602c89d2 75 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
09cbfeaf 76 memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
f25ef0c1
SW
77 kunmap(page);
78
79 SetPageUptodate(page);
80 }
81
82 if (!page_has_buffers(page))
47a9a527
FF
83 create_empty_buffers(page, BIT(inode->i_blkbits),
84 BIT(BH_Uptodate));
f25ef0c1
SW
85
86 bh = page_buffers(page);
87
88 if (!buffer_mapped(bh))
89 map_bh(bh, inode->i_sb, block);
90
91 set_buffer_uptodate(bh);
eaf96527
SW
92 if (!gfs2_is_jdata(ip))
93 mark_buffer_dirty(bh);
bf36a713 94 if (!gfs2_is_writeback(ip))
350a9b0a 95 gfs2_trans_add_data(ip->i_gl, bh);
f25ef0c1
SW
96
97 if (release) {
98 unlock_page(page);
09cbfeaf 99 put_page(page);
f25ef0c1
SW
100 }
101
102 return 0;
103}
104
b3b94faa
DT
105/**
106 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
107 * @ip: The GFS2 inode to unstuff
ff8f33c8 108 * @page: The (optional) page. This is looked up if the @page is NULL
b3b94faa
DT
109 *
110 * This routine unstuffs a dinode and returns it to a "normal" state such
111 * that the height can be grown in the traditional way.
112 *
113 * Returns: errno
114 */
115
f25ef0c1 116int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
b3b94faa
DT
117{
118 struct buffer_head *bh, *dibh;
48516ced 119 struct gfs2_dinode *di;
cd915493 120 u64 block = 0;
18ec7d5c 121 int isdir = gfs2_is_dir(ip);
b3b94faa
DT
122 int error;
123
124 down_write(&ip->i_rw_mutex);
125
126 error = gfs2_meta_inode_buffer(ip, &dibh);
127 if (error)
128 goto out;
907b9bce 129
a2e0f799 130 if (i_size_read(&ip->i_inode)) {
b3b94faa
DT
131 /* Get a free block, fill it with the stuffed data,
132 and write it out to disk */
133
b45e41d7 134 unsigned int n = 1;
6e87ed0f 135 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
09010978
SW
136 if (error)
137 goto out_brelse;
18ec7d5c 138 if (isdir) {
5731be53 139 gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
61e085a8 140 error = gfs2_dir_get_new_buffer(ip, block, &bh);
b3b94faa
DT
141 if (error)
142 goto out_brelse;
48516ced 143 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
b3b94faa
DT
144 dibh, sizeof(struct gfs2_dinode));
145 brelse(bh);
146 } else {
f25ef0c1 147 error = gfs2_unstuffer_page(ip, dibh, block, page);
b3b94faa
DT
148 if (error)
149 goto out_brelse;
150 }
151 }
152
153 /* Set up the pointer to the new block */
154
350a9b0a 155 gfs2_trans_add_meta(ip->i_gl, dibh);
48516ced 156 di = (struct gfs2_dinode *)dibh->b_data;
b3b94faa
DT
157 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
158
a2e0f799 159 if (i_size_read(&ip->i_inode)) {
48516ced 160 *(__be64 *)(di + 1) = cpu_to_be64(block);
77658aad
SW
161 gfs2_add_inode_blocks(&ip->i_inode, 1);
162 di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
b3b94faa
DT
163 }
164
ecc30c79 165 ip->i_height = 1;
48516ced 166 di->di_height = cpu_to_be16(1);
b3b94faa 167
a91ea69f 168out_brelse:
b3b94faa 169 brelse(dibh);
a91ea69f 170out:
b3b94faa 171 up_write(&ip->i_rw_mutex);
b3b94faa
DT
172 return error;
173}
174
b3b94faa
DT
175
176/**
177 * find_metapath - Find path through the metadata tree
9b8c81d1 178 * @sdp: The superblock
b3b94faa 179 * @block: The disk block to look up
07e23d68 180 * @mp: The metapath to return the result in
9b8c81d1 181 * @height: The pre-calculated height of the metadata tree
b3b94faa
DT
182 *
183 * This routine returns a struct metapath structure that defines a path
184 * through the metadata of inode "ip" to get to block "block".
185 *
186 * Example:
187 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
188 * filesystem with a blocksize of 4096.
189 *
190 * find_metapath() would return a struct metapath structure set to:
07e23d68 191 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
b3b94faa
DT
192 *
193 * That means that in order to get to the block containing the byte at
194 * offset 101342453, we would load the indirect block pointed to by pointer
195 * 0 in the dinode. We would then load the indirect block pointed to by
196 * pointer 48 in that indirect block. We would then load the data block
197 * pointed to by pointer 165 in that indirect block.
198 *
199 * ----------------------------------------
200 * | Dinode | |
201 * | | 4|
202 * | |0 1 2 3 4 5 9|
203 * | | 6|
204 * ----------------------------------------
205 * |
206 * |
207 * V
208 * ----------------------------------------
209 * | Indirect Block |
210 * | 5|
211 * | 4 4 4 4 4 5 5 1|
212 * |0 5 6 7 8 9 0 1 2|
213 * ----------------------------------------
214 * |
215 * |
216 * V
217 * ----------------------------------------
218 * | Indirect Block |
219 * | 1 1 1 1 1 5|
220 * | 6 6 6 6 6 1|
221 * |0 3 4 5 6 7 2|
222 * ----------------------------------------
223 * |
224 * |
225 * V
226 * ----------------------------------------
227 * | Data block containing offset |
228 * | 101342453 |
229 * | |
230 * | |
231 * ----------------------------------------
232 *
233 */
234
9b8c81d1
SW
235static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
236 struct metapath *mp, unsigned int height)
b3b94faa 237{
b3b94faa
DT
238 unsigned int i;
239
5f8bd444 240 mp->mp_fheight = height;
9b8c81d1 241 for (i = height; i--;)
7eabb77e 242 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
b3b94faa
DT
243}
244
5af4e7a0 245static inline unsigned int metapath_branch_start(const struct metapath *mp)
9b8c81d1 246{
5af4e7a0
BM
247 if (mp->mp_list[0] == 0)
248 return 2;
249 return 1;
9b8c81d1
SW
250}
251
d552a2b9 252/**
20cdc193 253 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
d552a2b9
BP
254 * @height: The metadata height (0 = dinode)
255 * @mp: The metapath
256 */
257static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
258{
259 struct buffer_head *bh = mp->mp_bh[height];
260 if (height == 0)
261 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
262 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
263}
264
b3b94faa
DT
265/**
266 * metapointer - Return pointer to start of metadata in a buffer
b3b94faa
DT
267 * @height: The metadata height (0 = dinode)
268 * @mp: The metapath
269 *
270 * Return a pointer to the block number of the next height of the metadata
271 * tree given a buffer containing the pointer to the current height of the
272 * metadata tree.
273 */
274
9b8c81d1 275static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
b3b94faa 276{
d552a2b9
BP
277 __be64 *p = metaptr1(height, mp);
278 return p + mp->mp_list[height];
b3b94faa
DT
279}
280
5cf26b1e 281static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
b99b98dc 282{
b99b98dc
SW
283 const __be64 *t;
284
5cf26b1e 285 for (t = start; t < end; t++) {
c3ce5aa9
AG
286 struct buffer_head *rabh;
287
b99b98dc
SW
288 if (!*t)
289 continue;
290
291 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
292 if (trylock_buffer(rabh)) {
293 if (!buffer_uptodate(rabh)) {
294 rabh->b_end_io = end_buffer_read_sync;
e477b24b
CL
295 submit_bh(REQ_OP_READ,
296 REQ_RAHEAD | REQ_META | REQ_PRIO,
297 rabh);
b99b98dc
SW
298 continue;
299 }
300 unlock_buffer(rabh);
301 }
302 brelse(rabh);
303 }
304}
305
e8b43fe0
AG
306static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
307 unsigned int x, unsigned int h)
d552a2b9 308{
e8b43fe0
AG
309 for (; x < h; x++) {
310 __be64 *ptr = metapointer(x, mp);
311 u64 dblock = be64_to_cpu(*ptr);
312 int ret;
d552a2b9 313
e8b43fe0
AG
314 if (!dblock)
315 break;
316 ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
317 if (ret)
318 return ret;
319 }
320 mp->mp_aheight = x + 1;
321 return 0;
d552a2b9
BP
322}
323
b3b94faa 324/**
9b8c81d1
SW
325 * lookup_metapath - Walk the metadata tree to a specific point
326 * @ip: The inode
b3b94faa 327 * @mp: The metapath
b3b94faa 328 *
9b8c81d1
SW
329 * Assumes that the inode's buffer has already been looked up and
330 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
331 * by find_metapath().
332 *
333 * If this function encounters part of the tree which has not been
334 * allocated, it returns the current height of the tree at the point
335 * at which it found the unallocated block. Blocks which are found are
336 * added to the mp->mp_bh[] list.
b3b94faa 337 *
e8b43fe0 338 * Returns: error
b3b94faa
DT
339 */
340
9b8c81d1 341static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
11707ea0 342{
e8b43fe0 343 return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
dbac6710
SW
344}
345
d552a2b9
BP
346/**
347 * fillup_metapath - fill up buffers for the metadata path to a specific height
348 * @ip: The inode
349 * @mp: The metapath
350 * @h: The height to which it should be mapped
351 *
352 * Similar to lookup_metapath, but does lookups for a range of heights
353 *
c3ce5aa9 354 * Returns: error or the number of buffers filled
d552a2b9
BP
355 */
356
357static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
358{
e8b43fe0 359 unsigned int x = 0;
c3ce5aa9 360 int ret;
d552a2b9
BP
361
362 if (h) {
363 /* find the first buffer we need to look up. */
e8b43fe0
AG
364 for (x = h - 1; x > 0; x--) {
365 if (mp->mp_bh[x])
366 break;
d552a2b9
BP
367 }
368 }
c3ce5aa9
AG
369 ret = __fillup_metapath(ip, mp, x, h);
370 if (ret)
371 return ret;
372 return mp->mp_aheight - x - 1;
d552a2b9
BP
373}
374
9b8c81d1 375static inline void release_metapath(struct metapath *mp)
dbac6710
SW
376{
377 int i;
378
9b8c81d1
SW
379 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
380 if (mp->mp_bh[i] == NULL)
381 break;
382 brelse(mp->mp_bh[i]);
383 }
11707ea0
SW
384}
385
30cbf189
SW
386/**
387 * gfs2_extent_length - Returns length of an extent of blocks
388 * @start: Start of the buffer
389 * @len: Length of the buffer in bytes
390 * @ptr: Current position in the buffer
391 * @limit: Max extent length to return (0 = unlimited)
392 * @eob: Set to 1 if we hit "end of block"
393 *
394 * If the first block is zero (unallocated) it will return the number of
395 * unallocated blocks in the extent, otherwise it will return the number
396 * of contiguous blocks in the extent.
397 *
398 * Returns: The length of the extent (minimum of one block)
399 */
400
b650738c 401static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
30cbf189
SW
402{
403 const __be64 *end = (start + len);
404 const __be64 *first = ptr;
405 u64 d = be64_to_cpu(*ptr);
406
407 *eob = 0;
408 do {
409 ptr++;
410 if (ptr >= end)
411 break;
412 if (limit && --limit == 0)
413 break;
414 if (d)
415 d++;
416 } while(be64_to_cpu(*ptr) == d);
417 if (ptr >= end)
418 *eob = 1;
419 return (ptr - first);
420}
421
9b8c81d1 422static inline void bmap_lock(struct gfs2_inode *ip, int create)
4cf1ed81 423{
4cf1ed81
SW
424 if (create)
425 down_write(&ip->i_rw_mutex);
426 else
427 down_read(&ip->i_rw_mutex);
428}
429
9b8c81d1 430static inline void bmap_unlock(struct gfs2_inode *ip, int create)
4cf1ed81 431{
4cf1ed81
SW
432 if (create)
433 up_write(&ip->i_rw_mutex);
434 else
435 up_read(&ip->i_rw_mutex);
436}
437
9b8c81d1
SW
438static inline __be64 *gfs2_indirect_init(struct metapath *mp,
439 struct gfs2_glock *gl, unsigned int i,
440 unsigned offset, u64 bn)
441{
442 __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
443 ((i > 1) ? sizeof(struct gfs2_meta_header) :
444 sizeof(struct gfs2_dinode)));
445 BUG_ON(i < 1);
446 BUG_ON(mp->mp_bh[i] != NULL);
447 mp->mp_bh[i] = gfs2_meta_new(gl, bn);
350a9b0a 448 gfs2_trans_add_meta(gl, mp->mp_bh[i]);
9b8c81d1
SW
449 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
450 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
451 ptr += offset;
452 *ptr = cpu_to_be64(bn);
453 return ptr;
454}
455
/* gfs2_iomap_alloc() state machine; allocation progresses towards ALLOC_DATA */
enum alloc_state {
	ALLOC_DATA = 0,		/* allocating data blocks */
	ALLOC_GROW_DEPTH = 1,	/* filling in lower levels of the tree */
	ALLOC_GROW_HEIGHT = 2,	/* growing the height of the tree */
	/* ALLOC_UNSTUFF = 3, TBD and rather complicated */
};
462
463/**
464 * gfs2_bmap_alloc - Build a metadata tree of the requested height
465 * @inode: The GFS2 inode
466 * @lblock: The logical starting block of the extent
467 * @bh_map: This is used to return the mapping details
5f8bd444
BP
468 * @zero_new: True if newly allocated blocks should be zeroed
469 * @mp: The metapath, with proper height information calculated
9b8c81d1 470 * @maxlen: The max number of data blocks to alloc
5f8bd444
BP
471 * @dblock: Pointer to return the resulting new block
472 * @dblks: Pointer to return the number of blocks allocated
9b8c81d1
SW
473 *
474 * In this routine we may have to alloc:
475 * i) Indirect blocks to grow the metadata tree height
476 * ii) Indirect blocks to fill in lower part of the metadata tree
477 * iii) Data blocks
478 *
479 * The function is in two parts. The first part works out the total
480 * number of blocks which we need. The second part does the actual
481 * allocation asking for an extent at a time (if enough contiguous free
482 * blocks are available, there will only be one request per bmap call)
483 * and uses the state machine to initialise the blocks in order.
484 *
485 * Returns: errno on error
486 */
487
3974320c
BP
488static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
489 unsigned flags, struct metapath *mp)
9b8c81d1
SW
490{
491 struct gfs2_inode *ip = GFS2_I(inode);
492 struct gfs2_sbd *sdp = GFS2_SB(inode);
493 struct buffer_head *dibh = mp->mp_bh[0];
5f8bd444 494 u64 bn;
5af4e7a0 495 unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
3974320c 496 unsigned dblks = 0;
9b8c81d1 497 unsigned ptrs_per_blk;
5f8bd444 498 const unsigned end_of_metadata = mp->mp_fheight - 1;
9b8c81d1
SW
499 enum alloc_state state;
500 __be64 *ptr;
501 __be64 zero_bn = 0;
3974320c 502 size_t maxlen = iomap->length >> inode->i_blkbits;
9b8c81d1 503
5f8bd444 504 BUG_ON(mp->mp_aheight < 1);
9b8c81d1
SW
505 BUG_ON(dibh == NULL);
506
350a9b0a 507 gfs2_trans_add_meta(ip->i_gl, dibh);
9b8c81d1 508
5f8bd444 509 if (mp->mp_fheight == mp->mp_aheight) {
9b8c81d1 510 struct buffer_head *bh;
3974320c
BP
511 int eob;
512
9b8c81d1
SW
513 /* Bottom indirect block exists, find unalloced extent size */
514 ptr = metapointer(end_of_metadata, mp);
515 bh = mp->mp_bh[end_of_metadata];
3974320c
BP
516 dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
517 maxlen, &eob);
518 BUG_ON(dblks < 1);
9b8c81d1
SW
519 state = ALLOC_DATA;
520 } else {
521 /* Need to allocate indirect blocks */
5f8bd444
BP
522 ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
523 sdp->sd_diptrs;
3974320c
BP
524 dblks = min(maxlen, (size_t)(ptrs_per_blk -
525 mp->mp_list[end_of_metadata]));
5f8bd444 526 if (mp->mp_fheight == ip->i_height) {
9b8c81d1 527 /* Writing into existing tree, extend tree down */
5f8bd444 528 iblks = mp->mp_fheight - mp->mp_aheight;
9b8c81d1
SW
529 state = ALLOC_GROW_DEPTH;
530 } else {
531 /* Building up tree height */
532 state = ALLOC_GROW_HEIGHT;
5f8bd444 533 iblks = mp->mp_fheight - ip->i_height;
5af4e7a0 534 branch_start = metapath_branch_start(mp);
5f8bd444 535 iblks += (mp->mp_fheight - branch_start);
9b8c81d1
SW
536 }
537 }
538
539 /* start of the second part of the function (state machine) */
540
3974320c 541 blks = dblks + iblks;
5f8bd444 542 i = mp->mp_aheight;
9b8c81d1 543 do {
09010978 544 int error;
9b8c81d1 545 n = blks - alloced;
6e87ed0f 546 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
09010978
SW
547 if (error)
548 return error;
9b8c81d1
SW
549 alloced += n;
550 if (state != ALLOC_DATA || gfs2_is_jdata(ip))
551 gfs2_trans_add_unrevoke(sdp, bn, n);
552 switch (state) {
553 /* Growing height of tree */
554 case ALLOC_GROW_HEIGHT:
555 if (i == 1) {
556 ptr = (__be64 *)(dibh->b_data +
557 sizeof(struct gfs2_dinode));
558 zero_bn = *ptr;
559 }
5f8bd444
BP
560 for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
561 i++, n--)
9b8c81d1 562 gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
5f8bd444 563 if (i - 1 == mp->mp_fheight - ip->i_height) {
9b8c81d1
SW
564 i--;
565 gfs2_buffer_copy_tail(mp->mp_bh[i],
566 sizeof(struct gfs2_meta_header),
567 dibh, sizeof(struct gfs2_dinode));
568 gfs2_buffer_clear_tail(dibh,
569 sizeof(struct gfs2_dinode) +
570 sizeof(__be64));
571 ptr = (__be64 *)(mp->mp_bh[i]->b_data +
572 sizeof(struct gfs2_meta_header));
573 *ptr = zero_bn;
574 state = ALLOC_GROW_DEPTH;
5f8bd444 575 for(i = branch_start; i < mp->mp_fheight; i++) {
9b8c81d1
SW
576 if (mp->mp_bh[i] == NULL)
577 break;
578 brelse(mp->mp_bh[i]);
579 mp->mp_bh[i] = NULL;
580 }
5af4e7a0 581 i = branch_start;
9b8c81d1
SW
582 }
583 if (n == 0)
584 break;
585 /* Branching from existing tree */
586 case ALLOC_GROW_DEPTH:
5f8bd444 587 if (i > 1 && i < mp->mp_fheight)
350a9b0a 588 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
5f8bd444 589 for (; i < mp->mp_fheight && n > 0; i++, n--)
9b8c81d1
SW
590 gfs2_indirect_init(mp, ip->i_gl, i,
591 mp->mp_list[i-1], bn++);
5f8bd444 592 if (i == mp->mp_fheight)
9b8c81d1
SW
593 state = ALLOC_DATA;
594 if (n == 0)
595 break;
596 /* Tree complete, adding data blocks */
597 case ALLOC_DATA:
3974320c 598 BUG_ON(n > dblks);
9b8c81d1 599 BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
350a9b0a 600 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
3974320c 601 dblks = n;
9b8c81d1 602 ptr = metapointer(end_of_metadata, mp);
3974320c
BP
603 iomap->addr = bn << inode->i_blkbits;
604 iomap->flags |= IOMAP_F_NEW;
9b8c81d1
SW
605 while (n-- > 0)
606 *ptr++ = cpu_to_be64(bn++);
607 break;
608 }
3974320c 609 } while (iomap->addr == IOMAP_NULL_ADDR);
9b8c81d1 610
3974320c 611 iomap->length = (u64)dblks << inode->i_blkbits;
5f8bd444 612 ip->i_height = mp->mp_fheight;
9b8c81d1
SW
613 gfs2_add_inode_blocks(&ip->i_inode, alloced);
614 gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
9b8c81d1
SW
615 return 0;
616}
617
b3b94faa 618/**
3974320c 619 * hole_size - figure out the size of a hole
fd88de56 620 * @inode: The inode
3974320c
BP
621 * @lblock: The logical starting block number
622 * @mp: The metapath
b3b94faa 623 *
3974320c 624 * Returns: The hole size in bytes
b3b94faa 625 *
b3b94faa 626 */
3974320c
BP
627static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
628{
629 struct gfs2_inode *ip = GFS2_I(inode);
630 struct gfs2_sbd *sdp = GFS2_SB(inode);
631 struct metapath mp_eof;
632 u64 factor = 1;
633 int hgt;
634 u64 holesz = 0;
635 const __be64 *first, *end, *ptr;
636 const struct buffer_head *bh;
637 u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;
638 int zeroptrs;
639 bool done = false;
640
641 /* Get another metapath, to the very last byte */
642 find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
643 for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
644 bh = mp->mp_bh[hgt];
645 if (bh) {
646 zeroptrs = 0;
647 first = metapointer(hgt, mp);
648 end = (const __be64 *)(bh->b_data + bh->b_size);
649
650 for (ptr = first; ptr < end; ptr++) {
651 if (*ptr) {
652 done = true;
653 break;
654 } else {
655 zeroptrs++;
656 }
657 }
658 } else {
659 zeroptrs = sdp->sd_inptrs;
660 }
661 if (factor * zeroptrs >= lblock_stop - lblock + 1) {
662 holesz = lblock_stop - lblock + 1;
663 break;
664 }
665 holesz += factor * zeroptrs;
b3b94faa 666
3974320c
BP
667 factor *= sdp->sd_inptrs;
668 if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
669 (mp->mp_list[hgt - 1])++;
670 }
671 return holesz << inode->i_blkbits;
672}
673
674static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
675{
676 struct gfs2_inode *ip = GFS2_I(inode);
677
678 iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
679 sizeof(struct gfs2_dinode);
680 iomap->offset = 0;
681 iomap->length = i_size_read(inode);
682 iomap->type = IOMAP_MAPPED;
683 iomap->flags = IOMAP_F_DATA_INLINE;
684}
685
686/**
687 * gfs2_iomap_begin - Map blocks from an inode to disk blocks
688 * @inode: The inode
689 * @pos: Starting position in bytes
690 * @length: Length to map, in bytes
691 * @flags: iomap flags
692 * @iomap: The iomap structure
693 *
694 * Returns: errno
695 */
696int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
697 unsigned flags, struct iomap *iomap)
b3b94faa 698{
feaa7bba
SW
699 struct gfs2_inode *ip = GFS2_I(inode);
700 struct gfs2_sbd *sdp = GFS2_SB(inode);
3974320c 701 struct metapath mp = { .mp_aheight = 1, };
9b8c81d1 702 __be64 *ptr;
3974320c
BP
703 sector_t lblock;
704 sector_t lend;
49edd5bf 705 int ret = 0;
9b8c81d1
SW
706 int eob;
707 unsigned int len;
708 struct buffer_head *bh;
709 u8 height;
7276b3b0 710
3974320c
BP
711 trace_gfs2_iomap_start(ip, pos, length, flags);
712 if (!length) {
713 ret = -EINVAL;
714 goto out;
715 }
b3b94faa 716
49edd5bf
AG
717 if (gfs2_is_stuffed(ip)) {
718 if (flags & IOMAP_REPORT) {
719 gfs2_stuffed_iomap(inode, iomap);
720 if (pos >= iomap->length)
721 ret = -ENOENT;
722 goto out;
723 }
724 BUG_ON(!(flags & IOMAP_WRITE));
3974320c
BP
725 }
726
727 lblock = pos >> inode->i_blkbits;
728 lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;
729
730 iomap->offset = lblock << inode->i_blkbits;
731 iomap->addr = IOMAP_NULL_ADDR;
732 iomap->type = IOMAP_HOLE;
733 iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
734 iomap->flags = IOMAP_F_MERGED;
49edd5bf 735 bmap_lock(ip, flags & IOMAP_WRITE);
20cdc193 736
9b8c81d1
SW
737 ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
738 if (ret)
3974320c 739 goto out_release;
b3b94faa 740
9b8c81d1 741 height = ip->i_height;
9a38662b 742 while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
9b8c81d1
SW
743 height++;
744 find_metapath(sdp, lblock, &mp, height);
9b8c81d1
SW
745 if (height > ip->i_height || gfs2_is_stuffed(ip))
746 goto do_alloc;
3974320c 747
9b8c81d1 748 ret = lookup_metapath(ip, &mp);
e8b43fe0 749 if (ret)
3974320c
BP
750 goto out_release;
751
5f8bd444 752 if (mp.mp_aheight != ip->i_height)
9b8c81d1 753 goto do_alloc;
3974320c 754
9b8c81d1
SW
755 ptr = metapointer(ip->i_height - 1, &mp);
756 if (*ptr == 0)
757 goto do_alloc;
3974320c
BP
758
759 iomap->type = IOMAP_MAPPED;
760 iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
761
9b8c81d1 762 bh = mp.mp_bh[ip->i_height - 1];
3974320c 763 len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
9b8c81d1 764 if (eob)
3974320c
BP
765 iomap->flags |= IOMAP_F_BOUNDARY;
766 iomap->length = (u64)len << inode->i_blkbits;
767
3974320c 768out_release:
9b8c81d1 769 release_metapath(&mp);
49edd5bf 770 bmap_unlock(ip, flags & IOMAP_WRITE);
3974320c
BP
771out:
772 trace_gfs2_iomap_end(ip, iomap, ret);
9b8c81d1 773 return ret;
30cbf189 774
9b8c81d1 775do_alloc:
49edd5bf
AG
776 if (flags & IOMAP_WRITE) {
777 ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
778 } else if (flags & IOMAP_REPORT) {
779 loff_t size = i_size_read(inode);
780 if (pos >= size)
3974320c 781 ret = -ENOENT;
49edd5bf
AG
782 else if (height <= ip->i_height)
783 iomap->length = hole_size(inode, lblock, &mp);
784 else
785 iomap->length = size - pos;
b3b94faa 786 }
3974320c
BP
787 goto out_release;
788}
789
790/**
d39d18e0 791 * gfs2_block_map - Map one or more blocks of an inode to a disk block
3974320c
BP
792 * @inode: The inode
793 * @lblock: The logical block number
794 * @bh_map: The bh to be mapped
795 * @create: True if its ok to alloc blocks to satify the request
796 *
d39d18e0
AG
797 * The size of the requested mapping is defined in bh_map->b_size.
798 *
799 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
800 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
801 * bh_map->b_size to indicate the size of the mapping when @lblock and
802 * successive blocks are mapped, up to the requested size.
803 *
804 * Sets buffer_boundary() if a read of metadata will be required
805 * before the next block can be mapped. Sets buffer_new() if new
806 * blocks were allocated.
3974320c
BP
807 *
808 * Returns: errno
809 */
810
811int gfs2_block_map(struct inode *inode, sector_t lblock,
812 struct buffer_head *bh_map, int create)
813{
814 struct gfs2_inode *ip = GFS2_I(inode);
815 struct iomap iomap;
816 int ret, flags = 0;
817
818 clear_buffer_mapped(bh_map);
819 clear_buffer_new(bh_map);
820 clear_buffer_boundary(bh_map);
821 trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
822
823 if (create)
824 flags |= IOMAP_WRITE;
3974320c
BP
825 ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
826 bh_map->b_size, flags, &iomap);
827 if (ret) {
828 if (!create && ret == -ENOENT) {
829 /* Return unmapped buffer beyond the end of file. */
830 ret = 0;
831 }
832 goto out;
833 }
834
835 if (iomap.length > bh_map->b_size) {
836 iomap.length = bh_map->b_size;
837 iomap.flags &= ~IOMAP_F_BOUNDARY;
5f8bd444 838 }
3974320c
BP
839 if (iomap.addr != IOMAP_NULL_ADDR)
840 map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
841 bh_map->b_size = iomap.length;
842 if (iomap.flags & IOMAP_F_BOUNDARY)
843 set_buffer_boundary(bh_map);
844 if (iomap.flags & IOMAP_F_NEW)
845 set_buffer_new(bh_map);
846
847out:
848 trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
849 return ret;
fd88de56
SW
850}
851
941e6d7d
SW
852/*
853 * Deprecated: do not use in new code
854 */
fd88de56
SW
855int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
856{
23591256 857 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
7a6bbacb 858 int ret;
fd88de56
SW
859 int create = *new;
860
861 BUG_ON(!extlen);
862 BUG_ON(!dblock);
863 BUG_ON(!new);
864
47a9a527 865 bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
e9e1ef2b 866 ret = gfs2_block_map(inode, lblock, &bh, create);
7a6bbacb
SW
867 *extlen = bh.b_size >> inode->i_blkbits;
868 *dblock = bh.b_blocknr;
869 if (buffer_new(&bh))
870 *new = 1;
871 else
872 *new = 0;
873 return ret;
b3b94faa
DT
874}
875
ba7f7290 876/**
bdba0d5e 877 * gfs2_block_zero_range - Deal with zeroing out data
ba7f7290
SW
878 *
879 * This is partly borrowed from ext3.
880 */
bdba0d5e
AG
881static int gfs2_block_zero_range(struct inode *inode, loff_t from,
882 unsigned int length)
ba7f7290 883{
bdba0d5e 884 struct address_space *mapping = inode->i_mapping;
ba7f7290 885 struct gfs2_inode *ip = GFS2_I(inode);
09cbfeaf
KS
886 unsigned long index = from >> PAGE_SHIFT;
887 unsigned offset = from & (PAGE_SIZE-1);
bdba0d5e 888 unsigned blocksize, iblock, pos;
ba7f7290
SW
889 struct buffer_head *bh;
890 struct page *page;
ba7f7290
SW
891 int err;
892
220cca2a 893 page = find_or_create_page(mapping, index, GFP_NOFS);
ba7f7290
SW
894 if (!page)
895 return 0;
896
897 blocksize = inode->i_sb->s_blocksize;
09cbfeaf 898 iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
ba7f7290
SW
899
900 if (!page_has_buffers(page))
901 create_empty_buffers(page, blocksize, 0);
902
903 /* Find the buffer that contains "offset" */
904 bh = page_buffers(page);
905 pos = blocksize;
906 while (offset >= pos) {
907 bh = bh->b_this_page;
908 iblock++;
909 pos += blocksize;
910 }
911
912 err = 0;
913
914 if (!buffer_mapped(bh)) {
e9e1ef2b 915 gfs2_block_map(inode, iblock, bh, 0);
ba7f7290
SW
916 /* unmapped? It's a hole - nothing to do */
917 if (!buffer_mapped(bh))
918 goto unlock;
919 }
920
921 /* Ok, it's mapped. Make sure it's up-to-date */
922 if (PageUptodate(page))
923 set_buffer_uptodate(bh);
924
925 if (!buffer_uptodate(bh)) {
926 err = -EIO;
dfec8a14 927 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
ba7f7290
SW
928 wait_on_buffer(bh);
929 /* Uhhuh. Read error. Complain and punt. */
930 if (!buffer_uptodate(bh))
931 goto unlock;
1875f2f3 932 err = 0;
ba7f7290
SW
933 }
934
bf36a713 935 if (!gfs2_is_writeback(ip))
350a9b0a 936 gfs2_trans_add_data(ip->i_gl, bh);
ba7f7290 937
eebd2aa3 938 zero_user(page, offset, length);
40bc9a27 939 mark_buffer_dirty(bh);
ba7f7290
SW
940unlock:
941 unlock_page(page);
09cbfeaf 942 put_page(page);
ba7f7290
SW
943 return err;
944}
945
c62baf65
FF
946#define GFS2_JTRUNC_REVOKES 8192
947
fa731fc4
SW
948/**
949 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
950 * @inode: The inode being truncated
951 * @oldsize: The original (larger) size
952 * @newsize: The new smaller size
953 *
954 * With jdata files, we have to journal a revoke for each block which is
955 * truncated. As a result, we need to split this into separate transactions
956 * if the number of pages being truncated gets too large.
957 */
958
fa731fc4
SW
959static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
960{
961 struct gfs2_sbd *sdp = GFS2_SB(inode);
962 u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
963 u64 chunk;
964 int error;
965
966 while (oldsize != newsize) {
e7fdf004
AG
967 struct gfs2_trans *tr;
968 unsigned int offs;
969
fa731fc4
SW
970 chunk = oldsize - newsize;
971 if (chunk > max_chunk)
972 chunk = max_chunk;
e7fdf004
AG
973
974 offs = oldsize & ~PAGE_MASK;
975 if (offs && chunk > PAGE_SIZE)
976 chunk = offs + ((chunk - offs) & PAGE_MASK);
977
7caef267 978 truncate_pagecache(inode, oldsize - chunk);
fa731fc4 979 oldsize -= chunk;
e7fdf004
AG
980
981 tr = current->journal_info;
982 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
983 continue;
984
fa731fc4
SW
985 gfs2_trans_end(sdp);
986 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
987 if (error)
988 return error;
989 }
990
991 return 0;
992}
993
8b5860a3 994static int trunc_start(struct inode *inode, u64 newsize)
b3b94faa 995{
ff8f33c8
SW
996 struct gfs2_inode *ip = GFS2_I(inode);
997 struct gfs2_sbd *sdp = GFS2_SB(inode);
80990f40 998 struct buffer_head *dibh = NULL;
b3b94faa 999 int journaled = gfs2_is_jdata(ip);
8b5860a3 1000 u64 oldsize = inode->i_size;
b3b94faa
DT
1001 int error;
1002
fa731fc4
SW
1003 if (journaled)
1004 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1005 else
1006 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
b3b94faa
DT
1007 if (error)
1008 return error;
1009
1010 error = gfs2_meta_inode_buffer(ip, &dibh);
1011 if (error)
1012 goto out;
1013
350a9b0a 1014 gfs2_trans_add_meta(ip->i_gl, dibh);
ff8f33c8 1015
b3b94faa 1016 if (gfs2_is_stuffed(ip)) {
ff8f33c8 1017 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
b3b94faa 1018 } else {
bdba0d5e
AG
1019 unsigned int blocksize = i_blocksize(inode);
1020 unsigned int offs = newsize & (blocksize - 1);
1021 if (offs) {
1022 error = gfs2_block_zero_range(inode, newsize,
1023 blocksize - offs);
ff8f33c8 1024 if (error)
80990f40 1025 goto out;
b3b94faa 1026 }
ff8f33c8 1027 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
b3b94faa
DT
1028 }
1029
ff8f33c8 1030 i_size_write(inode, newsize);
078cd827 1031 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
ff8f33c8 1032 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa 1033
fa731fc4
SW
1034 if (journaled)
1035 error = gfs2_journaled_truncate(inode, oldsize, newsize);
1036 else
7caef267 1037 truncate_pagecache(inode, newsize);
fa731fc4 1038
a91ea69f 1039out:
80990f40
AG
1040 brelse(dibh);
1041 if (current->journal_info)
1042 gfs2_trans_end(sdp);
b3b94faa
DT
1043 return error;
1044}
1045
d552a2b9
BP
/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all. However, we do it one rgrp at a time. If this
 * block has references to multiple rgrps, we break it into individual
 * transactions. This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power-outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      struct buffer_head *bh, __be64 *start, __be64 *end,
			      bool meta, u32 *btotal)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr;
	__be64 *p;
	int blks_outside_rgrp;
	u64 bn, bstart, isize_blks;
	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
	int ret = 0;
	bool buf_in_tr = false; /* buffer was added to transaction */

more_rgrps:
	/* If the holder is already initialized, we're re-entering after a
	   transaction boundary and still hold the previous rgrp's glock. */
	rgd = NULL;
	if (gfs2_holder_initialized(rd_gh)) {
		rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
		gfs2_assert_withdraw(sdp,
			     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
	}
	blks_outside_rgrp = 0;
	bstart = 0;
	blen = 0;

	for (p = start; p < end; p++) {
		if (!*p)
			continue;
		bn = be64_to_cpu(*p);

		if (rgd) {
			if (!rgrp_contains_block(rgd, bn)) {
				/* Belongs to another rgrp; handle it on a
				   later pass (see more_rgrps). */
				blks_outside_rgrp++;
				continue;
			}
		} else {
			rgd = gfs2_blk2rgrpd(sdp, bn, true);
			if (unlikely(!rgd)) {
				ret = -EIO;
				goto out;
			}
			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						 0, rd_gh);
			if (ret)
				goto out;

			/* Must be done with the rgrp glock held: */
			if (gfs2_rs_active(&ip->i_res) &&
			    rgd == ip->i_res.rs_rbm.rgd)
				gfs2_rs_deltree(&ip->i_res);
		}

		/* The size of our transactions will be unknown until we
		   actually process all the metadata blocks that relate to
		   the rgrp. So we estimate. We know it can't be more than
		   the dinode's i_blocks and we don't want to exceed the
		   journal flush threshold, sd_log_thresh2. */
		if (current->journal_info == NULL) {
			unsigned int jblocks_rqsted, revokes;

			jblocks_rqsted = rgd->rd_length + RES_DINODE +
				RES_INDIRECT;
			isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
			if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
				jblocks_rqsted +=
					atomic_read(&sdp->sd_log_thresh2);
			else
				jblocks_rqsted += isize_blks;
			revokes = jblocks_rqsted;
			if (meta)
				revokes += end - start;
			else if (ip->i_depth)
				revokes += sdp->sd_inptrs;
			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
			if (ret)
				goto out_unlock;
			down_write(&ip->i_rw_mutex);
		}
		/* check if we will exceed the transaction blocks requested */
		tr = current->journal_info;
		if (tr->tr_num_buf_new + RES_STATFS +
		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
			/* We set blks_outside_rgrp to ensure the loop will
			   be repeated for the same rgrp, but with a new
			   transaction. */
			blks_outside_rgrp++;
			/* This next part is tricky. If the buffer was added
			   to the transaction, we've already set some block
			   pointers to 0, so we better follow through and free
			   them, or we will introduce corruption (so break).
			   This may be impossible, or at least rare, but I
			   decided to cover the case regardless.

			   If the buffer was not added to the transaction
			   (this call), doing so would exceed our transaction
			   size, so we need to end the transaction and start a
			   new one (so goto). */

			if (buf_in_tr)
				break;
			goto out_unlock;
		}

		gfs2_trans_add_meta(ip->i_gl, bh);
		buf_in_tr = true;
		*p = 0;
		/* Coalesce physically contiguous blocks into one extent. */
		if (bstart + blen == bn) {
			blen++;
			continue;
		}
		if (bstart) {
			__gfs2_free_blocks(ip, bstart, (u32)blen, meta);
			(*btotal) += blen;
			gfs2_add_inode_blocks(&ip->i_inode, -blen);
		}
		bstart = bn;
		blen = 1;
	}
	/* Free the final pending extent, if any. */
	if (bstart) {
		__gfs2_free_blocks(ip, bstart, (u32)blen, meta);
		(*btotal) += blen;
		gfs2_add_inode_blocks(&ip->i_inode, -blen);
	}
out_unlock:
	if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
					    outside the rgrp we just processed,
					    do it all over again. */
		if (current->journal_info) {
			struct buffer_head *dibh;

			ret = gfs2_meta_inode_buffer(ip, &dibh);
			if (ret)
				goto out;

			/* Every transaction boundary, we rewrite the dinode
			   to keep its di_blocks current in case of failure. */
			ip->i_inode.i_mtime = ip->i_inode.i_ctime =
				current_time(&ip->i_inode);
			gfs2_trans_add_meta(ip->i_gl, dibh);
			gfs2_dinode_out(ip, dibh->b_data);
			brelse(dibh);
			up_write(&ip->i_rw_mutex);
			gfs2_trans_end(sdp);
		}
		gfs2_glock_dq_uninit(rd_gh);
		cond_resched();
		goto more_rgrps;
	}
out:
	return ret;
}
1219
10d2cf94
AG
1220static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1221{
1222 if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1223 return false;
1224 return true;
1225}
1226
d552a2b9
BP
1227/**
1228 * find_nonnull_ptr - find a non-null pointer given a metapath and height
d552a2b9
BP
1229 * @mp: starting metapath
1230 * @h: desired height to search
1231 *
10d2cf94 1232 * Assumes the metapath is valid (with buffers) out to height h.
d552a2b9
BP
1233 * Returns: true if a non-null pointer was found in the metapath buffer
1234 * false if all remaining pointers are NULL in the buffer
1235 */
1236static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
10d2cf94
AG
1237 unsigned int h,
1238 __u16 *end_list, unsigned int end_aligned)
d552a2b9 1239{
10d2cf94
AG
1240 struct buffer_head *bh = mp->mp_bh[h];
1241 __be64 *first, *ptr, *end;
1242
1243 first = metaptr1(h, mp);
1244 ptr = first + mp->mp_list[h];
1245 end = (__be64 *)(bh->b_data + bh->b_size);
1246 if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
1247 bool keep_end = h < end_aligned;
1248 end = first + end_list[h] + keep_end;
1249 }
d552a2b9 1250
10d2cf94 1251 while (ptr < end) {
c4a9d189 1252 if (*ptr) { /* if we have a non-null pointer */
10d2cf94 1253 mp->mp_list[h] = ptr - first;
c4a9d189
BP
1254 h++;
1255 if (h < GFS2_MAX_META_HEIGHT)
10d2cf94 1256 mp->mp_list[h] = 0;
d552a2b9 1257 return true;
c4a9d189 1258 }
10d2cf94 1259 ptr++;
d552a2b9 1260 }
10d2cf94 1261 return false;
d552a2b9
BP
1262}
1263
/* States of the punch_hole()/truncate deallocation state machine. */
enum dealloc_states {
	DEALLOC_MP_FULL = 0,  /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1, /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,  /* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,     /* process complete */
};
b3b94faa 1270
5cf26b1e
AG
1271static inline void
1272metapointer_range(struct metapath *mp, int height,
1273 __u16 *start_list, unsigned int start_aligned,
10d2cf94 1274 __u16 *end_list, unsigned int end_aligned,
5cf26b1e
AG
1275 __be64 **start, __be64 **end)
1276{
1277 struct buffer_head *bh = mp->mp_bh[height];
1278 __be64 *first;
1279
1280 first = metaptr1(height, mp);
1281 *start = first;
1282 if (mp_eq_to_hgt(mp, start_list, height)) {
1283 bool keep_start = height < start_aligned;
1284 *start = first + start_list[height] + keep_start;
1285 }
1286 *end = (__be64 *)(bh->b_data + bh->b_size);
10d2cf94
AG
1287 if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1288 bool keep_end = height < end_aligned;
1289 *end = first + end_list[height] + keep_end;
1290 }
1291}
1292
1293static inline bool walk_done(struct gfs2_sbd *sdp,
1294 struct metapath *mp, int height,
1295 __u16 *end_list, unsigned int end_aligned)
1296{
1297 __u16 end;
1298
1299 if (end_list) {
1300 bool keep_end = height < end_aligned;
1301 if (!mp_eq_to_hgt(mp, end_list, height))
1302 return false;
1303 end = end_list[height] + keep_end;
1304 } else
1305 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1306 return mp->mp_list[height] >= end;
5cf26b1e
AG
1307}
1308
d552a2b9 1309/**
10d2cf94 1310 * punch_hole - deallocate blocks in a file
d552a2b9 1311 * @ip: inode to truncate
10d2cf94
AG
1312 * @offset: the start of the hole
1313 * @length: the size of the hole (or 0 for truncate)
1314 *
1315 * Punch a hole into a file or truncate a file at a given position. This
1316 * function operates in whole blocks (@offset and @length are rounded
1317 * accordingly); partially filled blocks must be cleared otherwise.
d552a2b9 1318 *
10d2cf94
AG
1319 * This function works from the bottom up, and from the right to the left. In
1320 * other words, it strips off the highest layer (data) before stripping any of
1321 * the metadata. Doing it this way is best in case the operation is interrupted
1322 * by power failure, etc. The dinode is rewritten in every transaction to
1323 * guarantee integrity.
d552a2b9 1324 */
10d2cf94 1325static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
d552a2b9
BP
1326{
1327 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
bb491ce6 1328 u64 maxsize = sdp->sd_heightsize[ip->i_height];
10d2cf94 1329 struct metapath mp = {};
d552a2b9
BP
1330 struct buffer_head *dibh, *bh;
1331 struct gfs2_holder rd_gh;
cb7f0903 1332 unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
10d2cf94
AG
1333 u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1334 __u16 start_list[GFS2_MAX_META_HEIGHT];
1335 __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
4e56a641 1336 unsigned int start_aligned, uninitialized_var(end_aligned);
d552a2b9
BP
1337 unsigned int strip_h = ip->i_height - 1;
1338 u32 btotal = 0;
1339 int ret, state;
1340 int mp_h; /* metapath buffers are read in to this height */
d552a2b9 1341 u64 prev_bnr = 0;
5cf26b1e 1342 __be64 *start, *end;
b3b94faa 1343
bb491ce6
AG
1344 if (offset >= maxsize) {
1345 /*
1346 * The starting point lies beyond the allocated meta-data;
1347 * there are no blocks do deallocate.
1348 */
1349 return 0;
1350 }
1351
10d2cf94
AG
1352 /*
1353 * The start position of the hole is defined by lblock, start_list, and
1354 * start_aligned. The end position of the hole is defined by lend,
1355 * end_list, and end_aligned.
1356 *
1357 * start_aligned and end_aligned define down to which height the start
1358 * and end positions are aligned to the metadata tree (i.e., the
1359 * position is a multiple of the metadata granularity at the height
1360 * above). This determines at which heights additional meta pointers
1361 * needs to be preserved for the remaining data.
1362 */
b3b94faa 1363
10d2cf94 1364 if (length) {
10d2cf94
AG
1365 u64 end_offset = offset + length;
1366 u64 lend;
1367
1368 /*
1369 * Clip the end at the maximum file size for the given height:
1370 * that's how far the metadata goes; files bigger than that
1371 * will have additional layers of indirection.
1372 */
1373 if (end_offset > maxsize)
1374 end_offset = maxsize;
1375 lend = end_offset >> bsize_shift;
1376
1377 if (lblock >= lend)
1378 return 0;
1379
1380 find_metapath(sdp, lend, &mp, ip->i_height);
1381 end_list = __end_list;
1382 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1383
1384 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1385 if (end_list[mp_h])
1386 break;
1387 }
1388 end_aligned = mp_h;
1389 }
1390
1391 find_metapath(sdp, lblock, &mp, ip->i_height);
cb7f0903
AG
1392 memcpy(start_list, mp.mp_list, sizeof(start_list));
1393
cb7f0903
AG
1394 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1395 if (start_list[mp_h])
1396 break;
1397 }
1398 start_aligned = mp_h;
d552a2b9
BP
1399
1400 ret = gfs2_meta_inode_buffer(ip, &dibh);
1401 if (ret)
1402 return ret;
b3b94faa 1403
d552a2b9
BP
1404 mp.mp_bh[0] = dibh;
1405 ret = lookup_metapath(ip, &mp);
e8b43fe0
AG
1406 if (ret)
1407 goto out_metapath;
c3ce5aa9
AG
1408
1409 /* issue read-ahead on metadata */
5cf26b1e
AG
1410 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1411 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1412 end_list, end_aligned, &start, &end);
5cf26b1e
AG
1413 gfs2_metapath_ra(ip->i_gl, start, end);
1414 }
c3ce5aa9 1415
e8b43fe0 1416 if (mp.mp_aheight == ip->i_height)
d552a2b9
BP
1417 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1418 else
1419 state = DEALLOC_FILL_MP; /* deal with partial metapath */
b3b94faa 1420
d552a2b9
BP
1421 ret = gfs2_rindex_update(sdp);
1422 if (ret)
1423 goto out_metapath;
1424
1425 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1426 if (ret)
1427 goto out_metapath;
1428 gfs2_holder_mark_uninitialized(&rd_gh);
1429
1430 mp_h = strip_h;
1431
1432 while (state != DEALLOC_DONE) {
1433 switch (state) {
1434 /* Truncate a full metapath at the given strip height.
1435 * Note that strip_h == mp_h in order to be in this state. */
1436 case DEALLOC_MP_FULL:
d552a2b9
BP
1437 bh = mp.mp_bh[mp_h];
1438 gfs2_assert_withdraw(sdp, bh);
1439 if (gfs2_assert_withdraw(sdp,
1440 prev_bnr != bh->b_blocknr)) {
1441 printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
1442 "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
1443 sdp->sd_fsname,
1444 (unsigned long long)ip->i_no_addr,
1445 prev_bnr, ip->i_height, strip_h, mp_h);
1446 }
1447 prev_bnr = bh->b_blocknr;
cb7f0903 1448
5cf26b1e
AG
1449 if (gfs2_metatype_check(sdp, bh,
1450 (mp_h ? GFS2_METATYPE_IN :
1451 GFS2_METATYPE_DI))) {
1452 ret = -EIO;
1453 goto out;
1454 }
1455
10d2cf94
AG
1456 /*
1457 * Below, passing end_aligned as 0 gives us the
1458 * metapointer range excluding the end point: the end
1459 * point is the first metapath we must not deallocate!
1460 */
1461
5cf26b1e 1462 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1463 end_list, 0 /* end_aligned */,
5cf26b1e
AG
1464 &start, &end);
1465 ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1466 start, end,
1467 mp_h != ip->i_height - 1,
1468 &btotal);
cb7f0903 1469
d552a2b9
BP
1470 /* If we hit an error or just swept dinode buffer,
1471 just exit. */
1472 if (ret || !mp_h) {
1473 state = DEALLOC_DONE;
1474 break;
1475 }
1476 state = DEALLOC_MP_LOWER;
1477 break;
1478
1479 /* lower the metapath strip height */
1480 case DEALLOC_MP_LOWER:
1481 /* We're done with the current buffer, so release it,
1482 unless it's the dinode buffer. Then back up to the
1483 previous pointer. */
1484 if (mp_h) {
1485 brelse(mp.mp_bh[mp_h]);
1486 mp.mp_bh[mp_h] = NULL;
1487 }
1488 /* If we can't get any lower in height, we've stripped
1489 off all we can. Next step is to back up and start
1490 stripping the previous level of metadata. */
1491 if (mp_h == 0) {
1492 strip_h--;
cb7f0903 1493 memcpy(mp.mp_list, start_list, sizeof(start_list));
d552a2b9
BP
1494 mp_h = strip_h;
1495 state = DEALLOC_FILL_MP;
1496 break;
1497 }
1498 mp.mp_list[mp_h] = 0;
1499 mp_h--; /* search one metadata height down */
d552a2b9 1500 mp.mp_list[mp_h]++;
10d2cf94
AG
1501 if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1502 break;
d552a2b9
BP
1503 /* Here we've found a part of the metapath that is not
1504 * allocated. We need to search at that height for the
1505 * next non-null pointer. */
10d2cf94 1506 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
d552a2b9
BP
1507 state = DEALLOC_FILL_MP;
1508 mp_h++;
1509 }
1510 /* No more non-null pointers at this height. Back up
1511 to the previous height and try again. */
1512 break; /* loop around in the same state */
1513
1514 /* Fill the metapath with buffers to the given height. */
1515 case DEALLOC_FILL_MP:
1516 /* Fill the buffers out to the current height. */
1517 ret = fillup_metapath(ip, &mp, mp_h);
c3ce5aa9 1518 if (ret < 0)
d552a2b9 1519 goto out;
c3ce5aa9
AG
1520
1521 /* issue read-ahead on metadata */
1522 if (mp.mp_aheight > 1) {
5cf26b1e
AG
1523 for (; ret > 1; ret--) {
1524 metapointer_range(&mp, mp.mp_aheight - ret,
1525 start_list, start_aligned,
10d2cf94 1526 end_list, end_aligned,
5cf26b1e
AG
1527 &start, &end);
1528 gfs2_metapath_ra(ip->i_gl, start, end);
1529 }
c3ce5aa9 1530 }
d552a2b9
BP
1531
1532 /* If buffers found for the entire strip height */
e8b43fe0 1533 if (mp.mp_aheight - 1 == strip_h) {
d552a2b9
BP
1534 state = DEALLOC_MP_FULL;
1535 break;
1536 }
e8b43fe0
AG
1537 if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1538 mp_h = mp.mp_aheight - 1;
d552a2b9
BP
1539
1540 /* If we find a non-null block pointer, crawl a bit
1541 higher up in the metapath and try again, otherwise
1542 we need to look lower for a new starting point. */
10d2cf94 1543 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
d552a2b9
BP
1544 mp_h++;
1545 else
1546 state = DEALLOC_MP_LOWER;
b3b94faa 1547 break;
d552a2b9 1548 }
b3b94faa
DT
1549 }
1550
d552a2b9
BP
1551 if (btotal) {
1552 if (current->journal_info == NULL) {
1553 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1554 RES_QUOTA, 0);
1555 if (ret)
1556 goto out;
1557 down_write(&ip->i_rw_mutex);
1558 }
1559 gfs2_statfs_change(sdp, 0, +btotal, 0);
1560 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1561 ip->i_inode.i_gid);
b32c8c76 1562 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
d552a2b9
BP
1563 gfs2_trans_add_meta(ip->i_gl, dibh);
1564 gfs2_dinode_out(ip, dibh->b_data);
1565 up_write(&ip->i_rw_mutex);
1566 gfs2_trans_end(sdp);
1567 }
b3b94faa 1568
d552a2b9
BP
1569out:
1570 if (gfs2_holder_initialized(&rd_gh))
1571 gfs2_glock_dq_uninit(&rd_gh);
1572 if (current->journal_info) {
1573 up_write(&ip->i_rw_mutex);
1574 gfs2_trans_end(sdp);
1575 cond_resched();
1576 }
1577 gfs2_quota_unhold(ip);
1578out_metapath:
1579 release_metapath(&mp);
1580 return ret;
b3b94faa
DT
1581}
1582
1583static int trunc_end(struct gfs2_inode *ip)
1584{
feaa7bba 1585 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
b3b94faa
DT
1586 struct buffer_head *dibh;
1587 int error;
1588
1589 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1590 if (error)
1591 return error;
1592
1593 down_write(&ip->i_rw_mutex);
1594
1595 error = gfs2_meta_inode_buffer(ip, &dibh);
1596 if (error)
1597 goto out;
1598
a2e0f799 1599 if (!i_size_read(&ip->i_inode)) {
ecc30c79 1600 ip->i_height = 0;
ce276b06 1601 ip->i_goal = ip->i_no_addr;
b3b94faa 1602 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
45138990 1603 gfs2_ordered_del_inode(ip);
b3b94faa 1604 }
078cd827 1605 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
383f01fb 1606 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
b3b94faa 1607
350a9b0a 1608 gfs2_trans_add_meta(ip->i_gl, dibh);
539e5d6b 1609 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa
DT
1610 brelse(dibh);
1611
a91ea69f 1612out:
b3b94faa 1613 up_write(&ip->i_rw_mutex);
b3b94faa 1614 gfs2_trans_end(sdp);
b3b94faa
DT
1615 return error;
1616}
1617
1618/**
1619 * do_shrink - make a file smaller
ff8f33c8 1620 * @inode: the inode
ff8f33c8 1621 * @newsize: the size to make the file
b3b94faa 1622 *
ff8f33c8
SW
1623 * Called with an exclusive lock on @inode. The @size must
1624 * be equal to or smaller than the current inode size.
b3b94faa
DT
1625 *
1626 * Returns: errno
1627 */
1628
8b5860a3 1629static int do_shrink(struct inode *inode, u64 newsize)
b3b94faa 1630{
ff8f33c8 1631 struct gfs2_inode *ip = GFS2_I(inode);
b3b94faa
DT
1632 int error;
1633
8b5860a3 1634 error = trunc_start(inode, newsize);
b3b94faa
DT
1635 if (error < 0)
1636 return error;
ff8f33c8 1637 if (gfs2_is_stuffed(ip))
b3b94faa
DT
1638 return 0;
1639
10d2cf94 1640 error = punch_hole(ip, newsize, 0);
ff8f33c8 1641 if (error == 0)
b3b94faa
DT
1642 error = trunc_end(ip);
1643
1644 return error;
1645}
1646
ff8f33c8 1647void gfs2_trim_blocks(struct inode *inode)
a13b8c5f 1648{
ff8f33c8
SW
1649 int ret;
1650
8b5860a3 1651 ret = do_shrink(inode, inode->i_size);
ff8f33c8
SW
1652 WARN_ON(ret != 0);
1653}
1654
/**
 * do_grow - Touch and update inode size
 * @inode: The inode
 * @size: The new size
 *
 * This function updates the timestamps on the inode and
 * may also increase the size of the inode. This function
 * must not be called with @size any smaller than the current
 * inode size.
 *
 * Although it is not strictly required to unstuff files here,
 * earlier versions of GFS2 have a bug in the stuffed file reading
 * code which will result in a buffer overrun if the size is larger
 * than the max stuffed file size. In order to prevent this from
 * occurring, such files are unstuffed, but in other cases we can
 * just update the inode size directly.
 *
 * Returns: 0 on success, or -ve on error
 */

static int do_grow(struct inode *inode, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_alloc_parms ap = { .target = 1, };
	struct buffer_head *dibh;
	int error;
	int unstuff = 0;

	/* Growing past the stuffed limit needs a block allocation, which
	   in turn needs a quota check and an rgrp reservation. */
	if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
		error = gfs2_quota_lock_check(ip, &ap);
		if (error)
			return error;

		error = gfs2_inplace_reserve(ip, &ap);
		if (error)
			goto do_grow_qunlock;
		unstuff = 1;
	}

	error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
				 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
				  0 : RES_QUOTA), 0);
	if (error)
		goto do_grow_release;

	if (unstuff) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (error)
			goto do_end_trans;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto do_end_trans;

	i_size_write(inode, size);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

do_end_trans:
	gfs2_trans_end(sdp);
do_grow_release:
	/* NOTE: do_grow_qunlock sits inside this if on purpose — it is only
	   reached via goto when the reservation was never taken. */
	if (unstuff) {
		gfs2_inplace_release(ip);
do_grow_qunlock:
		gfs2_quota_unlock(ip);
	}
	return error;
}
1727
b3b94faa 1728/**
ff8f33c8
SW
1729 * gfs2_setattr_size - make a file a given size
1730 * @inode: the inode
1731 * @newsize: the size to make the file
b3b94faa 1732 *
ff8f33c8 1733 * The file size can grow, shrink, or stay the same size. This
3e7aafc3 1734 * is called holding i_rwsem and an exclusive glock on the inode
ff8f33c8 1735 * in question.
b3b94faa
DT
1736 *
1737 * Returns: errno
1738 */
1739
ff8f33c8 1740int gfs2_setattr_size(struct inode *inode, u64 newsize)
b3b94faa 1741{
af5c2697 1742 struct gfs2_inode *ip = GFS2_I(inode);
ff8f33c8 1743 int ret;
b3b94faa 1744
ff8f33c8 1745 BUG_ON(!S_ISREG(inode->i_mode));
b3b94faa 1746
ff8f33c8
SW
1747 ret = inode_newsize_ok(inode, newsize);
1748 if (ret)
1749 return ret;
b3b94faa 1750
562c72aa
CH
1751 inode_dio_wait(inode);
1752
b54e9a0b 1753 ret = gfs2_rsqa_alloc(ip);
d2b47cfb 1754 if (ret)
2b3dcf35 1755 goto out;
d2b47cfb 1756
8b5860a3 1757 if (newsize >= inode->i_size) {
2b3dcf35
BP
1758 ret = do_grow(inode, newsize);
1759 goto out;
1760 }
ff8f33c8 1761
8b5860a3 1762 ret = do_shrink(inode, newsize);
2b3dcf35 1763out:
a097dc7e 1764 gfs2_rsqa_delete(ip, NULL);
2b3dcf35 1765 return ret;
b3b94faa
DT
1766}
1767
1768int gfs2_truncatei_resume(struct gfs2_inode *ip)
1769{
1770 int error;
10d2cf94 1771 error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
b3b94faa
DT
1772 if (!error)
1773 error = trunc_end(ip);
1774 return error;
1775}
1776
/* Deallocate all of an inode's blocks (truncate to zero from offset 0). */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	return punch_hole(ip, 0, 0);
}
1781
b50f227b
SW
1782/**
1783 * gfs2_free_journal_extents - Free cached journal bmap info
1784 * @jd: The journal
1785 *
1786 */
1787
1788void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
1789{
1790 struct gfs2_journal_extent *jext;
1791
1792 while(!list_empty(&jd->extent_list)) {
1793 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
1794 list_del(&jext->list);
1795 kfree(jext);
1796 }
1797}
1798
1799/**
1800 * gfs2_add_jextent - Add or merge a new extent to extent cache
1801 * @jd: The journal descriptor
1802 * @lblock: The logical block at start of new extent
c62baf65 1803 * @dblock: The physical block at start of new extent
b50f227b
SW
1804 * @blocks: Size of extent in fs blocks
1805 *
1806 * Returns: 0 on success or -ENOMEM
1807 */
1808
1809static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
1810{
1811 struct gfs2_journal_extent *jext;
1812
1813 if (!list_empty(&jd->extent_list)) {
1814 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
1815 if ((jext->dblock + jext->blocks) == dblock) {
1816 jext->blocks += blocks;
1817 return 0;
1818 }
1819 }
1820
1821 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
1822 if (jext == NULL)
1823 return -ENOMEM;
1824 jext->dblock = dblock;
1825 jext->lblock = lblock;
1826 jext->blocks = blocks;
1827 list_add_tail(&jext->list, &jd->extent_list);
1828 jd->nr_extents++;
1829 return 0;
1830}
1831
/**
 * gfs2_map_journal_extents - Cache journal bmap info
 * @sdp: The super block
 * @jd: The journal to map
 *
 * Create a reusable "extent" mapping from all logical
 * blocks to all physical blocks for the given journal.  This will save
 * us time when writing journal blocks.  Most journals will have only one
 * extent that maps all their logical blocks.  That's because gfs2.mkfs
 * arranges the journal blocks sequentially to maximize performance.
 * So the extent would map the first block for the entire file length.
 * However, gfs2_jadd can happen while file activity is happening, so
 * those journals may not be sequential.  Less likely is the case where
 * the users created their own journals by mounting the metafs and
 * laying it out.  But it's still possible.  These journals might have
 * several extents.
 *
 * Returns: 0 on success, or error on failure
 */

int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
{
	u64 lblock = 0;
	u64 lblock_stop;
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct buffer_head bh;
	unsigned int shift = sdp->sd_sb.sb_bsize_shift;
	u64 size;
	int rc;

	lblock_stop = i_size_read(jd->jd_inode) >> shift;
	size = (lblock_stop - lblock) << shift;
	jd->nr_extents = 0;
	WARN_ON(!list_empty(&jd->extent_list));

	/* Map one contiguous run per iteration until the whole file is done. */
	do {
		bh.b_state = 0;
		bh.b_blocknr = 0;
		bh.b_size = size;
		rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
		if (rc || !buffer_mapped(&bh))
			goto fail;
		rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
		if (rc)
			goto fail;
		size -= bh.b_size;
		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
	} while(size > 0);

	fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
		jd->nr_extents);
	return 0;

fail:
	fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
		rc, jd->jd_jid,
		(unsigned long long)(i_size_read(jd->jd_inode) - size),
		jd->nr_extents);
	fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
		rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
		bh.b_state, (unsigned long long)bh.b_size);
	gfs2_free_journal_extents(jd);
	return rc;
}
1896
b3b94faa
DT
1897/**
1898 * gfs2_write_alloc_required - figure out if a write will require an allocation
1899 * @ip: the file being written to
1900 * @offset: the offset to write to
1901 * @len: the number of bytes being written
b3b94faa 1902 *
461cb419 1903 * Returns: 1 if an alloc is required, 0 otherwise
b3b94faa
DT
1904 */
1905
cd915493 1906int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
461cb419 1907 unsigned int len)
b3b94faa 1908{
feaa7bba 1909 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
941e6d7d
SW
1910 struct buffer_head bh;
1911 unsigned int shift;
1912 u64 lblock, lblock_stop, size;
7ed122e4 1913 u64 end_of_file;
b3b94faa 1914
b3b94faa
DT
1915 if (!len)
1916 return 0;
1917
1918 if (gfs2_is_stuffed(ip)) {
235628c5 1919 if (offset + len > gfs2_max_stuffed_size(ip))
461cb419 1920 return 1;
b3b94faa
DT
1921 return 0;
1922 }
1923
941e6d7d 1924 shift = sdp->sd_sb.sb_bsize_shift;
7ed122e4 1925 BUG_ON(gfs2_is_dir(ip));
a2e0f799 1926 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
7ed122e4
SW
1927 lblock = offset >> shift;
1928 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1929 if (lblock_stop > end_of_file)
461cb419 1930 return 1;
b3b94faa 1931
941e6d7d
SW
1932 size = (lblock_stop - lblock) << shift;
1933 do {
1934 bh.b_state = 0;
1935 bh.b_size = size;
1936 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
1937 if (!buffer_mapped(&bh))
461cb419 1938 return 1;
941e6d7d
SW
1939 size -= bh.b_size;
1940 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1941 } while(size > 0);
b3b94faa
DT
1942
1943 return 0;
1944}
1945
4e56a641
AG
1946static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
1947{
1948 struct gfs2_inode *ip = GFS2_I(inode);
1949 struct buffer_head *dibh;
1950 int error;
1951
1952 if (offset >= inode->i_size)
1953 return 0;
1954 if (offset + length > inode->i_size)
1955 length = inode->i_size - offset;
1956
1957 error = gfs2_meta_inode_buffer(ip, &dibh);
1958 if (error)
1959 return error;
1960 gfs2_trans_add_meta(ip->i_gl, dibh);
1961 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
1962 length);
1963 brelse(dibh);
1964 return 0;
1965}
1966
1967static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
1968 loff_t length)
1969{
1970 struct gfs2_sbd *sdp = GFS2_SB(inode);
1971 loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1972 int error;
1973
1974 while (length) {
1975 struct gfs2_trans *tr;
1976 loff_t chunk;
1977 unsigned int offs;
1978
1979 chunk = length;
1980 if (chunk > max_chunk)
1981 chunk = max_chunk;
1982
1983 offs = offset & ~PAGE_MASK;
1984 if (offs && chunk > PAGE_SIZE)
1985 chunk = offs + ((chunk - offs) & PAGE_MASK);
1986
1987 truncate_pagecache_range(inode, offset, chunk);
1988 offset += chunk;
1989 length -= chunk;
1990
1991 tr = current->journal_info;
1992 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1993 continue;
1994
1995 gfs2_trans_end(sdp);
1996 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1997 if (error)
1998 return error;
1999 }
2000 return 0;
2001}
2002
/**
 * __gfs2_punch_hole - zero and deallocate a byte range in the middle of a file
 * @file: The file being punched
 * @offset: Start of the hole
 * @length: Length of the hole
 *
 * Opens a transaction sized for the dinode (plus journaled-data blocks for
 * jdata inodes), zeroes any partial blocks at the edges of the range, drops
 * the affected page cache, and finally — for non-stuffed files — calls
 * punch_hole() to deallocate whole blocks.  punch_hole() manages its own
 * transactions, so the one opened here is ended first.
 *
 * Returns: 0 on success or a negative error code
 */
int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int error;

	/* jdata needs room to revoke the page-cache blocks it truncates. */
	if (gfs2_is_jdata(ip))
		error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
					 GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	if (gfs2_is_stuffed(ip)) {
		/* All data lives inline in the dinode: just zero it. */
		error = stuffed_zero_range(inode, offset, length);
		if (error)
			goto out;
	} else {
		unsigned int start_off, end_off, blocksize;

		/* Zero the partial block at the head and tail of the range;
		   whole blocks in between are deallocated by punch_hole()
		   below. */
		blocksize = i_blocksize(inode);
		start_off = offset & (blocksize - 1);
		end_off = (offset + length) & (blocksize - 1);
		if (start_off) {
			unsigned int len = length;
			if (length > blocksize - start_off)
				len = blocksize - start_off;
			error = gfs2_block_zero_range(inode, offset, len);
			if (error)
				goto out;
			/* Range fits entirely within the first block: the
			   tail was already zeroed above. */
			if (start_off + length < blocksize)
				end_off = 0;
		}
		if (end_off) {
			error = gfs2_block_zero_range(inode,
				offset + length - end_off, end_off);
			if (error)
				goto out;
		}
	}

	if (gfs2_is_jdata(ip)) {
		/* Chunked truncation; relies on the open transaction. */
		BUG_ON(!current->journal_info);
		gfs2_journaled_truncate_range(inode, offset, length);
	} else
		truncate_pagecache_range(inode, offset, offset + length - 1);

	file_update_time(file);
	mark_inode_dirty(inode);

	/* End our transaction before punch_hole(), which opens its own. */
	if (current->journal_info)
		gfs2_trans_end(sdp);

	if (!gfs2_is_stuffed(ip))
		error = punch_hole(ip, offset, length);

out:
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}