]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/gfs2/bmap.c
iomap: fix the comment describing IOMAP_NOWAIT
[mirror_ubuntu-jammy-kernel.git] / fs / gfs2 / bmap.c
CommitLineData
b3b94faa
DT
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3a8a9a10 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
b3b94faa
DT
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
e9fc2aa0 7 * of the GNU General Public License version 2.
b3b94faa
DT
8 */
9
b3b94faa
DT
10#include <linux/spinlock.h>
11#include <linux/completion.h>
12#include <linux/buffer_head.h>
64dd153c 13#include <linux/blkdev.h>
5c676f6d 14#include <linux/gfs2_ondisk.h>
71b86f56 15#include <linux/crc32.h>
3974320c 16#include <linux/iomap.h>
b3b94faa
DT
17
18#include "gfs2.h"
5c676f6d 19#include "incore.h"
b3b94faa
DT
20#include "bmap.h"
21#include "glock.h"
22#include "inode.h"
b3b94faa 23#include "meta_io.h"
b3b94faa
DT
24#include "quota.h"
25#include "rgrp.h"
45138990 26#include "log.h"
4c16c36a 27#include "super.h"
b3b94faa 28#include "trans.h"
18ec7d5c 29#include "dir.h"
5c676f6d 30#include "util.h"
63997775 31#include "trace_gfs2.h"
b3b94faa
DT
32
/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	/* One buffer per tree level; [0] is the dinode buffer itself. */
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	/* Pointer index to follow at each level (see find_metapath()). */
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};
43
/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @page: The (optional) page. This is looked up if @page is NULL
 *
 * Copies the inline (stuffed) data out of the dinode into the page that
 * caches block 0 of the file, so the dinode tail can later be cleared.
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
{
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;
	int release = 0;

	/* No page supplied, or the supplied page is not page 0: look up
	   (or create) the first page of the file ourselves. */
	if (!page || page->index) {
		page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
		if (!page)
			return -ENOMEM;
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);
		u64 dsize = i_size_read(inode);

		/* Inline data can never exceed the stuffed capacity. */
		if (dsize > gfs2_max_stuffed_size(ip))
			dsize = gfs2_max_stuffed_size(ip);

		/* Copy the stuffed data (which follows the dinode header)
		   into the page and zero-fill the remainder. */
		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, BIT(inode->i_blkbits),
				     BIT(BH_Uptodate));

	bh = page_buffers(page);

	/* Point the buffer at the newly allocated block. */
	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	/* jdata blocks are written via the journal, not marked dirty here. */
	if (!gfs2_is_jdata(ip))
		mark_buffer_dirty(bh);
	if (!gfs2_is_writeback(ip))
		gfs2_trans_add_data(ip->i_gl, bh);

	/* Drop the page reference only if we took it ourselves above. */
	if (release) {
		unlock_page(page);
		put_page(page);
	}

	return 0;
}
104
/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @page: The (optional) page. This is looked up if the @page is NULL
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	/* Exclusive access: we are changing the tree layout. */
	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			/* Directory data lives in metadata blocks; copy the
			   inline entries into a fresh leaf block. */
			gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			/* Regular file: copy inline data into page cache. */
			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		/* First pointer slot after the dinode header -> new block. */
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	/* The file now has a one-level metadata tree. */
	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}
174
b3b94faa
DT
175
176/**
177 * find_metapath - Find path through the metadata tree
9b8c81d1 178 * @sdp: The superblock
b3b94faa
DT
179 * @mp: The metapath to return the result in
180 * @block: The disk block to look up
9b8c81d1 181 * @height: The pre-calculated height of the metadata tree
b3b94faa
DT
182 *
183 * This routine returns a struct metapath structure that defines a path
184 * through the metadata of inode "ip" to get to block "block".
185 *
186 * Example:
187 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
188 * filesystem with a blocksize of 4096.
189 *
190 * find_metapath() would return a struct metapath structure set to:
191 * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
192 * and mp_list[2] = 165.
193 *
194 * That means that in order to get to the block containing the byte at
195 * offset 101342453, we would load the indirect block pointed to by pointer
196 * 0 in the dinode. We would then load the indirect block pointed to by
197 * pointer 48 in that indirect block. We would then load the data block
198 * pointed to by pointer 165 in that indirect block.
199 *
200 * ----------------------------------------
201 * | Dinode | |
202 * | | 4|
203 * | |0 1 2 3 4 5 9|
204 * | | 6|
205 * ----------------------------------------
206 * |
207 * |
208 * V
209 * ----------------------------------------
210 * | Indirect Block |
211 * | 5|
212 * | 4 4 4 4 4 5 5 1|
213 * |0 5 6 7 8 9 0 1 2|
214 * ----------------------------------------
215 * |
216 * |
217 * V
218 * ----------------------------------------
219 * | Indirect Block |
220 * | 1 1 1 1 1 5|
221 * | 6 6 6 6 6 1|
222 * |0 3 4 5 6 7 2|
223 * ----------------------------------------
224 * |
225 * |
226 * V
227 * ----------------------------------------
228 * | Data block containing offset |
229 * | 101342453 |
230 * | |
231 * | |
232 * ----------------------------------------
233 *
234 */
235
/* See the large worked example above for how a metapath is interpreted. */
static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
			  struct metapath *mp, unsigned int height)
{
	unsigned int i;

	mp->mp_fheight = height;
	/* Walk from the bottom of the tree upward: do_div() divides @block
	   in place by the pointers-per-block and yields the remainder,
	   which is the pointer index at level i. */
	for (i = height; i--;)
		mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
}
245
5af4e7a0 246static inline unsigned int metapath_branch_start(const struct metapath *mp)
9b8c81d1 247{
5af4e7a0
BM
248 if (mp->mp_list[0] == 0)
249 return 2;
250 return 1;
9b8c81d1
SW
251}
252
d552a2b9 253/**
20cdc193 254 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
d552a2b9
BP
255 * @height: The metadata height (0 = dinode)
256 * @mp: The metapath
257 */
258static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
259{
260 struct buffer_head *bh = mp->mp_bh[height];
261 if (height == 0)
262 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
263 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
264}
265
b3b94faa
DT
266/**
267 * metapointer - Return pointer to start of metadata in a buffer
b3b94faa
DT
268 * @height: The metadata height (0 = dinode)
269 * @mp: The metapath
270 *
271 * Return a pointer to the block number of the next height of the metadata
272 * tree given a buffer containing the pointer to the current height of the
273 * metadata tree.
274 */
275
9b8c81d1 276static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
b3b94faa 277{
d552a2b9
BP
278 __be64 *p = metaptr1(height, mp);
279 return p + mp->mp_list[height];
b3b94faa
DT
280}
281
/**
 * gfs2_metapath_ra - issue read-ahead for a range of indirect pointers
 * @gl: The inode's glock (used to look up/create the buffers)
 * @start: First pointer in the range
 * @end: One past the last pointer
 *
 * Best effort only: buffers that are already locked or up to date are
 * skipped; no completion is awaited.
 */
static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
	const __be64 *t;

	for (t = start; t < end; t++) {
		struct buffer_head *rabh;

		/* Unallocated slot — nothing to read ahead. */
		if (!*t)
			continue;

		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
		if (trylock_buffer(rabh)) {
			if (!buffer_uptodate(rabh)) {
				/* Submit async read; end_buffer_read_sync
				   unlocks and releases on completion, so we
				   must not brelse() here. */
				rabh->b_end_io = end_buffer_read_sync;
				submit_bh(REQ_OP_READ,
					  REQ_RAHEAD | REQ_META | REQ_PRIO,
					  rabh);
				continue;
			}
			unlock_buffer(rabh);
		}
		brelse(rabh);
	}
}
306
/**
 * __fillup_metapath - read indirect buffers for levels [x, h) of a metapath
 * @ip: The inode
 * @mp: The metapath (mp_bh[x] must already be populated)
 * @x: First level whose pointer is followed
 * @h: One past the last level to fill
 *
 * Stops early at the first unallocated (zero) pointer; mp_aheight records
 * how far the walk actually got.
 *
 * Returns: errno
 */
static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
			     unsigned int x, unsigned int h)
{
	for (; x < h; x++) {
		__be64 *ptr = metapointer(x, mp);
		u64 dblock = be64_to_cpu(*ptr);
		int ret;

		/* Hole: the tree is not allocated below this point. */
		if (!dblock)
			break;
		ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
		if (ret)
			return ret;
	}
	/* Actual height reached (levels 0..x inclusive are populated). */
	mp->mp_aheight = x + 1;
	return 0;
}
324
/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error
 */

static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	/* Walk the full tree: levels 0 .. i_height - 1. */
	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}
346
/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights
 *
 * Returns: error or the number of buffers filled
 */

static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int x = 0;
	int ret;

	if (h) {
		/* find the first buffer we need to look up. */
		for (x = h - 1; x > 0; x--) {
			if (mp->mp_bh[x])
				break;
		}
	}
	ret = __fillup_metapath(ip, mp, x, h);
	if (ret)
		return ret;
	/* Number of buffers newly filled in by the walk above. */
	return mp->mp_aheight - x - 1;
}
375
9b8c81d1 376static inline void release_metapath(struct metapath *mp)
dbac6710
SW
377{
378 int i;
379
9b8c81d1
SW
380 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
381 if (mp->mp_bh[i] == NULL)
382 break;
383 brelse(mp->mp_bh[i]);
384 }
11707ea0
SW
385}
386
/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @start: Start of the buffer
 * @len: Length of the buffer in bytes
 * @ptr: Current position in the buffer
 * @limit: Max extent length to return (0 = unlimited)
 * @eob: Set to 1 if we hit "end of block"
 *
 * If the first block is zero (unallocated) it will return the number of
 * unallocated blocks in the extent, otherwise it will return the number
 * of contiguous blocks in the extent.
 *
 * Returns: The length of the extent (minimum of one block)
 */

static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
{
	const __be64 *end = (start + len);
	const __be64 *first = ptr;
	/* d == 0 counts a run of holes; otherwise d tracks the expected
	   next physical block of a contiguous run. */
	u64 d = be64_to_cpu(*ptr);

	*eob = 0;
	do {
		ptr++;
		if (ptr >= end)
			break;
		if (limit && --limit == 0)
			break;
		if (d)
			d++;
	} while(be64_to_cpu(*ptr) == d);
	if (ptr >= end)
		*eob = 1;
	return (ptr - first);
}
422
9b8c81d1 423static inline void bmap_lock(struct gfs2_inode *ip, int create)
4cf1ed81 424{
4cf1ed81
SW
425 if (create)
426 down_write(&ip->i_rw_mutex);
427 else
428 down_read(&ip->i_rw_mutex);
429}
430
9b8c81d1 431static inline void bmap_unlock(struct gfs2_inode *ip, int create)
4cf1ed81 432{
4cf1ed81
SW
433 if (create)
434 up_write(&ip->i_rw_mutex);
435 else
436 up_read(&ip->i_rw_mutex);
437}
438
/**
 * gfs2_indirect_init - initialise a fresh indirect block at level @i
 * @mp: The metapath (mp_bh[i-1] must be populated; mp_bh[i] must be empty)
 * @gl: The inode glock
 * @i: The level being initialised (>= 1)
 * @offset: Pointer index in the parent at which to link the new block
 * @bn: The block number of the new indirect block
 *
 * Creates the buffer, stamps the metadata header, zeroes the pointer
 * area and links the block into its parent.
 *
 * Returns: pointer to the parent slot that now refers to @bn.
 */
static inline __be64 *gfs2_indirect_init(struct metapath *mp,
					 struct gfs2_glock *gl, unsigned int i,
					 unsigned offset, u64 bn)
{
	/* Slot in the parent buffer: the header size differs for the
	   dinode (i == 1) vs an indirect block (i > 1). */
	__be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
				 ((i > 1) ? sizeof(struct gfs2_meta_header) :
					    sizeof(struct gfs2_dinode)));
	BUG_ON(i < 1);
	BUG_ON(mp->mp_bh[i] != NULL);
	mp->mp_bh[i] = gfs2_meta_new(gl, bn);
	gfs2_trans_add_meta(gl, mp->mp_bh[i]);
	gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
	ptr += offset;
	*ptr = cpu_to_be64(bn);
	return ptr;
}
456
/*
 * States of the block-allocation state machine in gfs2_iomap_alloc().
 * Allocation proceeds top-down: grow tree height first, then fill in
 * intermediate indirect blocks, then allocate data blocks.
 */
enum alloc_state {
	ALLOC_DATA = 0,		/* allocating the data blocks themselves */
	ALLOC_GROW_DEPTH = 1,	/* filling indirect blocks below the top */
	ALLOC_GROW_HEIGHT = 2,	/* adding new levels above the current top */
	/* ALLOC_UNSTUFF = 3, TBD and rather complicated */
};
463
/**
 * gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap; length in = max to allocate, addr/length out = result
 * @flags: iomap flags
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 * i) Indirect blocks to grow the metadata tree height
 * ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * The function is in two parts. The first part works out the total
 * number of blocks which we need. The second part does the actual
 * allocation asking for an extent at a time (if enough contiguous free
 * blocks are available, there will only be one request per bmap call)
 * and uses the state machine to initialise the blocks in order.
 *
 * Returns: errno on error
 */

static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			    unsigned flags, struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	unsigned dblks = 0;
	unsigned ptrs_per_blk;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;
	size_t maxlen = iomap->length >> inode->i_blkbits;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	/* Part one: work out how many blocks we need and where to start. */
	if (mp->mp_fheight == mp->mp_aheight) {
		struct buffer_head *bh;
		int eob;

		/* Bottom indirect block exists, find unalloced extent size */
		ptr = metapointer(end_of_metadata, mp);
		bh = mp->mp_bh[end_of_metadata];
		dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
					   maxlen, &eob);
		BUG_ON(dblks < 1);
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
						    sdp->sd_diptrs;
		/* Don't allocate past the end of the bottom pointer block. */
		dblks = min(maxlen, (size_t)(ptrs_per_blk -
					     mp->mp_list[end_of_metadata]));
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		int error;
		n = blks - alloced;
		error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (error)
			return error;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_add_unrevoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				/* Remember the old top pointer; it moves
				   down into the new branch below. */
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				/* Move the old tree top into the new
				   lowest added level. */
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				for(i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
			/* fall through - Branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
			/* fall through - Tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
	return 0;
}
618
/**
 * hole_size - figure out the size of a hole
 * @inode: The inode
 * @lblock: The logical starting block number
 * @mp: The metapath at the start of the hole (from a failed lookup)
 *
 * Walks the metadata tree upward from the deepest level, counting how
 * many logical blocks are covered by zero pointers, capped at EOF.
 *
 * Returns: The hole size in bytes
 *
 */
static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct metapath mp_eof;
	u64 factor = 1;		/* logical blocks per pointer at this level */
	int hgt;
	u64 holesz = 0;
	const __be64 *first, *end, *ptr;
	const struct buffer_head *bh;
	u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;
	int zeroptrs;
	bool done = false;

	/* Get another metapath, to the very last byte */
	/* NOTE(review): only mp_eof.mp_list is used below; its mp_bh[] is
	   never touched, so the partial init by find_metapath() is enough. */
	find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
	for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
		bh = mp->mp_bh[hgt];
		if (bh) {
			zeroptrs = 0;
			first = metapointer(hgt, mp);
			end = (const __be64 *)(bh->b_data + bh->b_size);

			/* Count consecutive zero pointers; a nonzero pointer
			   ends the hole at this level and stops the walk. */
			for (ptr = first; ptr < end; ptr++) {
				if (*ptr) {
					done = true;
					break;
				} else {
					zeroptrs++;
				}
			}
		} else {
			/* Whole level unallocated: every slot is a hole. */
			zeroptrs = sdp->sd_inptrs;
		}
		/* Never report a hole extending past EOF. */
		if (factor * zeroptrs >= lblock_stop - lblock + 1) {
			holesz = lblock_stop - lblock + 1;
			break;
		}
		holesz += factor * zeroptrs;

		factor *= sdp->sd_inptrs;
		if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
			(mp->mp_list[hgt - 1])++;
	}
	return holesz << inode->i_blkbits;
}
674
675static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
676{
677 struct gfs2_inode *ip = GFS2_I(inode);
678
679 iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
680 sizeof(struct gfs2_dinode);
681 iomap->offset = 0;
682 iomap->length = i_size_read(inode);
19319b53 683 iomap->type = IOMAP_INLINE;
3974320c
BP
684}
685
/**
 * gfs2_iomap_begin - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 *
 * Returns: errno
 */
int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
		     unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct metapath mp = { .mp_aheight = 1, };
	unsigned int factor = sdp->sd_sb.sb_bsize;
	const u64 *arr = sdp->sd_heightsize;
	__be64 *ptr;
	sector_t lblock;
	sector_t lend;
	int ret = 0;
	int eob;
	unsigned int len;
	struct buffer_head *bh;
	u8 height;

	trace_gfs2_iomap_start(ip, pos, length, flags);
	if (!length) {
		ret = -EINVAL;
		goto out;
	}

	if (gfs2_is_stuffed(ip)) {
		/* Inline data: report directly; writes must unstuff first,
		   which the caller is expected to have arranged. */
		if (flags & IOMAP_REPORT) {
			gfs2_stuffed_iomap(inode, iomap);
			if (pos >= iomap->length)
				ret = -ENOENT;
			goto out;
		}
		BUG_ON(!(flags & IOMAP_WRITE));
	}

	lblock = pos >> inode->i_blkbits;
	lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;

	/* Default: report the whole requested range as a hole. */
	iomap->offset = lblock << inode->i_blkbits;
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
	iomap->flags = IOMAP_F_MERGED;
	bmap_lock(ip, flags & IOMAP_WRITE);

	/*
	 * Directory data blocks have a struct gfs2_meta_header header, so the
	 * remaining size is smaller than the filesystem block size. Logical
	 * block numbers for directories are in units of this remaining size!
	 */
	if (gfs2_is_dir(ip)) {
		factor = sdp->sd_jbsize;
		arr = sdp->sd_jheightsize;
	}

	ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
	if (ret)
		goto out_release;

	/* Height required to address lblock (may exceed current height). */
	height = ip->i_height;
	while ((lblock + 1) * factor > arr[height])
		height++;
	find_metapath(sdp, lblock, &mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, &mp);
	if (ret)
		goto out_release;

	/* Lookup stopped at an unallocated level: it's a hole. */
	if (mp.mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, &mp);
	if (*ptr == 0)
		goto do_alloc;

	iomap->type = IOMAP_MAPPED;
	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;

	/* Extend the mapping over the contiguous extent. */
	bh = mp.mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
	if (eob)
		iomap->flags |= IOMAP_F_BOUNDARY;
	iomap->length = (u64)len << inode->i_blkbits;

out_release:
	release_metapath(&mp);
	bmap_unlock(ip, flags & IOMAP_WRITE);
out:
	trace_gfs2_iomap_end(ip, iomap, ret);
	return ret;

do_alloc:
	/* Unmapped: allocate for writes, size the hole for reports. */
	if (flags & IOMAP_WRITE) {
		ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
	} else if (flags & IOMAP_REPORT) {
		loff_t size = i_size_read(inode);
		if (pos >= size)
			ret = -ENOENT;
		else if (height <= ip->i_height)
			iomap->length = hole_size(inode, lblock, &mp);
		else
			iomap->length = size - pos;
	}
	goto out_release;
}
801
/**
 * gfs2_block_map - Map one or more blocks of an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if its ok to alloc blocks to satify the request
 *
 * The size of the requested mapping is defined in bh_map->b_size.
 *
 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
 * bh_map->b_size to indicate the size of the mapping when @lblock and
 * successive blocks are mapped, up to the requested size.
 *
 * Sets buffer_boundary() if a read of metadata will be required
 * before the next block can be mapped. Sets buffer_new() if new
 * blocks were allocated.
 *
 * Returns: errno
 */

int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct iomap iomap;
	int ret, flags = 0;

	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);

	if (create)
		flags |= IOMAP_WRITE;
	/* Delegate the real work to the iomap path and translate the
	   result back into buffer_head state. */
	ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
			       bh_map->b_size, flags, &iomap);
	if (ret) {
		if (!create && ret == -ENOENT) {
			/* Return unmapped buffer beyond the end of file. */
			ret = 0;
		}
		goto out;
	}

	/* Clamp to the caller's requested size; a clamped mapping no
	   longer necessarily ends on a metadata boundary. */
	if (iomap.length > bh_map->b_size) {
		iomap.length = bh_map->b_size;
		iomap.flags &= ~IOMAP_F_BOUNDARY;
	}
	if (iomap.addr != IOMAP_NULL_ADDR)
		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
	bh_map->b_size = iomap.length;
	if (iomap.flags & IOMAP_F_BOUNDARY)
		set_buffer_boundary(bh_map);
	if (iomap.flags & IOMAP_F_NEW)
		set_buffer_new(bh_map);

out:
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	return ret;
}
863
/*
 * Deprecated: do not use in new code
 */
/* Legacy wrapper around gfs2_block_map() using an on-stack buffer_head.
 * On input *new selects allocation; on output it reports whether new
 * blocks were allocated. *dblock/*extlen return the mapping. */
int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
{
	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
	int ret;
	int create = *new;

	BUG_ON(!extlen);
	BUG_ON(!dblock);
	BUG_ON(!new);

	/* Ask for a larger (32-block) mapping for read-only lookups. */
	bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
	ret = gfs2_block_map(inode, lblock, &bh, create);
	*extlen = bh.b_size >> inode->i_blkbits;
	*dblock = bh.b_blocknr;
	if (buffer_new(&bh))
		*new = 1;
	else
		*new = 0;
	return ret;
}
887
/**
 * gfs2_block_zero_range - Deal with zeroing out data
 * @inode: The inode
 * @from: Byte offset at which zeroing starts
 * @length: Number of bytes to zero (assumed within one block —
 *          TODO confirm; callers pass at most blocksize - offs)
 *
 * This is partly borrowed from ext3.
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	struct address_space *mapping = inode->i_mapping;
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned long index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize, iblock, pos;
	struct buffer_head *bh;
	struct page *page;
	int err;

	page = find_or_create_page(mapping, index, GFP_NOFS);
	if (!page)
		/* NOTE(review): failure to get the page is silently treated
		   as success here — looks deliberate (best effort), verify. */
		return 0;

	blocksize = inode->i_sb->s_blocksize;
	iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;

	if (!buffer_mapped(bh)) {
		gfs2_block_map(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		err = -EIO;
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
		err = 0;
	}

	if (!gfs2_is_writeback(ip))
		gfs2_trans_add_data(ip->i_gl, bh);

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
unlock:
	unlock_page(page);
	put_page(page);
	return err;
}
957
c62baf65
FF
958#define GFS2_JTRUNC_REVOKES 8192
959
fa731fc4
SW
/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */

static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	u64 chunk;
	int error;

	/* Truncate downward in bounded chunks, cycling the transaction
	   between chunks so revokes never overflow it. */
	while (oldsize != newsize) {
		struct gfs2_trans *tr;
		unsigned int offs;

		chunk = oldsize - newsize;
		if (chunk > max_chunk)
			chunk = max_chunk;

		/* Align chunk ends to page boundaries when possible. */
		offs = oldsize & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache(inode, oldsize - chunk);
		oldsize -= chunk;

		/* Only restart the transaction if this chunk touched it. */
		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}

	return 0;
}
1005
8b5860a3 1006static int trunc_start(struct inode *inode, u64 newsize)
b3b94faa 1007{
ff8f33c8
SW
1008 struct gfs2_inode *ip = GFS2_I(inode);
1009 struct gfs2_sbd *sdp = GFS2_SB(inode);
80990f40 1010 struct buffer_head *dibh = NULL;
b3b94faa 1011 int journaled = gfs2_is_jdata(ip);
8b5860a3 1012 u64 oldsize = inode->i_size;
b3b94faa
DT
1013 int error;
1014
fa731fc4
SW
1015 if (journaled)
1016 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1017 else
1018 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
b3b94faa
DT
1019 if (error)
1020 return error;
1021
1022 error = gfs2_meta_inode_buffer(ip, &dibh);
1023 if (error)
1024 goto out;
1025
350a9b0a 1026 gfs2_trans_add_meta(ip->i_gl, dibh);
ff8f33c8 1027
b3b94faa 1028 if (gfs2_is_stuffed(ip)) {
ff8f33c8 1029 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
b3b94faa 1030 } else {
bdba0d5e
AG
1031 unsigned int blocksize = i_blocksize(inode);
1032 unsigned int offs = newsize & (blocksize - 1);
1033 if (offs) {
1034 error = gfs2_block_zero_range(inode, newsize,
1035 blocksize - offs);
ff8f33c8 1036 if (error)
80990f40 1037 goto out;
b3b94faa 1038 }
ff8f33c8 1039 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
b3b94faa
DT
1040 }
1041
ff8f33c8 1042 i_size_write(inode, newsize);
078cd827 1043 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
ff8f33c8 1044 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa 1045
fa731fc4
SW
1046 if (journaled)
1047 error = gfs2_journaled_truncate(inode, oldsize, newsize);
1048 else
7caef267 1049 truncate_pagecache(inode, newsize);
fa731fc4 1050
a91ea69f 1051out:
80990f40
AG
1052 brelse(dibh);
1053 if (current->journal_info)
1054 gfs2_trans_end(sdp);
b3b94faa
DT
1055 return error;
1056}
1057
d552a2b9
BP
1058/**
1059 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1060 * @ip: inode
1061 * @rg_gh: holder of resource group glock
5cf26b1e
AG
1062 * @bh: buffer head to sweep
1063 * @start: starting point in bh
1064 * @end: end point in bh
1065 * @meta: true if bh points to metadata (rather than data)
d552a2b9 1066 * @btotal: place to keep count of total blocks freed
d552a2b9
BP
1067 *
1068 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1069 * free, and free them all. However, we do it one rgrp at a time. If this
1070 * block has references to multiple rgrps, we break it into individual
1071 * transactions. This allows other processes to use the rgrps while we're
1072 * focused on a single one, for better concurrency / performance.
1073 * At every transaction boundary, we rewrite the inode into the journal.
1074 * That way the bitmaps are kept consistent with the inode and we can recover
1075 * if we're interrupted by power-outages.
1076 *
1077 * Returns: 0, or return code if an error occurred.
1078 * *btotal has the total number of blocks freed
1079 */
1080static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
5cf26b1e
AG
1081 struct buffer_head *bh, __be64 *start, __be64 *end,
1082 bool meta, u32 *btotal)
b3b94faa 1083{
9b8c81d1 1084 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
d552a2b9
BP
1085 struct gfs2_rgrpd *rgd;
1086 struct gfs2_trans *tr;
5cf26b1e 1087 __be64 *p;
d552a2b9
BP
1088 int blks_outside_rgrp;
1089 u64 bn, bstart, isize_blks;
1090 s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
d552a2b9
BP
1091 int ret = 0;
1092 bool buf_in_tr = false; /* buffer was added to transaction */
1093
d552a2b9 1094more_rgrps:
5cf26b1e
AG
1095 rgd = NULL;
1096 if (gfs2_holder_initialized(rd_gh)) {
1097 rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
1098 gfs2_assert_withdraw(sdp,
1099 gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1100 }
d552a2b9
BP
1101 blks_outside_rgrp = 0;
1102 bstart = 0;
1103 blen = 0;
d552a2b9 1104
5cf26b1e 1105 for (p = start; p < end; p++) {
d552a2b9
BP
1106 if (!*p)
1107 continue;
1108 bn = be64_to_cpu(*p);
5cf26b1e
AG
1109
1110 if (rgd) {
1111 if (!rgrp_contains_block(rgd, bn)) {
1112 blks_outside_rgrp++;
1113 continue;
1114 }
d552a2b9 1115 } else {
90bcab99 1116 rgd = gfs2_blk2rgrpd(sdp, bn, true);
5cf26b1e
AG
1117 if (unlikely(!rgd)) {
1118 ret = -EIO;
1119 goto out;
1120 }
d552a2b9
BP
1121 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1122 0, rd_gh);
1123 if (ret)
1124 goto out;
1125
1126 /* Must be done with the rgrp glock held: */
1127 if (gfs2_rs_active(&ip->i_res) &&
1128 rgd == ip->i_res.rs_rbm.rgd)
1129 gfs2_rs_deltree(&ip->i_res);
1130 }
1131
d552a2b9
BP
1132 /* The size of our transactions will be unknown until we
1133 actually process all the metadata blocks that relate to
1134 the rgrp. So we estimate. We know it can't be more than
1135 the dinode's i_blocks and we don't want to exceed the
1136 journal flush threshold, sd_log_thresh2. */
1137 if (current->journal_info == NULL) {
1138 unsigned int jblocks_rqsted, revokes;
1139
1140 jblocks_rqsted = rgd->rd_length + RES_DINODE +
1141 RES_INDIRECT;
1142 isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
1143 if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
1144 jblocks_rqsted +=
1145 atomic_read(&sdp->sd_log_thresh2);
1146 else
1147 jblocks_rqsted += isize_blks;
1148 revokes = jblocks_rqsted;
1149 if (meta)
5cf26b1e 1150 revokes += end - start;
d552a2b9
BP
1151 else if (ip->i_depth)
1152 revokes += sdp->sd_inptrs;
1153 ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
1154 if (ret)
1155 goto out_unlock;
1156 down_write(&ip->i_rw_mutex);
1157 }
1158 /* check if we will exceed the transaction blocks requested */
1159 tr = current->journal_info;
1160 if (tr->tr_num_buf_new + RES_STATFS +
1161 RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
1162 /* We set blks_outside_rgrp to ensure the loop will
1163 be repeated for the same rgrp, but with a new
1164 transaction. */
1165 blks_outside_rgrp++;
1166 /* This next part is tricky. If the buffer was added
1167 to the transaction, we've already set some block
1168 pointers to 0, so we better follow through and free
1169 them, or we will introduce corruption (so break).
1170 This may be impossible, or at least rare, but I
1171 decided to cover the case regardless.
1172
1173 If the buffer was not added to the transaction
1174 (this call), doing so would exceed our transaction
1175 size, so we need to end the transaction and start a
1176 new one (so goto). */
1177
1178 if (buf_in_tr)
1179 break;
1180 goto out_unlock;
1181 }
1182
1183 gfs2_trans_add_meta(ip->i_gl, bh);
1184 buf_in_tr = true;
1185 *p = 0;
1186 if (bstart + blen == bn) {
1187 blen++;
1188 continue;
1189 }
1190 if (bstart) {
1191 __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1192 (*btotal) += blen;
1193 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1194 }
1195 bstart = bn;
1196 blen = 1;
1197 }
1198 if (bstart) {
1199 __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1200 (*btotal) += blen;
1201 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1202 }
1203out_unlock:
1204 if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
1205 outside the rgrp we just processed,
1206 do it all over again. */
1207 if (current->journal_info) {
5cf26b1e
AG
1208 struct buffer_head *dibh;
1209
1210 ret = gfs2_meta_inode_buffer(ip, &dibh);
1211 if (ret)
1212 goto out;
d552a2b9
BP
1213
1214 /* Every transaction boundary, we rewrite the dinode
1215 to keep its di_blocks current in case of failure. */
1216 ip->i_inode.i_mtime = ip->i_inode.i_ctime =
b32c8c76 1217 current_time(&ip->i_inode);
d552a2b9
BP
1218 gfs2_trans_add_meta(ip->i_gl, dibh);
1219 gfs2_dinode_out(ip, dibh->b_data);
5cf26b1e 1220 brelse(dibh);
d552a2b9
BP
1221 up_write(&ip->i_rw_mutex);
1222 gfs2_trans_end(sdp);
1223 }
1224 gfs2_glock_dq_uninit(rd_gh);
1225 cond_resched();
1226 goto more_rgrps;
1227 }
1228out:
1229 return ret;
1230}
1231
10d2cf94
AG
1232static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1233{
1234 if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1235 return false;
1236 return true;
1237}
1238
d552a2b9
BP
1239/**
1240 * find_nonnull_ptr - find a non-null pointer given a metapath and height
d552a2b9
BP
1241 * @mp: starting metapath
1242 * @h: desired height to search
1243 *
10d2cf94 1244 * Assumes the metapath is valid (with buffers) out to height h.
d552a2b9
BP
1245 * Returns: true if a non-null pointer was found in the metapath buffer
1246 * false if all remaining pointers are NULL in the buffer
1247 */
1248static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
10d2cf94
AG
1249 unsigned int h,
1250 __u16 *end_list, unsigned int end_aligned)
d552a2b9 1251{
10d2cf94
AG
1252 struct buffer_head *bh = mp->mp_bh[h];
1253 __be64 *first, *ptr, *end;
1254
1255 first = metaptr1(h, mp);
1256 ptr = first + mp->mp_list[h];
1257 end = (__be64 *)(bh->b_data + bh->b_size);
1258 if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
1259 bool keep_end = h < end_aligned;
1260 end = first + end_list[h] + keep_end;
1261 }
d552a2b9 1262
10d2cf94 1263 while (ptr < end) {
c4a9d189 1264 if (*ptr) { /* if we have a non-null pointer */
10d2cf94 1265 mp->mp_list[h] = ptr - first;
c4a9d189
BP
1266 h++;
1267 if (h < GFS2_MAX_META_HEIGHT)
10d2cf94 1268 mp->mp_list[h] = 0;
d552a2b9 1269 return true;
c4a9d189 1270 }
10d2cf94 1271 ptr++;
d552a2b9 1272 }
10d2cf94 1273 return false;
d552a2b9
BP
1274}
1275
/* States of the punch_hole()/truncate deallocation state machine. */
enum dealloc_states {
	DEALLOC_MP_FULL = 0,  /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1, /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,  /* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,     /* process complete */
};

5cf26b1e
AG
1283static inline void
1284metapointer_range(struct metapath *mp, int height,
1285 __u16 *start_list, unsigned int start_aligned,
10d2cf94 1286 __u16 *end_list, unsigned int end_aligned,
5cf26b1e
AG
1287 __be64 **start, __be64 **end)
1288{
1289 struct buffer_head *bh = mp->mp_bh[height];
1290 __be64 *first;
1291
1292 first = metaptr1(height, mp);
1293 *start = first;
1294 if (mp_eq_to_hgt(mp, start_list, height)) {
1295 bool keep_start = height < start_aligned;
1296 *start = first + start_list[height] + keep_start;
1297 }
1298 *end = (__be64 *)(bh->b_data + bh->b_size);
10d2cf94
AG
1299 if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1300 bool keep_end = height < end_aligned;
1301 *end = first + end_list[height] + keep_end;
1302 }
1303}
1304
1305static inline bool walk_done(struct gfs2_sbd *sdp,
1306 struct metapath *mp, int height,
1307 __u16 *end_list, unsigned int end_aligned)
1308{
1309 __u16 end;
1310
1311 if (end_list) {
1312 bool keep_end = height < end_aligned;
1313 if (!mp_eq_to_hgt(mp, end_list, height))
1314 return false;
1315 end = end_list[height] + keep_end;
1316 } else
1317 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1318 return mp->mp_list[height] >= end;
5cf26b1e
AG
1319}
1320
d552a2b9 1321/**
10d2cf94 1322 * punch_hole - deallocate blocks in a file
d552a2b9 1323 * @ip: inode to truncate
10d2cf94
AG
1324 * @offset: the start of the hole
1325 * @length: the size of the hole (or 0 for truncate)
1326 *
1327 * Punch a hole into a file or truncate a file at a given position. This
1328 * function operates in whole blocks (@offset and @length are rounded
1329 * accordingly); partially filled blocks must be cleared otherwise.
d552a2b9 1330 *
10d2cf94
AG
1331 * This function works from the bottom up, and from the right to the left. In
1332 * other words, it strips off the highest layer (data) before stripping any of
1333 * the metadata. Doing it this way is best in case the operation is interrupted
1334 * by power failure, etc. The dinode is rewritten in every transaction to
1335 * guarantee integrity.
d552a2b9 1336 */
10d2cf94 1337static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
d552a2b9
BP
1338{
1339 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
bb491ce6 1340 u64 maxsize = sdp->sd_heightsize[ip->i_height];
10d2cf94 1341 struct metapath mp = {};
d552a2b9
BP
1342 struct buffer_head *dibh, *bh;
1343 struct gfs2_holder rd_gh;
cb7f0903 1344 unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
10d2cf94
AG
1345 u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1346 __u16 start_list[GFS2_MAX_META_HEIGHT];
1347 __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
4e56a641 1348 unsigned int start_aligned, uninitialized_var(end_aligned);
d552a2b9
BP
1349 unsigned int strip_h = ip->i_height - 1;
1350 u32 btotal = 0;
1351 int ret, state;
1352 int mp_h; /* metapath buffers are read in to this height */
d552a2b9 1353 u64 prev_bnr = 0;
5cf26b1e 1354 __be64 *start, *end;
b3b94faa 1355
bb491ce6
AG
1356 if (offset >= maxsize) {
1357 /*
1358 * The starting point lies beyond the allocated meta-data;
1359 * there are no blocks do deallocate.
1360 */
1361 return 0;
1362 }
1363
10d2cf94
AG
1364 /*
1365 * The start position of the hole is defined by lblock, start_list, and
1366 * start_aligned. The end position of the hole is defined by lend,
1367 * end_list, and end_aligned.
1368 *
1369 * start_aligned and end_aligned define down to which height the start
1370 * and end positions are aligned to the metadata tree (i.e., the
1371 * position is a multiple of the metadata granularity at the height
1372 * above). This determines at which heights additional meta pointers
1373 * needs to be preserved for the remaining data.
1374 */
b3b94faa 1375
10d2cf94 1376 if (length) {
10d2cf94
AG
1377 u64 end_offset = offset + length;
1378 u64 lend;
1379
1380 /*
1381 * Clip the end at the maximum file size for the given height:
1382 * that's how far the metadata goes; files bigger than that
1383 * will have additional layers of indirection.
1384 */
1385 if (end_offset > maxsize)
1386 end_offset = maxsize;
1387 lend = end_offset >> bsize_shift;
1388
1389 if (lblock >= lend)
1390 return 0;
1391
1392 find_metapath(sdp, lend, &mp, ip->i_height);
1393 end_list = __end_list;
1394 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1395
1396 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1397 if (end_list[mp_h])
1398 break;
1399 }
1400 end_aligned = mp_h;
1401 }
1402
1403 find_metapath(sdp, lblock, &mp, ip->i_height);
cb7f0903
AG
1404 memcpy(start_list, mp.mp_list, sizeof(start_list));
1405
cb7f0903
AG
1406 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1407 if (start_list[mp_h])
1408 break;
1409 }
1410 start_aligned = mp_h;
d552a2b9
BP
1411
1412 ret = gfs2_meta_inode_buffer(ip, &dibh);
1413 if (ret)
1414 return ret;
b3b94faa 1415
d552a2b9
BP
1416 mp.mp_bh[0] = dibh;
1417 ret = lookup_metapath(ip, &mp);
e8b43fe0
AG
1418 if (ret)
1419 goto out_metapath;
c3ce5aa9
AG
1420
1421 /* issue read-ahead on metadata */
5cf26b1e
AG
1422 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1423 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1424 end_list, end_aligned, &start, &end);
5cf26b1e
AG
1425 gfs2_metapath_ra(ip->i_gl, start, end);
1426 }
c3ce5aa9 1427
e8b43fe0 1428 if (mp.mp_aheight == ip->i_height)
d552a2b9
BP
1429 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1430 else
1431 state = DEALLOC_FILL_MP; /* deal with partial metapath */
b3b94faa 1432
d552a2b9
BP
1433 ret = gfs2_rindex_update(sdp);
1434 if (ret)
1435 goto out_metapath;
1436
1437 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1438 if (ret)
1439 goto out_metapath;
1440 gfs2_holder_mark_uninitialized(&rd_gh);
1441
1442 mp_h = strip_h;
1443
1444 while (state != DEALLOC_DONE) {
1445 switch (state) {
1446 /* Truncate a full metapath at the given strip height.
1447 * Note that strip_h == mp_h in order to be in this state. */
1448 case DEALLOC_MP_FULL:
d552a2b9
BP
1449 bh = mp.mp_bh[mp_h];
1450 gfs2_assert_withdraw(sdp, bh);
1451 if (gfs2_assert_withdraw(sdp,
1452 prev_bnr != bh->b_blocknr)) {
1453 printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
1454 "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
1455 sdp->sd_fsname,
1456 (unsigned long long)ip->i_no_addr,
1457 prev_bnr, ip->i_height, strip_h, mp_h);
1458 }
1459 prev_bnr = bh->b_blocknr;
cb7f0903 1460
5cf26b1e
AG
1461 if (gfs2_metatype_check(sdp, bh,
1462 (mp_h ? GFS2_METATYPE_IN :
1463 GFS2_METATYPE_DI))) {
1464 ret = -EIO;
1465 goto out;
1466 }
1467
10d2cf94
AG
1468 /*
1469 * Below, passing end_aligned as 0 gives us the
1470 * metapointer range excluding the end point: the end
1471 * point is the first metapath we must not deallocate!
1472 */
1473
5cf26b1e 1474 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1475 end_list, 0 /* end_aligned */,
5cf26b1e
AG
1476 &start, &end);
1477 ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1478 start, end,
1479 mp_h != ip->i_height - 1,
1480 &btotal);
cb7f0903 1481
d552a2b9
BP
1482 /* If we hit an error or just swept dinode buffer,
1483 just exit. */
1484 if (ret || !mp_h) {
1485 state = DEALLOC_DONE;
1486 break;
1487 }
1488 state = DEALLOC_MP_LOWER;
1489 break;
1490
1491 /* lower the metapath strip height */
1492 case DEALLOC_MP_LOWER:
1493 /* We're done with the current buffer, so release it,
1494 unless it's the dinode buffer. Then back up to the
1495 previous pointer. */
1496 if (mp_h) {
1497 brelse(mp.mp_bh[mp_h]);
1498 mp.mp_bh[mp_h] = NULL;
1499 }
1500 /* If we can't get any lower in height, we've stripped
1501 off all we can. Next step is to back up and start
1502 stripping the previous level of metadata. */
1503 if (mp_h == 0) {
1504 strip_h--;
cb7f0903 1505 memcpy(mp.mp_list, start_list, sizeof(start_list));
d552a2b9
BP
1506 mp_h = strip_h;
1507 state = DEALLOC_FILL_MP;
1508 break;
1509 }
1510 mp.mp_list[mp_h] = 0;
1511 mp_h--; /* search one metadata height down */
d552a2b9 1512 mp.mp_list[mp_h]++;
10d2cf94
AG
1513 if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1514 break;
d552a2b9
BP
1515 /* Here we've found a part of the metapath that is not
1516 * allocated. We need to search at that height for the
1517 * next non-null pointer. */
10d2cf94 1518 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
d552a2b9
BP
1519 state = DEALLOC_FILL_MP;
1520 mp_h++;
1521 }
1522 /* No more non-null pointers at this height. Back up
1523 to the previous height and try again. */
1524 break; /* loop around in the same state */
1525
1526 /* Fill the metapath with buffers to the given height. */
1527 case DEALLOC_FILL_MP:
1528 /* Fill the buffers out to the current height. */
1529 ret = fillup_metapath(ip, &mp, mp_h);
c3ce5aa9 1530 if (ret < 0)
d552a2b9 1531 goto out;
c3ce5aa9
AG
1532
1533 /* issue read-ahead on metadata */
1534 if (mp.mp_aheight > 1) {
5cf26b1e
AG
1535 for (; ret > 1; ret--) {
1536 metapointer_range(&mp, mp.mp_aheight - ret,
1537 start_list, start_aligned,
10d2cf94 1538 end_list, end_aligned,
5cf26b1e
AG
1539 &start, &end);
1540 gfs2_metapath_ra(ip->i_gl, start, end);
1541 }
c3ce5aa9 1542 }
d552a2b9
BP
1543
1544 /* If buffers found for the entire strip height */
e8b43fe0 1545 if (mp.mp_aheight - 1 == strip_h) {
d552a2b9
BP
1546 state = DEALLOC_MP_FULL;
1547 break;
1548 }
e8b43fe0
AG
1549 if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1550 mp_h = mp.mp_aheight - 1;
d552a2b9
BP
1551
1552 /* If we find a non-null block pointer, crawl a bit
1553 higher up in the metapath and try again, otherwise
1554 we need to look lower for a new starting point. */
10d2cf94 1555 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
d552a2b9
BP
1556 mp_h++;
1557 else
1558 state = DEALLOC_MP_LOWER;
b3b94faa 1559 break;
d552a2b9 1560 }
b3b94faa
DT
1561 }
1562
d552a2b9
BP
1563 if (btotal) {
1564 if (current->journal_info == NULL) {
1565 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1566 RES_QUOTA, 0);
1567 if (ret)
1568 goto out;
1569 down_write(&ip->i_rw_mutex);
1570 }
1571 gfs2_statfs_change(sdp, 0, +btotal, 0);
1572 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1573 ip->i_inode.i_gid);
b32c8c76 1574 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
d552a2b9
BP
1575 gfs2_trans_add_meta(ip->i_gl, dibh);
1576 gfs2_dinode_out(ip, dibh->b_data);
1577 up_write(&ip->i_rw_mutex);
1578 gfs2_trans_end(sdp);
1579 }
b3b94faa 1580
d552a2b9
BP
1581out:
1582 if (gfs2_holder_initialized(&rd_gh))
1583 gfs2_glock_dq_uninit(&rd_gh);
1584 if (current->journal_info) {
1585 up_write(&ip->i_rw_mutex);
1586 gfs2_trans_end(sdp);
1587 cond_resched();
1588 }
1589 gfs2_quota_unhold(ip);
1590out_metapath:
1591 release_metapath(&mp);
1592 return ret;
b3b94faa
DT
1593}
1594
1595static int trunc_end(struct gfs2_inode *ip)
1596{
feaa7bba 1597 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
b3b94faa
DT
1598 struct buffer_head *dibh;
1599 int error;
1600
1601 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1602 if (error)
1603 return error;
1604
1605 down_write(&ip->i_rw_mutex);
1606
1607 error = gfs2_meta_inode_buffer(ip, &dibh);
1608 if (error)
1609 goto out;
1610
a2e0f799 1611 if (!i_size_read(&ip->i_inode)) {
ecc30c79 1612 ip->i_height = 0;
ce276b06 1613 ip->i_goal = ip->i_no_addr;
b3b94faa 1614 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
45138990 1615 gfs2_ordered_del_inode(ip);
b3b94faa 1616 }
078cd827 1617 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
383f01fb 1618 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
b3b94faa 1619
350a9b0a 1620 gfs2_trans_add_meta(ip->i_gl, dibh);
539e5d6b 1621 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa
DT
1622 brelse(dibh);
1623
a91ea69f 1624out:
b3b94faa 1625 up_write(&ip->i_rw_mutex);
b3b94faa 1626 gfs2_trans_end(sdp);
b3b94faa
DT
1627 return error;
1628}
1629
1630/**
1631 * do_shrink - make a file smaller
ff8f33c8 1632 * @inode: the inode
ff8f33c8 1633 * @newsize: the size to make the file
b3b94faa 1634 *
ff8f33c8
SW
1635 * Called with an exclusive lock on @inode. The @size must
1636 * be equal to or smaller than the current inode size.
b3b94faa
DT
1637 *
1638 * Returns: errno
1639 */
1640
8b5860a3 1641static int do_shrink(struct inode *inode, u64 newsize)
b3b94faa 1642{
ff8f33c8 1643 struct gfs2_inode *ip = GFS2_I(inode);
b3b94faa
DT
1644 int error;
1645
8b5860a3 1646 error = trunc_start(inode, newsize);
b3b94faa
DT
1647 if (error < 0)
1648 return error;
ff8f33c8 1649 if (gfs2_is_stuffed(ip))
b3b94faa
DT
1650 return 0;
1651
10d2cf94 1652 error = punch_hole(ip, newsize, 0);
ff8f33c8 1653 if (error == 0)
b3b94faa
DT
1654 error = trunc_end(ip);
1655
1656 return error;
1657}
1658
ff8f33c8 1659void gfs2_trim_blocks(struct inode *inode)
a13b8c5f 1660{
ff8f33c8
SW
1661 int ret;
1662
8b5860a3 1663 ret = do_shrink(inode, inode->i_size);
ff8f33c8
SW
1664 WARN_ON(ret != 0);
1665}
1666
1667/**
1668 * do_grow - Touch and update inode size
1669 * @inode: The inode
1670 * @size: The new size
1671 *
1672 * This function updates the timestamps on the inode and
1673 * may also increase the size of the inode. This function
1674 * must not be called with @size any smaller than the current
1675 * inode size.
1676 *
1677 * Although it is not strictly required to unstuff files here,
1678 * earlier versions of GFS2 have a bug in the stuffed file reading
1679 * code which will result in a buffer overrun if the size is larger
1680 * than the max stuffed file size. In order to prevent this from
25985edc 1681 * occurring, such files are unstuffed, but in other cases we can
ff8f33c8
SW
1682 * just update the inode size directly.
1683 *
1684 * Returns: 0 on success, or -ve on error
1685 */
1686
1687static int do_grow(struct inode *inode, u64 size)
1688{
1689 struct gfs2_inode *ip = GFS2_I(inode);
1690 struct gfs2_sbd *sdp = GFS2_SB(inode);
7b9cff46 1691 struct gfs2_alloc_parms ap = { .target = 1, };
a13b8c5f
WC
1692 struct buffer_head *dibh;
1693 int error;
2f7ee358 1694 int unstuff = 0;
a13b8c5f 1695
235628c5 1696 if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
b8fbf471 1697 error = gfs2_quota_lock_check(ip, &ap);
ff8f33c8 1698 if (error)
5407e242 1699 return error;
ff8f33c8 1700
7b9cff46 1701 error = gfs2_inplace_reserve(ip, &ap);
ff8f33c8
SW
1702 if (error)
1703 goto do_grow_qunlock;
2f7ee358 1704 unstuff = 1;
ff8f33c8
SW
1705 }
1706
a01aedfe
BP
1707 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
1708 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
1709 0 : RES_QUOTA), 0);
a13b8c5f 1710 if (error)
ff8f33c8 1711 goto do_grow_release;
a13b8c5f 1712
2f7ee358 1713 if (unstuff) {
ff8f33c8
SW
1714 error = gfs2_unstuff_dinode(ip, NULL);
1715 if (error)
1716 goto do_end_trans;
1717 }
a13b8c5f
WC
1718
1719 error = gfs2_meta_inode_buffer(ip, &dibh);
1720 if (error)
ff8f33c8 1721 goto do_end_trans;
a13b8c5f 1722
ff8f33c8 1723 i_size_write(inode, size);
078cd827 1724 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
350a9b0a 1725 gfs2_trans_add_meta(ip->i_gl, dibh);
a13b8c5f
WC
1726 gfs2_dinode_out(ip, dibh->b_data);
1727 brelse(dibh);
1728
ff8f33c8 1729do_end_trans:
a13b8c5f 1730 gfs2_trans_end(sdp);
ff8f33c8 1731do_grow_release:
2f7ee358 1732 if (unstuff) {
ff8f33c8
SW
1733 gfs2_inplace_release(ip);
1734do_grow_qunlock:
1735 gfs2_quota_unlock(ip);
ff8f33c8 1736 }
a13b8c5f
WC
1737 return error;
1738}
1739
b3b94faa 1740/**
ff8f33c8
SW
1741 * gfs2_setattr_size - make a file a given size
1742 * @inode: the inode
1743 * @newsize: the size to make the file
b3b94faa 1744 *
ff8f33c8 1745 * The file size can grow, shrink, or stay the same size. This
3e7aafc3 1746 * is called holding i_rwsem and an exclusive glock on the inode
ff8f33c8 1747 * in question.
b3b94faa
DT
1748 *
1749 * Returns: errno
1750 */
1751
ff8f33c8 1752int gfs2_setattr_size(struct inode *inode, u64 newsize)
b3b94faa 1753{
af5c2697 1754 struct gfs2_inode *ip = GFS2_I(inode);
ff8f33c8 1755 int ret;
b3b94faa 1756
ff8f33c8 1757 BUG_ON(!S_ISREG(inode->i_mode));
b3b94faa 1758
ff8f33c8
SW
1759 ret = inode_newsize_ok(inode, newsize);
1760 if (ret)
1761 return ret;
b3b94faa 1762
562c72aa
CH
1763 inode_dio_wait(inode);
1764
b54e9a0b 1765 ret = gfs2_rsqa_alloc(ip);
d2b47cfb 1766 if (ret)
2b3dcf35 1767 goto out;
d2b47cfb 1768
8b5860a3 1769 if (newsize >= inode->i_size) {
2b3dcf35
BP
1770 ret = do_grow(inode, newsize);
1771 goto out;
1772 }
ff8f33c8 1773
8b5860a3 1774 ret = do_shrink(inode, newsize);
2b3dcf35 1775out:
a097dc7e 1776 gfs2_rsqa_delete(ip, NULL);
2b3dcf35 1777 return ret;
b3b94faa
DT
1778}
1779
1780int gfs2_truncatei_resume(struct gfs2_inode *ip)
1781{
1782 int error;
10d2cf94 1783 error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
b3b94faa
DT
1784 if (!error)
1785 error = trunc_end(ip);
1786 return error;
1787}
1788
/* Deallocate all blocks of a file (truncate to zero from offset 0). */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	return punch_hole(ip, 0, 0);
}

b50f227b
SW
1794/**
1795 * gfs2_free_journal_extents - Free cached journal bmap info
1796 * @jd: The journal
1797 *
1798 */
1799
1800void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
1801{
1802 struct gfs2_journal_extent *jext;
1803
1804 while(!list_empty(&jd->extent_list)) {
1805 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
1806 list_del(&jext->list);
1807 kfree(jext);
1808 }
1809}
1810
1811/**
1812 * gfs2_add_jextent - Add or merge a new extent to extent cache
1813 * @jd: The journal descriptor
1814 * @lblock: The logical block at start of new extent
c62baf65 1815 * @dblock: The physical block at start of new extent
b50f227b
SW
1816 * @blocks: Size of extent in fs blocks
1817 *
1818 * Returns: 0 on success or -ENOMEM
1819 */
1820
1821static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
1822{
1823 struct gfs2_journal_extent *jext;
1824
1825 if (!list_empty(&jd->extent_list)) {
1826 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
1827 if ((jext->dblock + jext->blocks) == dblock) {
1828 jext->blocks += blocks;
1829 return 0;
1830 }
1831 }
1832
1833 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
1834 if (jext == NULL)
1835 return -ENOMEM;
1836 jext->dblock = dblock;
1837 jext->lblock = lblock;
1838 jext->blocks = blocks;
1839 list_add_tail(&jext->list, &jd->extent_list);
1840 jd->nr_extents++;
1841 return 0;
1842}
1843
1844/**
1845 * gfs2_map_journal_extents - Cache journal bmap info
1846 * @sdp: The super block
1847 * @jd: The journal to map
1848 *
1849 * Create a reusable "extent" mapping from all logical
1850 * blocks to all physical blocks for the given journal. This will save
1851 * us time when writing journal blocks. Most journals will have only one
1852 * extent that maps all their logical blocks. That's because gfs2.mkfs
1853 * arranges the journal blocks sequentially to maximize performance.
1854 * So the extent would map the first block for the entire file length.
1855 * However, gfs2_jadd can happen while file activity is happening, so
1856 * those journals may not be sequential. Less likely is the case where
1857 * the users created their own journals by mounting the metafs and
1858 * laying it out. But it's still possible. These journals might have
1859 * several extents.
1860 *
1861 * Returns: 0 on success, or error on failure
1862 */
1863
1864int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
1865{
1866 u64 lblock = 0;
1867 u64 lblock_stop;
1868 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
1869 struct buffer_head bh;
1870 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1871 u64 size;
1872 int rc;
1873
1874 lblock_stop = i_size_read(jd->jd_inode) >> shift;
1875 size = (lblock_stop - lblock) << shift;
1876 jd->nr_extents = 0;
1877 WARN_ON(!list_empty(&jd->extent_list));
1878
1879 do {
1880 bh.b_state = 0;
1881 bh.b_blocknr = 0;
1882 bh.b_size = size;
1883 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
1884 if (rc || !buffer_mapped(&bh))
1885 goto fail;
1886 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
1887 if (rc)
1888 goto fail;
1889 size -= bh.b_size;
1890 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1891 } while(size > 0);
1892
1893 fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
1894 jd->nr_extents);
1895 return 0;
1896
1897fail:
1898 fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
1899 rc, jd->jd_jid,
1900 (unsigned long long)(i_size_read(jd->jd_inode) - size),
1901 jd->nr_extents);
1902 fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
1903 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
1904 bh.b_state, (unsigned long long)bh.b_size);
1905 gfs2_free_journal_extents(jd);
1906 return rc;
1907}
1908
b3b94faa
DT
1909/**
1910 * gfs2_write_alloc_required - figure out if a write will require an allocation
1911 * @ip: the file being written to
1912 * @offset: the offset to write to
1913 * @len: the number of bytes being written
b3b94faa 1914 *
461cb419 1915 * Returns: 1 if an alloc is required, 0 otherwise
b3b94faa
DT
1916 */
1917
cd915493 1918int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
461cb419 1919 unsigned int len)
b3b94faa 1920{
feaa7bba 1921 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
941e6d7d
SW
1922 struct buffer_head bh;
1923 unsigned int shift;
1924 u64 lblock, lblock_stop, size;
7ed122e4 1925 u64 end_of_file;
b3b94faa 1926
b3b94faa
DT
1927 if (!len)
1928 return 0;
1929
1930 if (gfs2_is_stuffed(ip)) {
235628c5 1931 if (offset + len > gfs2_max_stuffed_size(ip))
461cb419 1932 return 1;
b3b94faa
DT
1933 return 0;
1934 }
1935
941e6d7d 1936 shift = sdp->sd_sb.sb_bsize_shift;
7ed122e4 1937 BUG_ON(gfs2_is_dir(ip));
a2e0f799 1938 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
7ed122e4
SW
1939 lblock = offset >> shift;
1940 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1941 if (lblock_stop > end_of_file)
461cb419 1942 return 1;
b3b94faa 1943
941e6d7d
SW
1944 size = (lblock_stop - lblock) << shift;
1945 do {
1946 bh.b_state = 0;
1947 bh.b_size = size;
1948 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
1949 if (!buffer_mapped(&bh))
461cb419 1950 return 1;
941e6d7d
SW
1951 size -= bh.b_size;
1952 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1953 } while(size > 0);
b3b94faa
DT
1954
1955 return 0;
1956}
1957
4e56a641
AG
1958static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
1959{
1960 struct gfs2_inode *ip = GFS2_I(inode);
1961 struct buffer_head *dibh;
1962 int error;
1963
1964 if (offset >= inode->i_size)
1965 return 0;
1966 if (offset + length > inode->i_size)
1967 length = inode->i_size - offset;
1968
1969 error = gfs2_meta_inode_buffer(ip, &dibh);
1970 if (error)
1971 return error;
1972 gfs2_trans_add_meta(ip->i_gl, dibh);
1973 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
1974 length);
1975 brelse(dibh);
1976 return 0;
1977}
1978
/**
 * gfs2_journaled_truncate_range - drop page cache for a hole in a jdata file
 * @inode: The inode
 * @offset: Start of the byte range being punched
 * @length: Number of bytes in the range
 *
 * For journaled-data files the page cache is truncated a bounded chunk at
 * a time, so that no single transaction accumulates more than
 * GFS2_JTRUNC_REVOKES revokes.  Whenever the running transaction has been
 * dirtied, it is ended and a fresh one begun before the next chunk.
 *
 * Returns: 0 on success or a negative error code from gfs2_trans_begin()
 */
static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
					 loff_t length)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	int error;

	while (length) {
		struct gfs2_trans *tr;
		loff_t chunk;
		unsigned int offs;

		chunk = length;
		if (chunk > max_chunk)
			chunk = max_chunk;

		/* If the range starts mid-page, trim the chunk so that all
		   chunks after the first partial page stay page aligned. */
		offs = offset & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		/* NOTE(review): truncate_pagecache_range() takes an
		   *inclusive* end offset; passing 'chunk' rather than
		   'offset + chunk - 1' looks suspicious — confirm against
		   upstream before changing. */
		truncate_pagecache_range(inode, offset, chunk);
		offset += chunk;
		length -= chunk;

		/* Only cycle the transaction if this one was actually used */
		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}
	return 0;
}
2014
/**
 * __gfs2_punch_hole - punch a hole into a file
 * @file: The file to punch the hole in
 * @offset: Start of the hole
 * @length: Length of the hole
 *
 * Zeroes the partial blocks at either edge of the hole, drops the page
 * cache covering the range, and then hands the fully-covered blocks to
 * punch_hole() after the transaction opened here has ended.
 *
 * Returns: 0 on success or a negative error code
 */
int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int error;

	/* jdata needs extra room to journal the affected data pages */
	if (gfs2_is_jdata(ip))
		error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
					 GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	if (gfs2_is_stuffed(ip)) {
		/* Data lives inline in the dinode block; just zero it */
		error = stuffed_zero_range(inode, offset, length);
		if (error)
			goto out;
	} else {
		unsigned int start_off, end_off, blocksize;

		blocksize = i_blocksize(inode);
		start_off = offset & (blocksize - 1);
		end_off = (offset + length) & (blocksize - 1);
		if (start_off) {
			/* Zero the tail of the first (partial) block */
			unsigned int len = length;
			if (length > blocksize - start_off)
				len = blocksize - start_off;
			error = gfs2_block_zero_range(inode, offset, len);
			if (error)
				goto out;
			if (start_off + length < blocksize)
				end_off = 0; /* hole fits in a single block */
		}
		if (end_off) {
			/* Zero the head of the last (partial) block */
			error = gfs2_block_zero_range(inode,
				offset + length - end_off, end_off);
			if (error)
				goto out;
		}
	}

	/* Drop the page cache for the hole; jdata must do this in bounded,
	   journaled chunks, other files can truncate in one go */
	if (gfs2_is_jdata(ip)) {
		BUG_ON(!current->journal_info);
		gfs2_journaled_truncate_range(inode, offset, length);
	} else
		truncate_pagecache_range(inode, offset, offset + length - 1);

	file_update_time(file);
	mark_inode_dirty(inode);

	if (current->journal_info)
		gfs2_trans_end(sdp);

	/* Deallocate whole blocks only after the transaction above ended */
	if (!gfs2_is_stuffed(ip))
		error = punch_hole(ip, offset, length);

out:
	/* Error paths may still hold an open transaction */
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}