]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/gfs2/bmap.c
iomap: don't mark the inode dirty in iomap_write_end
[mirror_ubuntu-jammy-kernel.git] / fs / gfs2 / bmap.c
CommitLineData
7336d0e6 1// SPDX-License-Identifier: GPL-2.0-only
b3b94faa
DT
2/*
3 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3a8a9a10 4 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
b3b94faa
DT
5 */
6
b3b94faa
DT
7#include <linux/spinlock.h>
8#include <linux/completion.h>
9#include <linux/buffer_head.h>
64dd153c 10#include <linux/blkdev.h>
5c676f6d 11#include <linux/gfs2_ondisk.h>
71b86f56 12#include <linux/crc32.h>
3974320c 13#include <linux/iomap.h>
98583b3e 14#include <linux/ktime.h>
b3b94faa
DT
15
16#include "gfs2.h"
5c676f6d 17#include "incore.h"
b3b94faa
DT
18#include "bmap.h"
19#include "glock.h"
20#include "inode.h"
b3b94faa 21#include "meta_io.h"
b3b94faa
DT
22#include "quota.h"
23#include "rgrp.h"
45138990 24#include "log.h"
4c16c36a 25#include "super.h"
b3b94faa 26#include "trans.h"
18ec7d5c 27#include "dir.h"
5c676f6d 28#include "util.h"
64bc06bb 29#include "aops.h"
63997775 30#include "trace_gfs2.h"
b3b94faa
DT
31
/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	/* Buffer for each level of the tree; [0] is the dinode buffer
	 * (hooked up by the caller — see lookup_metapath). */
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	/* Pointer-slot index within each level's block. */
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};
42
64bc06bb
AG
43static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
44
f25ef0c1
SW
/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @page: The (optional) page. This is looked up if @page is NULL
 *
 * Copies the inline (stuffed) data out of the dinode into page 0 of the
 * inode's address space and maps that page's buffer to @block so the data
 * gets written out through the normal write-back path.
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
{
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;
	int release = 0;

	/* Stuffed data always lives at offset 0; look up page 0 ourselves
	 * if the caller didn't hand us that page. */
	if (!page || page->index) {
		page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
		if (!page)
			return -ENOMEM;
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);
		u64 dsize = i_size_read(inode);

		/* Never copy more than the dinode can actually hold inline. */
		if (dsize > gfs2_max_stuffed_size(ip))
			dsize = gfs2_max_stuffed_size(ip);

		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, BIT(inode->i_blkbits),
				     BIT(BH_Uptodate));

	bh = page_buffers(page);

	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	/* Journaled data is added to the transaction; otherwise dirty the
	 * buffer and use ordered-write tracking on the inode. */
	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
	else {
		mark_buffer_dirty(bh);
		gfs2_ordered_add_inode(ip);
	}

	/* Drop the page reference only if we took it ourselves above. */
	if (release) {
		unlock_page(page);
		put_page(page);
	}

	return 0;
}
107
b3b94faa
DT
/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @page: The (optional) page. This is looked up if the @page is NULL
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	/* Only need a data block if there is actually inline data to move;
	 * a zero-size inode just gets its tail cleared below. */
	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			/* Directories copy their inline entries into a new
			 * directory leaf block. */
			gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		/* First pointer slot after the dinode header now points at
		 * the newly allocated data block. */
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	/* The inode is now an ordinary height-1 tree. */
	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}
177
b3b94faa
DT
178
179/**
180 * find_metapath - Find path through the metadata tree
9b8c81d1 181 * @sdp: The superblock
b3b94faa 182 * @block: The disk block to look up
07e23d68 183 * @mp: The metapath to return the result in
9b8c81d1 184 * @height: The pre-calculated height of the metadata tree
b3b94faa
DT
185 *
186 * This routine returns a struct metapath structure that defines a path
187 * through the metadata of inode "ip" to get to block "block".
188 *
189 * Example:
190 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
191 * filesystem with a blocksize of 4096.
192 *
193 * find_metapath() would return a struct metapath structure set to:
07e23d68 194 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
b3b94faa
DT
195 *
196 * That means that in order to get to the block containing the byte at
197 * offset 101342453, we would load the indirect block pointed to by pointer
198 * 0 in the dinode. We would then load the indirect block pointed to by
199 * pointer 48 in that indirect block. We would then load the data block
200 * pointed to by pointer 165 in that indirect block.
201 *
202 * ----------------------------------------
203 * | Dinode | |
204 * | | 4|
205 * | |0 1 2 3 4 5 9|
206 * | | 6|
207 * ----------------------------------------
208 * |
209 * |
210 * V
211 * ----------------------------------------
212 * | Indirect Block |
213 * | 5|
214 * | 4 4 4 4 4 5 5 1|
215 * |0 5 6 7 8 9 0 1 2|
216 * ----------------------------------------
217 * |
218 * |
219 * V
220 * ----------------------------------------
221 * | Indirect Block |
222 * | 1 1 1 1 1 5|
223 * | 6 6 6 6 6 1|
224 * |0 3 4 5 6 7 2|
225 * ----------------------------------------
226 * |
227 * |
228 * V
229 * ----------------------------------------
230 * | Data block containing offset |
231 * | 101342453 |
232 * | |
233 * | |
234 * ----------------------------------------
235 *
236 */
237
9b8c81d1
SW
static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
			  struct metapath *mp, unsigned int height)
{
	unsigned int i;

	mp->mp_fheight = height;
	/* Peel off the index within each level from the bottom up: each
	 * indirect block holds sd_inptrs pointers, so repeated division
	 * yields the pointer slot at every height. */
	for (i = height; i--;)
		mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
}
247
5af4e7a0 248static inline unsigned int metapath_branch_start(const struct metapath *mp)
9b8c81d1 249{
5af4e7a0
BM
250 if (mp->mp_list[0] == 0)
251 return 2;
252 return 1;
9b8c81d1
SW
253}
254
d552a2b9 255/**
20cdc193 256 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
d552a2b9
BP
257 * @height: The metadata height (0 = dinode)
258 * @mp: The metapath
259 */
260static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
261{
262 struct buffer_head *bh = mp->mp_bh[height];
263 if (height == 0)
264 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
265 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
266}
267
b3b94faa
DT
268/**
269 * metapointer - Return pointer to start of metadata in a buffer
b3b94faa
DT
270 * @height: The metadata height (0 = dinode)
271 * @mp: The metapath
272 *
273 * Return a pointer to the block number of the next height of the metadata
274 * tree given a buffer containing the pointer to the current height of the
275 * metadata tree.
276 */
277
9b8c81d1 278static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
b3b94faa 279{
d552a2b9
BP
280 __be64 *p = metaptr1(height, mp);
281 return p + mp->mp_list[height];
b3b94faa
DT
282}
283
7841b9f0
AG
/* Return one past the last metadata pointer in the buffer at @height. */
static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
{
	const struct buffer_head *bh = mp->mp_bh[height];
	return (const __be64 *)(bh->b_data + bh->b_size);
}
289
/* Copy @mp into @clone, taking an extra reference on each held buffer so
 * the clone can be released independently. */
static void clone_metapath(struct metapath *clone, struct metapath *mp)
{
	unsigned int hgt;

	*clone = *mp;
	for (hgt = 0; hgt < mp->mp_aheight; hgt++)
		get_bh(clone->mp_bh[hgt]);
}
298
/*
 * gfs2_metapath_ra - issue read-ahead for a range of metadata pointers
 * @gl: the glock the referenced blocks belong to
 * @start: first pointer to consider
 * @end: one past the last pointer
 *
 * Submits asynchronous reads for any referenced blocks that are not
 * already up to date.  Never blocks on a buffer lock — buffers that are
 * locked by someone else are simply skipped.
 */
static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
	const __be64 *t;

	for (t = start; t < end; t++) {
		struct buffer_head *rabh;

		if (!*t)
			continue;

		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
		if (trylock_buffer(rabh)) {
			if (!buffer_uptodate(rabh)) {
				rabh->b_end_io = end_buffer_read_sync;
				submit_bh(REQ_OP_READ,
					  REQ_RAHEAD | REQ_META | REQ_PRIO,
					  rabh);
				/* NOTE: no brelse here — the reference is
				 * expected to be dropped by the read
				 * completion path. */
				continue;
			}
			unlock_buffer(rabh);
		}
		brelse(rabh);
	}
}
323
e8b43fe0
AG
/*
 * __fillup_metapath - read indirect blocks for heights [x, h) into @mp
 * @ip: The inode
 * @mp: The metapath (buffer for height @x must already be present)
 * @x: First height to look up
 * @h: Height to stop at (exclusive)
 *
 * Stops early at the first unallocated (zero) pointer.  On success,
 * mp_aheight is set to one past the last height actually reached.
 *
 * Returns: errno
 */
static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
			     unsigned int x, unsigned int h)
{
	for (; x < h; x++) {
		__be64 *ptr = metapointer(x, mp);
		u64 dblock = be64_to_cpu(*ptr);
		int ret;

		if (!dblock)
			break;
		ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
		if (ret)
			return ret;
	}
	mp->mp_aheight = x + 1;
	return 0;
}
341
/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error
 */

static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	/* Walk every level from the dinode down to the leaf metadata. */
	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}
363
d552a2b9
BP
/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights
 *
 * Returns: error or the number of buffers filled
 */

static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int x = 0;
	int ret;

	if (h) {
		/* find the first buffer we need to look up. */
		for (x = h - 1; x > 0; x--) {
			if (mp->mp_bh[x])
				break;
		}
	}
	ret = __fillup_metapath(ip, mp, x, h);
	if (ret)
		return ret;
	/* Number of new buffers hooked in between height x and aheight. */
	return mp->mp_aheight - x - 1;
}
392
64bc06bb 393static void release_metapath(struct metapath *mp)
dbac6710
SW
394{
395 int i;
396
9b8c81d1
SW
397 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
398 if (mp->mp_bh[i] == NULL)
399 break;
400 brelse(mp->mp_bh[i]);
64bc06bb 401 mp->mp_bh[i] = NULL;
9b8c81d1 402 }
11707ea0
SW
403}
404
30cbf189
SW
/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @bh: The metadata block
 * @ptr: Current position in @bh
 * @limit: Max extent length to return
 * @eob: Set to 1 if we hit "end of block"
 *
 * Counts how many consecutive pointers starting at @ptr reference
 * consecutive physical blocks.
 *
 * NOTE(review): @limit is not consulted in this body — the loop only
 * stops at the end of the block or a non-consecutive pointer.  Confirm
 * whether callers rely on the cap being applied elsewhere.
 *
 * Returns: The length of the extent (minimum of one block)
 */

static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
{
	const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
	const __be64 *first = ptr;
	u64 d = be64_to_cpu(*ptr);

	*eob = 0;
	do {
		ptr++;
		if (ptr >= end)
			break;
		d++;
	} while(be64_to_cpu(*ptr) == d);
	if (ptr >= end)
		*eob = 1;
	return ptr - first;
}
432
7841b9f0
AG
/*
 * Callback for gfs2_walk_metadata.  Inspects the pointer range
 * [@start, @end) at the current height; @factor is the number of data
 * blocks addressed by each pointer at that height.  Returns WALK_STOP to
 * end the walk, WALK_NEXT to continue at the same or a lower height, or
 * a pointer within the range to descend below that pointer.
 */
typedef const __be64 *(*gfs2_metadata_walker)(
	struct metapath *mp,
	const __be64 *start, const __be64 *end,
	u64 factor, void *data);

#define WALK_STOP ((__be64 *)0)
#define WALK_NEXT ((__be64 *)1)

/*
 * gfs2_walk_metadata - walk @len blocks of the metadata tree from @lblock
 * @inode: The inode
 * @lblock: Starting logical block
 * @len: Number of blocks to cover
 * @mp: The metapath at @lblock (modified; a clone is released on exit)
 * @walker: Callback invoked for each pointer range visited
 * @data: Opaque cookie passed to @walker
 *
 * Moves up and down the tree as directed by @walker's return values,
 * cloning @mp before the first modification so the caller's buffers
 * survive.  Returns: errno
 */
static int gfs2_walk_metadata(struct inode *inode, sector_t lblock,
		u64 len, struct metapath *mp, gfs2_metadata_walker walker,
		void *data)
{
	struct metapath clone;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *start, *end, *ptr;
	u64 factor = 1;
	unsigned int hgt;
	int ret = 0;

	/* Blocks-per-pointer at the starting (actual) height. */
	for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--)
		factor *= sdp->sd_inptrs;

	for (;;) {
		u64 step;

		/* Walk indirect block. */
		start = metapointer(hgt, mp);
		end = metaend(hgt, mp);

		step = (end - start) * factor;
		if (step > len)
			end = start + DIV_ROUND_UP_ULL(len, factor);

		ptr = walker(mp, start, end, factor, data);
		if (ptr == WALK_STOP)
			break;
		if (step >= len)
			break;
		len -= step;
		if (ptr != WALK_NEXT) {
			/* Walker wants to descend below this pointer. */
			BUG_ON(!*ptr);
			mp->mp_list[hgt] += ptr - start;
			goto fill_up_metapath;
		}

lower_metapath:
		/* Decrease height of metapath. */
		if (mp != &clone) {
			clone_metapath(&clone, mp);
			mp = &clone;
		}
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		start = metapointer(hgt, mp);
		end = metaend(hgt, mp);
		if (start >= end) {
			/* Ran off the end of this block; go up a level. */
			mp->mp_list[hgt] = 0;
			if (!hgt)
				break;
			goto lower_metapath;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		if (mp != &clone) {
			clone_metapath(&clone, mp);
			mp = &clone;
		}
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			break;
		hgt += ret;
		/* Each level descended divides blocks-per-pointer. */
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	if (mp == &clone)
		release_metapath(mp);
	return ret;
}
521
struct gfs2_hole_walker_args {
	u64 blocks;	/* accumulated hole length, in blocks */
};

/*
 * gfs2_hole_walker - gfs2_metadata_walker that measures a hole
 *
 * Adds the run of zero pointers to args->blocks.  On the first non-zero
 * pointer: stop if we are already at full height (the hole ends here),
 * otherwise return the pointer so the walk descends to look closer.
 */
static const __be64 *gfs2_hole_walker(struct metapath *mp,
		const __be64 *start, const __be64 *end,
		u64 factor, void *data)
{
	struct gfs2_hole_walker_args *args = data;
	const __be64 *ptr;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr) {
			args->blocks += (ptr - start) * factor;
			if (mp->mp_aheight == mp->mp_fheight)
				return WALK_STOP;
			return ptr; /* increase height */
		}
	}
	args->blocks += (end - start) * factor;
	return WALK_NEXT;
}
544
/**
 * gfs2_hole_size - figure out the size of a hole
 * @inode: The inode
 * @lblock: The logical starting block number
 * @len: How far to look (in blocks)
 * @mp: The metapath at lblock
 * @iomap: The iomap to store the hole size in
 *
 * This function modifies @mp.
 *
 * Returns: errno on error
 */
static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
			  struct metapath *mp, struct iomap *iomap)
{
	struct gfs2_hole_walker_args args = { };
	int ret = 0;

	ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args);
	if (!ret)
		/* Convert the block count to bytes for the iomap result. */
		iomap->length = args.blocks << inode->i_blkbits;
	return ret;
}
568
9b8c81d1
SW
/*
 * gfs2_indirect_init - initialise a newly allocated indirect block
 * @mp: The metapath
 * @gl: The inode's glock
 * @i: Height of the new block (must be >= 1; parent is mp_bh[i - 1])
 * @offset: Pointer slot in the parent to point at the new block
 * @bn: Block number of the new indirect block
 *
 * Creates the buffer for @bn, adds it to the transaction, stamps its
 * metadata header and links it into the parent.
 *
 * Returns: the parent pointer slot that was set to @bn
 */
static inline __be64 *gfs2_indirect_init(struct metapath *mp,
					 struct gfs2_glock *gl, unsigned int i,
					 unsigned offset, u64 bn)
{
	__be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
				 ((i > 1) ? sizeof(struct gfs2_meta_header) :
					    sizeof(struct gfs2_dinode)));
	BUG_ON(i < 1);
	BUG_ON(mp->mp_bh[i] != NULL);
	mp->mp_bh[i] = gfs2_meta_new(gl, bn);
	gfs2_trans_add_meta(gl, mp->mp_bh[i]);
	gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
	ptr += offset;
	*ptr = cpu_to_be64(bn);
	return ptr;
}
586
/* States of the gfs2_iomap_alloc state machine, walked from the highest
 * applicable state down towards ALLOC_DATA. */
enum alloc_state {
	ALLOC_DATA = 0,		/* filling in data block pointers */
	ALLOC_GROW_DEPTH = 1,	/* adding indirect blocks below existing tree */
	ALLOC_GROW_HEIGHT = 2,	/* adding levels above the current root */
	/* ALLOC_UNSTUFF = 3, TBD and rather complicated */
};
593
/**
 * gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap structure
 * @flags: iomap flags
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 * i) Indirect blocks to grow the metadata tree height
 * ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * This function is called after gfs2_iomap_get, which works out the
 * total number of blocks which we need via gfs2_alloc_size.
 *
 * We then do the actual allocation asking for an extent at a time (if
 * enough contiguous free blocks are available, there will only be one
 * allocation request per call) and uses the state machine to initialise
 * the blocks in order.
 *
 * Right now, this function will allocate at most one indirect block
 * worth of data -- with a default block size of 4K, that's slightly
 * less than 2M. If this limitation is ever removed to allow huge
 * allocations, we would probably still want to limit the iomap size we
 * return to avoid stalling other tasks during huge writes; the next
 * iomap iteration would then find the blocks already allocated.
 *
 * Returns: errno on error
 */

static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			    unsigned flags, struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	size_t dblks = iomap->length >> inode->i_blkbits;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	int ret;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);
	BUG_ON(dblks < 1);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	down_write(&ip->i_rw_mutex);

	/* Work out the starting state from the difference between the
	 * desired (find) height and the actual (lookup) height. */
	if (mp->mp_fheight == mp->mp_aheight) {
		/* Bottom indirect block exists */
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		n = blks - alloced;
		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (ret)
			goto out;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_remove_revoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				/* Remember the dinode's single old pointer;
				 * it moves down to the new subtree below. */
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				/* Release the buffers that belonged to the
				 * old branch; they get re-created below. */
				for(i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
			/* fall through - To branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
			/* fall through - To tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->type = IOMAP_MAPPED;
	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, dibh->b_data);
out:
	up_write(&ip->i_rw_mutex);
	return ret;
}
745
7ee66c03
CH
746#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
747
64bc06bb
AG
/**
 * gfs2_alloc_size - Compute the maximum allocation size
 * @inode: The inode
 * @mp: The metapath
 * @size: Requested size in blocks
 *
 * Compute the maximum size of the next allocation at @mp.
 *
 * Returns: size in blocks
 */
static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *first, *ptr, *end;

	/*
	 * For writes to stuffed files, this function is called twice via
	 * gfs2_iomap_get, before and after unstuffing. The size we return the
	 * first time needs to be large enough to get the reservation and
	 * allocation sizes right. The size we return the second time must
	 * be exact or else gfs2_iomap_alloc won't do the right thing.
	 */

	if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
		/* Cap at the remaining pointer slots in the block that will
		 * hold the new pointers (dinode at height 1, else indirect). */
		unsigned int maxsize = mp->mp_fheight > 1 ?
			sdp->sd_inptrs : sdp->sd_diptrs;
		maxsize -= mp->mp_list[mp->mp_fheight - 1];
		if (size > maxsize)
			size = maxsize;
		return size;
	}

	/* Otherwise count the run of unallocated (zero) pointers at the
	 * leaf level, capped at @size. */
	first = metapointer(ip->i_height - 1, mp);
	end = metaend(ip->i_height - 1, mp);
	if (end - first > size)
		end = first + size;
	for (ptr = first; ptr < end; ptr++) {
		if (*ptr)
			break;
	}
	return ptr - first;
}
791
/**
 * gfs2_iomap_get - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 * @mp: The metapath
 *
 * On success the dinode buffer is left hooked onto mp->mp_bh[0]; the
 * caller releases it via release_metapath().
 *
 * Returns: errno
 */
static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			  unsigned flags, struct iomap *iomap,
			  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t size = i_size_read(inode);
	__be64 *ptr;
	sector_t lblock;
	sector_t lblock_stop;
	int ret;
	int eob;
	u64 len;
	struct buffer_head *dibh = NULL, *bh;
	u8 height;

	if (!length)
		return -EINVAL;

	down_read(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		goto unlock;
	mp->mp_bh[0] = dibh;

	if (gfs2_is_stuffed(ip)) {
		if (flags & IOMAP_WRITE) {
			loff_t max_size = gfs2_max_stuffed_size(ip);

			/* Writes past the inline capacity force unstuffing. */
			if (pos + length > max_size)
				goto unstuff;
			iomap->length = max_size;
		} else {
			if (pos >= size) {
				if (flags & IOMAP_REPORT) {
					ret = -ENOENT;
					goto unlock;
				} else {
					/* report a hole */
					iomap->offset = pos;
					iomap->length = length;
					goto do_alloc;
				}
			}
			iomap->length = size;
		}
		/* Inline data lives directly after the dinode header. */
		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
			      sizeof(struct gfs2_dinode);
		iomap->type = IOMAP_INLINE;
		iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
		goto out;
	}

unstuff:
	lblock = pos >> inode->i_blkbits;
	iomap->offset = lblock << inode->i_blkbits;
	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
	len = lblock_stop - lblock + 1;
	iomap->length = len << inode->i_blkbits;

	/* Height needed for the tree to address lblock. */
	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, mp);
	if (ret)
		goto unlock;

	if (mp->mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, mp);
	if (*ptr == 0)
		goto do_alloc;

	/* Mapped extent: see how many physically consecutive blocks follow. */
	bh = mp->mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh, ptr, len, &eob);

	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_MAPPED;
	iomap->flags |= IOMAP_F_MERGED;
	if (eob)
		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
	iomap->bdev = inode->i_sb->s_bdev;
unlock:
	up_read(&ip->i_rw_mutex);
	return ret;

do_alloc:
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	if (flags & IOMAP_REPORT) {
		if (pos >= size)
			ret = -ENOENT;
		else if (height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
		else
			iomap->length = size - pos;
	} else if (flags & IOMAP_WRITE) {
		u64 alloc_size;

		if (flags & IOMAP_DIRECT)
			goto out;	/* (see gfs2_file_direct_write) */

		/* Trim the mapping to what one allocation can cover. */
		len = gfs2_alloc_size(inode, mp, len);
		alloc_size = len << inode->i_blkbits;
		if (alloc_size < iomap->length)
			iomap->length = alloc_size;
	} else {
		if (pos < size && height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
	}
	goto out;
}
924
7c70b896
BP
/**
 * gfs2_lblk_to_dblk - convert logical block to disk block
 * @inode: the inode of the file we're mapping
 * @lblock: the block relative to the start of the file
 * @dblock: the returned dblock, if no error
 *
 * This function maps a single block from a file logical block (relative to
 * the start of the file) to a file system absolute block using iomap.
 *
 * Returns: the absolute file system block, or an error
 */
int gfs2_lblk_to_dblk(struct inode *inode, u32 lblock, u64 *dblock)
{
	struct iomap iomap = { };
	struct metapath mp = { .mp_aheight = 1, };
	loff_t pos = (loff_t)lblock << inode->i_blkbits;
	int ret;

	/* Map exactly one block's worth and drop the metapath buffers. */
	ret = gfs2_iomap_get(inode, pos, i_blocksize(inode), 0, &iomap, &mp);
	release_metapath(&mp);
	if (ret == 0)
		*dblock = iomap.addr >> inode->i_blkbits;

	return ret;
}
950
64bc06bb
AG
/* Acquire the inode glock exclusively for a write; when the inode is the
 * resource-index (rindex), also take the statfs inode's glock.  Paired
 * with gfs2_write_unlock.  Returns: errno */
static int gfs2_write_lock(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int error;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
	error = gfs2_glock_nq(&ip->i_gh);
	if (error)
		goto out_uninit;
	if (&ip->i_inode == sdp->sd_rindex) {
		struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

		error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
					   GL_NOCACHE, &m_ip->i_gh);
		if (error)
			goto out_unlock;
	}
	return 0;

out_unlock:
	gfs2_glock_dq(&ip->i_gh);
out_uninit:
	gfs2_holder_uninit(&ip->i_gh);
	return error;
}
977
/* Release the glocks taken by gfs2_write_lock, in reverse order. */
static void gfs2_write_unlock(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (&ip->i_inode == sdp->sd_rindex) {
		struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

		gfs2_glock_dq_uninit(&m_ip->i_gh);
	}
	gfs2_glock_dq_uninit(&ip->i_gh);
}
990
d0a22a4b
AG
/* iomap page_prepare hook: open a transaction sized for the dinode plus
 * one block per block of @len.  Ended in gfs2_iomap_page_done. */
static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
				   unsigned len, struct iomap *iomap)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	return gfs2_trans_begin(sdp, RES_DINODE + (len >> inode->i_blkbits), 0);
}
998
df0db3ec
AG
/* iomap page_done hook: add the copied range's buffers to the inode's
 * data tracking (unless stuffed) and end the transaction opened by
 * gfs2_iomap_page_prepare.  @page may be NULL on a failed write. */
static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
				 unsigned copied, struct page *page,
				 struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (page && !gfs2_is_stuffed(ip))
		gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
	gfs2_trans_end(sdp);
}
1010
/* Per-page hooks handed to the iomap layer for buffered writes. */
static const struct iomap_page_ops gfs2_iomap_page_ops = {
	.page_prepare = gfs2_iomap_page_prepare,
	.page_done = gfs2_iomap_page_done,
};
1015
64bc06bb
AG
/*
 * gfs2_iomap_begin_write - prepare a mapping for a buffered write
 * @inode: The inode
 * @pos: Starting file offset of the write
 * @length: Length of the write
 * @flags: IOMAP_* flags from the caller
 * @iomap: The mapping to fill in
 * @mp: Metapath to use/fill (released by the caller)
 *
 * Takes the write glock(s), looks up the existing mapping, and — if the
 * write requires unstuffing or landing in a hole — reserves quota and
 * resource-group space and allocates the blocks inside a transaction.
 * On allocation failure after the transaction started, any partially
 * allocated blocks are punched back out.
 *
 * Returns: errno (with all locks released on error)
 */
static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
				  loff_t length, unsigned flags,
				  struct iomap *iomap,
				  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	bool unstuff, alloc_required;
	int ret;

	ret = gfs2_write_lock(inode);
	if (ret)
		return ret;

	/* A stuffed inode must be unstuffed if the write extends past the
	   space available in the dinode block itself. */
	unstuff = gfs2_is_stuffed(ip) &&
		  pos + length > gfs2_max_stuffed_size(ip);

	ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp);
	if (ret)
		goto out_unlock;

	alloc_required = unstuff || iomap->type == IOMAP_HOLE;

	if (alloc_required || gfs2_is_jdata(ip))
		gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
				       &ind_blocks);

	if (alloc_required) {
		struct gfs2_alloc_parms ap = {
			.target = data_blocks + ind_blocks
		};

		ret = gfs2_quota_lock_check(ip, &ap);
		if (ret)
			goto out_unlock;

		ret = gfs2_inplace_reserve(ip, &ap);
		if (ret)
			goto out_qunlock;
	}

	/* Work out the transaction reservation size. */
	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks;
	if (ind_blocks || data_blocks)
		rblocks += RES_STATFS + RES_QUOTA;
	if (inode == sdp->sd_rindex)
		rblocks += 2 * RES_STATFS;
	if (alloc_required)
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);

	if (unstuff || iomap->type == IOMAP_HOLE) {
		struct gfs2_trans *tr;

		ret = gfs2_trans_begin(sdp, rblocks,
				       iomap->length >> inode->i_blkbits);
		if (ret)
			goto out_trans_fail;

		if (unstuff) {
			ret = gfs2_unstuff_dinode(ip, NULL);
			if (ret)
				goto out_trans_end;
			/* Re-map: unstuffing changed the block layout. */
			release_metapath(mp);
			ret = gfs2_iomap_get(inode, iomap->offset,
					     iomap->length, flags, iomap, mp);
			if (ret)
				goto out_trans_end;
		}

		if (iomap->type == IOMAP_HOLE) {
			ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
			if (ret) {
				/* Undo the partial allocation. */
				gfs2_trans_end(sdp);
				gfs2_inplace_release(ip);
				punch_hole(ip, iomap->offset, iomap->length);
				goto out_qunlock;
			}
		}

		tr = current->journal_info;
		if (tr->tr_num_buf_new)
			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
		else
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[0]);

		gfs2_trans_end(sdp);
	}

	/* Stuffed and jdata writes need per-page transactions. */
	if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
		iomap->page_ops = &gfs2_iomap_page_ops;
	return 0;

out_trans_end:
	gfs2_trans_end(sdp);
out_trans_fail:
	if (alloc_required)
		gfs2_inplace_release(ip);
out_qunlock:
	if (alloc_required)
		gfs2_quota_unlock(ip);
out_unlock:
	gfs2_write_unlock(inode);
	return ret;
}
1122
628e366d
AG
/*
 * gfs2_iomap_begin - iomap_ops entry point: map a file range
 * @inode: The inode
 * @pos: Starting file offset
 * @length: Length of the range
 * @flags: IOMAP_* flags describing the operation
 * @iomap: The mapping to fill in
 *
 * Buffered writes go through gfs2_iomap_begin_write(); reads and direct
 * I/O only look up the existing mapping.
 *
 * Returns: errno
 */
static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	iomap->flags |= IOMAP_F_BUFFER_HEAD;

	trace_gfs2_iomap_start(ip, pos, length, flags);
	if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
		ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
	} else {
		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);

		/*
		 * Silently fall back to buffered I/O for stuffed files or if
		 * we've hit a hole (see gfs2_file_direct_write).
		 */
		if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) &&
		    iomap->type != IOMAP_MAPPED)
			ret = -ENOTBLK;
	}
	release_metapath(&mp);
	trace_gfs2_iomap_end(ip, iomap, ret);
	return ret;
}
1150
64bc06bb
AG
/*
 * gfs2_iomap_end - iomap_ops completion hook for a mapped range
 * @inode: The inode
 * @pos: Starting file offset of the operation
 * @length: Length that was mapped
 * @written: Number of bytes actually written
 * @flags: IOMAP_* flags from gfs2_iomap_begin
 * @iomap: The mapping that was used
 *
 * For buffered writes only: adds ordered-write tracking, releases the
 * block reservation and quota lock, trims back any freshly allocated
 * blocks beyond what was actually written (short write), and drops the
 * glocks taken in gfs2_iomap_begin_write().
 *
 * Returns: 0 (errors from the write itself are reported elsewhere)
 */
static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
			  ssize_t written, unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	/* Only buffered writes need the cleanup below. */
	if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
		goto out;

	if (!gfs2_is_stuffed(ip))
		gfs2_ordered_add_inode(ip);

	if (inode == sdp->sd_rindex)
		adjust_fs_space(inode);

	gfs2_inplace_release(ip);

	if (length != written && (iomap->flags & IOMAP_F_NEW)) {
		/* Deallocate blocks that were just allocated. */
		loff_t blockmask = i_blocksize(inode) - 1;
		loff_t end = (pos + length) & ~blockmask;

		pos = (pos + written + blockmask) & ~blockmask;
		if (pos < end) {
			truncate_pagecache_range(inode, pos, end - 1);
			punch_hole(ip, pos, end - pos);
		}
	}

	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
		gfs2_quota_unlock(ip);
	if (iomap->flags & IOMAP_F_SIZE_CHANGED)
		mark_inode_dirty(inode);
	gfs2_write_unlock(inode);

out:
	return 0;
}
1189
628e366d
AG
/* iomap operations used for buffered and direct I/O on GFS2 files. */
const struct iomap_ops gfs2_iomap_ops = {
	.iomap_begin = gfs2_iomap_begin,
	.iomap_end = gfs2_iomap_end,
};
1194
/**
 * gfs2_block_map - Map one or more blocks of an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if it's ok to alloc blocks to satisfy the request
 *
 * The size of the requested mapping is defined in bh_map->b_size.
 *
 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
 * bh_map->b_size to indicate the size of the mapping when @lblock and
 * successive blocks are mapped, up to the requested size.
 *
 * Sets buffer_boundary() if a read of metadata will be required
 * before the next block can be mapped. Sets buffer_new() if new
 * blocks were allocated.
 *
 * Returns: errno
 */

int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	loff_t pos = (loff_t)lblock << inode->i_blkbits;
	loff_t length = bh_map->b_size;
	struct metapath mp = { .mp_aheight = 1, };
	struct iomap iomap = { };
	int ret;

	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);

	/* Implemented on top of the iomap machinery. */
	if (create) {
		ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
		if (!ret && iomap.type == IOMAP_HOLE)
			ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
		release_metapath(&mp);
	} else {
		ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
		release_metapath(&mp);
	}
	if (ret)
		goto out;

	/* Clamp the mapping to the requested size; a clamped mapping can
	   no longer be known to end at a metadata boundary. */
	if (iomap.length > bh_map->b_size) {
		iomap.length = bh_map->b_size;
		iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
	}
	if (iomap.addr != IOMAP_NULL_ADDR)
		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
	bh_map->b_size = iomap.length;
	if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
		set_buffer_boundary(bh_map);
	if (iomap.flags & IOMAP_F_NEW)
		set_buffer_new(bh_map);

out:
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	return ret;
}
1259
941e6d7d
SW
/*
 * Deprecated: do not use in new code
 *
 * Maps @lblock and reports the first extent found: *dblock is the
 * starting disk block, *extlen the extent length in fs blocks, and
 * *new is set when the blocks were freshly allocated.  On input, *new
 * selects whether allocation is allowed (it doubles as @create).
 */
int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
{
	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
	int ret;
	int create = *new;

	BUG_ON(!extlen);
	BUG_ON(!dblock);
	BUG_ON(!new);

	/* For read-only lookups, ask for a 32-block extent in one go. */
	bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
	ret = gfs2_block_map(inode, lblock, &bh, create);
	*extlen = bh.b_size >> inode->i_blkbits;
	*dblock = bh.b_blocknr;
	if (buffer_new(&bh))
		*new = 1;
	else
		*new = 0;
	return ret;
}
1283
/**
 * gfs2_block_zero_range - Deal with zeroing out data
 * @inode: The inode
 * @from: File offset to start zeroing at
 * @length: Number of bytes to zero (within one page)
 *
 * Zeroes @length bytes starting at @from within a single page,
 * journaling or ordering the affected buffer as appropriate.
 * Holes are left untouched.  This is partly borrowed from ext3.
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	struct address_space *mapping = inode->i_mapping;
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned long index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize, iblock, pos;
	struct buffer_head *bh;
	struct page *page;
	int err;

	page = find_or_create_page(mapping, index, GFP_NOFS);
	if (!page)
		return 0;

	blocksize = inode->i_sb->s_blocksize;
	iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;

	if (!buffer_mapped(bh)) {
		gfs2_block_map(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		err = -EIO;
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
		err = 0;
	}

	/* jdata buffers are journaled; everything else is ordered. */
	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
	else
		gfs2_ordered_add_inode(ip);

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
unlock:
	unlock_page(page);
	put_page(page);
	return err;
}
1355
c62baf65
FF
/* Maximum number of revokes to issue in a single truncate transaction. */
#define GFS2_JTRUNC_REVOKES 8192

/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */

static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	u64 chunk;
	int error;

	while (oldsize != newsize) {
		struct gfs2_trans *tr;
		unsigned int offs;

		chunk = oldsize - newsize;
		if (chunk > max_chunk)
			chunk = max_chunk;

		/* Keep chunk boundaries page aligned after the first,
		   possibly partial, page. */
		offs = oldsize & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache(inode, oldsize - chunk);
		oldsize -= chunk;

		/* Only restart the transaction if this chunk dirtied it. */
		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}

	return 0;
}
1403
/*
 * trunc_start - begin truncating an inode down to @newsize
 * @inode: The inode
 * @newsize: The new, smaller size
 *
 * Shrinks i_size, zeroes the partial tail block (or clears the stuffed
 * dinode tail), marks GFS2_DIF_TRUNC_IN_PROG for unstuffed inodes, and
 * truncates the page cache.  Block deallocation happens later.
 *
 * Returns: errno
 */
static int trunc_start(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = NULL;
	int journaled = gfs2_is_jdata(ip);
	u64 oldsize = inode->i_size;
	int error;

	if (journaled)
		error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip)) {
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	} else {
		unsigned int blocksize = i_blocksize(inode);
		unsigned int offs = newsize & (blocksize - 1);
		if (offs) {
			/* Zero the tail of the last remaining block. */
			error = gfs2_block_zero_range(inode, newsize,
						      blocksize - offs);
			if (error)
				goto out;
		}
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
	}

	i_size_write(inode, newsize);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_dinode_out(ip, dibh->b_data);

	if (journaled)
		error = gfs2_journaled_truncate(inode, oldsize, newsize);
	else
		truncate_pagecache(inode, newsize);

out:
	brelse(dibh);
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}
1455
628e366d
AG
/*
 * gfs2_iomap_get_alloc - map a range, allocating blocks for any hole
 * @inode: The inode
 * @pos: Starting file offset
 * @length: Length of the range
 * @iomap: The mapping to fill in
 *
 * Convenience wrapper: look up the mapping and allocate if it turns
 * out to be a hole.  The caller must hold the necessary locks and an
 * active transaction for the allocation path.
 *
 * Returns: errno
 */
int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
			 struct iomap *iomap)
{
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
	if (!ret && iomap->type == IOMAP_HOLE)
		ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
	release_metapath(&mp);
	return ret;
}
1468
d552a2b9
BP
1469/**
1470 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1471 * @ip: inode
1472 * @rg_gh: holder of resource group glock
5cf26b1e
AG
1473 * @bh: buffer head to sweep
1474 * @start: starting point in bh
1475 * @end: end point in bh
1476 * @meta: true if bh points to metadata (rather than data)
d552a2b9 1477 * @btotal: place to keep count of total blocks freed
d552a2b9
BP
1478 *
1479 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1480 * free, and free them all. However, we do it one rgrp at a time. If this
1481 * block has references to multiple rgrps, we break it into individual
1482 * transactions. This allows other processes to use the rgrps while we're
1483 * focused on a single one, for better concurrency / performance.
1484 * At every transaction boundary, we rewrite the inode into the journal.
1485 * That way the bitmaps are kept consistent with the inode and we can recover
1486 * if we're interrupted by power-outages.
1487 *
1488 * Returns: 0, or return code if an error occurred.
1489 * *btotal has the total number of blocks freed
1490 */
1491static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
5cf26b1e
AG
1492 struct buffer_head *bh, __be64 *start, __be64 *end,
1493 bool meta, u32 *btotal)
b3b94faa 1494{
9b8c81d1 1495 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
d552a2b9
BP
1496 struct gfs2_rgrpd *rgd;
1497 struct gfs2_trans *tr;
5cf26b1e 1498 __be64 *p;
d552a2b9
BP
1499 int blks_outside_rgrp;
1500 u64 bn, bstart, isize_blks;
1501 s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
d552a2b9
BP
1502 int ret = 0;
1503 bool buf_in_tr = false; /* buffer was added to transaction */
1504
d552a2b9 1505more_rgrps:
5cf26b1e
AG
1506 rgd = NULL;
1507 if (gfs2_holder_initialized(rd_gh)) {
1508 rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
1509 gfs2_assert_withdraw(sdp,
1510 gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1511 }
d552a2b9
BP
1512 blks_outside_rgrp = 0;
1513 bstart = 0;
1514 blen = 0;
d552a2b9 1515
5cf26b1e 1516 for (p = start; p < end; p++) {
d552a2b9
BP
1517 if (!*p)
1518 continue;
1519 bn = be64_to_cpu(*p);
5cf26b1e
AG
1520
1521 if (rgd) {
1522 if (!rgrp_contains_block(rgd, bn)) {
1523 blks_outside_rgrp++;
1524 continue;
1525 }
d552a2b9 1526 } else {
90bcab99 1527 rgd = gfs2_blk2rgrpd(sdp, bn, true);
5cf26b1e
AG
1528 if (unlikely(!rgd)) {
1529 ret = -EIO;
1530 goto out;
1531 }
d552a2b9
BP
1532 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1533 0, rd_gh);
1534 if (ret)
1535 goto out;
1536
1537 /* Must be done with the rgrp glock held: */
1538 if (gfs2_rs_active(&ip->i_res) &&
1539 rgd == ip->i_res.rs_rbm.rgd)
1540 gfs2_rs_deltree(&ip->i_res);
1541 }
1542
d552a2b9
BP
1543 /* The size of our transactions will be unknown until we
1544 actually process all the metadata blocks that relate to
1545 the rgrp. So we estimate. We know it can't be more than
1546 the dinode's i_blocks and we don't want to exceed the
1547 journal flush threshold, sd_log_thresh2. */
1548 if (current->journal_info == NULL) {
1549 unsigned int jblocks_rqsted, revokes;
1550
1551 jblocks_rqsted = rgd->rd_length + RES_DINODE +
1552 RES_INDIRECT;
1553 isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
1554 if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
1555 jblocks_rqsted +=
1556 atomic_read(&sdp->sd_log_thresh2);
1557 else
1558 jblocks_rqsted += isize_blks;
1559 revokes = jblocks_rqsted;
1560 if (meta)
5cf26b1e 1561 revokes += end - start;
d552a2b9
BP
1562 else if (ip->i_depth)
1563 revokes += sdp->sd_inptrs;
1564 ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
1565 if (ret)
1566 goto out_unlock;
1567 down_write(&ip->i_rw_mutex);
1568 }
1569 /* check if we will exceed the transaction blocks requested */
1570 tr = current->journal_info;
1571 if (tr->tr_num_buf_new + RES_STATFS +
1572 RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
1573 /* We set blks_outside_rgrp to ensure the loop will
1574 be repeated for the same rgrp, but with a new
1575 transaction. */
1576 blks_outside_rgrp++;
1577 /* This next part is tricky. If the buffer was added
1578 to the transaction, we've already set some block
1579 pointers to 0, so we better follow through and free
1580 them, or we will introduce corruption (so break).
1581 This may be impossible, or at least rare, but I
1582 decided to cover the case regardless.
1583
1584 If the buffer was not added to the transaction
1585 (this call), doing so would exceed our transaction
1586 size, so we need to end the transaction and start a
1587 new one (so goto). */
1588
1589 if (buf_in_tr)
1590 break;
1591 goto out_unlock;
1592 }
1593
1594 gfs2_trans_add_meta(ip->i_gl, bh);
1595 buf_in_tr = true;
1596 *p = 0;
1597 if (bstart + blen == bn) {
1598 blen++;
1599 continue;
1600 }
1601 if (bstart) {
0ddeded4 1602 __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
d552a2b9
BP
1603 (*btotal) += blen;
1604 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1605 }
1606 bstart = bn;
1607 blen = 1;
1608 }
1609 if (bstart) {
0ddeded4 1610 __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
d552a2b9
BP
1611 (*btotal) += blen;
1612 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1613 }
1614out_unlock:
1615 if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
1616 outside the rgrp we just processed,
1617 do it all over again. */
1618 if (current->journal_info) {
5cf26b1e
AG
1619 struct buffer_head *dibh;
1620
1621 ret = gfs2_meta_inode_buffer(ip, &dibh);
1622 if (ret)
1623 goto out;
d552a2b9
BP
1624
1625 /* Every transaction boundary, we rewrite the dinode
1626 to keep its di_blocks current in case of failure. */
1627 ip->i_inode.i_mtime = ip->i_inode.i_ctime =
b32c8c76 1628 current_time(&ip->i_inode);
d552a2b9
BP
1629 gfs2_trans_add_meta(ip->i_gl, dibh);
1630 gfs2_dinode_out(ip, dibh->b_data);
5cf26b1e 1631 brelse(dibh);
d552a2b9
BP
1632 up_write(&ip->i_rw_mutex);
1633 gfs2_trans_end(sdp);
1634 }
1635 gfs2_glock_dq_uninit(rd_gh);
1636 cond_resched();
1637 goto more_rgrps;
1638 }
1639out:
1640 return ret;
1641}
1642
10d2cf94
AG
1643static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1644{
1645 if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1646 return false;
1647 return true;
1648}
1649
d552a2b9
BP
/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @mp: starting metapath
 * @h: desired height to search
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 * On success, mp->mp_list[h] is advanced to the pointer found and the
 * index one level down (if any) is reset to 0.  The search stops at
 * @end_list when given, so pointers past the hole's end are not visited.
 *
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
			     unsigned int h,
			     __u16 *end_list, unsigned int end_aligned)
{
	struct buffer_head *bh = mp->mp_bh[h];
	__be64 *first, *ptr, *end;

	first = metaptr1(h, mp);
	ptr = first + mp->mp_list[h];
	end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
		bool keep_end = h < end_aligned;
		end = first + end_list[h] + keep_end;
	}

	while (ptr < end) {
		if (*ptr) { /* if we have a non-null pointer */
			mp->mp_list[h] = ptr - first;
			h++;
			if (h < GFS2_MAX_META_HEIGHT)
				mp->mp_list[h] = 0;
			return true;
		}
		ptr++;
	}
	return false;
}
1686
/* States of the punch_hole() deallocation state machine. */
enum dealloc_states {
	DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,    /* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,       /* process complete */
};
b3b94faa 1693
5cf26b1e
AG
/*
 * metapointer_range - compute the pointer span to sweep at one height
 * @mp: the current metapath position
 * @height: the height within the metadata tree
 * @start_list/@start_aligned: start of the hole (see punch_hole)
 * @end_list/@end_aligned: end of the hole, or NULL for truncate-to-end
 * @start: (out) first pointer to process
 * @end: (out) one past the last pointer to process
 *
 * The range covers the whole buffer except where the metapath coincides
 * with the hole's start or end, in which case the boundary pointer is
 * kept (not deallocated) when the position is not aligned at this height.
 */
static inline void
metapointer_range(struct metapath *mp, int height,
		  __u16 *start_list, unsigned int start_aligned,
		  __u16 *end_list, unsigned int end_aligned,
		  __be64 **start, __be64 **end)
{
	struct buffer_head *bh = mp->mp_bh[height];
	__be64 *first;

	first = metaptr1(height, mp);
	*start = first;
	if (mp_eq_to_hgt(mp, start_list, height)) {
		bool keep_start = height < start_aligned;
		*start = first + start_list[height] + keep_start;
	}
	*end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
		bool keep_end = height < end_aligned;
		*end = first + end_list[height] + keep_end;
	}
}
1715
/*
 * walk_done - check if the metapath walk at @height has passed the end
 * @sdp: the superblock
 * @mp: the current metapath position
 * @height: the height being walked
 * @end_list/@end_aligned: end of the hole, or NULL to walk to the end
 *
 * Returns: true when mp_list[height] has reached the end of the hole
 *          (or the last pointer in the buffer when there is no end_list)
 */
static inline bool walk_done(struct gfs2_sbd *sdp,
			     struct metapath *mp, int height,
			     __u16 *end_list, unsigned int end_aligned)
{
	__u16 end;

	if (end_list) {
		bool keep_end = height < end_aligned;
		if (!mp_eq_to_hgt(mp, end_list, height))
			return false;
		end = end_list[height] + keep_end;
	} else
		end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
	return mp->mp_list[height] >= end;
}
1731
/**
 * punch_hole - deallocate blocks in a file
 * @ip: inode to truncate
 * @offset: the start of the hole
 * @length: the size of the hole (or 0 for truncate)
 *
 * Punch a hole into a file or truncate a file at a given position.  This
 * function operates in whole blocks (@offset and @length are rounded
 * accordingly); partially filled blocks must be cleared otherwise.
 *
 * This function works from the bottom up, and from the right to the left. In
 * other words, it strips off the highest layer (data) before stripping any of
 * the metadata. Doing it this way is best in case the operation is interrupted
 * by power failure, etc. The dinode is rewritten in every transaction to
 * guarantee integrity.
 */
static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	u64 maxsize = sdp->sd_heightsize[ip->i_height];
	struct metapath mp = {};
	struct buffer_head *dibh, *bh;
	struct gfs2_holder rd_gh;
	unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
	u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
	__u16 start_list[GFS2_MAX_META_HEIGHT];
	__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
	unsigned int start_aligned, uninitialized_var(end_aligned);
	unsigned int strip_h = ip->i_height - 1;
	u32 btotal = 0;
	int ret, state;
	int mp_h; /* metapath buffers are read in to this height */
	u64 prev_bnr = 0;
	__be64 *start, *end;

	if (offset >= maxsize) {
		/*
		 * The starting point lies beyond the allocated meta-data;
		 * there are no blocks to deallocate.
		 */
		return 0;
	}

	/*
	 * The start position of the hole is defined by lblock, start_list, and
	 * start_aligned.  The end position of the hole is defined by lend,
	 * end_list, and end_aligned.
	 *
	 * start_aligned and end_aligned define down to which height the start
	 * and end positions are aligned to the metadata tree (i.e., the
	 * position is a multiple of the metadata granularity at the height
	 * above).  This determines at which heights additional meta pointers
	 * needs to be preserved for the remaining data.
	 */

	if (length) {
		u64 end_offset = offset + length;
		u64 lend;

		/*
		 * Clip the end at the maximum file size for the given height:
		 * that's how far the metadata goes; files bigger than that
		 * will have additional layers of indirection.
		 */
		if (end_offset > maxsize)
			end_offset = maxsize;
		lend = end_offset >> bsize_shift;

		if (lblock >= lend)
			return 0;

		find_metapath(sdp, lend, &mp, ip->i_height);
		end_list = __end_list;
		memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));

		for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
			if (end_list[mp_h])
				break;
		}
		end_aligned = mp_h;
	}

	find_metapath(sdp, lblock, &mp, ip->i_height);
	memcpy(start_list, mp.mp_list, sizeof(start_list));

	for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
		if (start_list[mp_h])
			break;
	}
	start_aligned = mp_h;

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		return ret;

	mp.mp_bh[0] = dibh;
	ret = lookup_metapath(ip, &mp);
	if (ret)
		goto out_metapath;

	/* issue read-ahead on metadata */
	for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
		metapointer_range(&mp, mp_h, start_list, start_aligned,
				  end_list, end_aligned, &start, &end);
		gfs2_metapath_ra(ip->i_gl, start, end);
	}

	if (mp.mp_aheight == ip->i_height)
		state = DEALLOC_MP_FULL; /* We have a complete metapath */
	else
		state = DEALLOC_FILL_MP; /* deal with partial metapath */

	ret = gfs2_rindex_update(sdp);
	if (ret)
		goto out_metapath;

	ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
	if (ret)
		goto out_metapath;
	gfs2_holder_mark_uninitialized(&rd_gh);

	mp_h = strip_h;

	while (state != DEALLOC_DONE) {
		switch (state) {
		/* Truncate a full metapath at the given strip height.
		 * Note that strip_h == mp_h in order to be in this state. */
		case DEALLOC_MP_FULL:
			bh = mp.mp_bh[mp_h];
			gfs2_assert_withdraw(sdp, bh);
			/* Seeing the same block twice would mean we are not
			   making progress; complain loudly. */
			if (gfs2_assert_withdraw(sdp,
						 prev_bnr != bh->b_blocknr)) {
				printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
				       "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
				       sdp->sd_fsname,
				       (unsigned long long)ip->i_no_addr,
				       prev_bnr, ip->i_height, strip_h, mp_h);
			}
			prev_bnr = bh->b_blocknr;

			if (gfs2_metatype_check(sdp, bh,
						(mp_h ? GFS2_METATYPE_IN :
							GFS2_METATYPE_DI))) {
				ret = -EIO;
				goto out;
			}

			/*
			 * Below, passing end_aligned as 0 gives us the
			 * metapointer range excluding the end point: the end
			 * point is the first metapath we must not deallocate!
			 */

			metapointer_range(&mp, mp_h, start_list, start_aligned,
					  end_list, 0 /* end_aligned */,
					  &start, &end);
			ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
						 start, end,
						 mp_h != ip->i_height - 1,
						 &btotal);

			/* If we hit an error or just swept dinode buffer,
			   just exit. */
			if (ret || !mp_h) {
				state = DEALLOC_DONE;
				break;
			}
			state = DEALLOC_MP_LOWER;
			break;

		/* lower the metapath strip height */
		case DEALLOC_MP_LOWER:
			/* We're done with the current buffer, so release it,
			   unless it's the dinode buffer. Then back up to the
			   previous pointer. */
			if (mp_h) {
				brelse(mp.mp_bh[mp_h]);
				mp.mp_bh[mp_h] = NULL;
			}
			/* If we can't get any lower in height, we've stripped
			   off all we can. Next step is to back up and start
			   stripping the previous level of metadata. */
			if (mp_h == 0) {
				strip_h--;
				memcpy(mp.mp_list, start_list, sizeof(start_list));
				mp_h = strip_h;
				state = DEALLOC_FILL_MP;
				break;
			}
			mp.mp_list[mp_h] = 0;
			mp_h--; /* search one metadata height down */
			mp.mp_list[mp_h]++;
			if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
				break;
			/* Here we've found a part of the metapath that is not
			 * allocated. We need to search at that height for the
			 * next non-null pointer. */
			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
				state = DEALLOC_FILL_MP;
				mp_h++;
			}
			/* No more non-null pointers at this height. Back up
			   to the previous height and try again. */
			break; /* loop around in the same state */

		/* Fill the metapath with buffers to the given height. */
		case DEALLOC_FILL_MP:
			/* Fill the buffers out to the current height. */
			ret = fillup_metapath(ip, &mp, mp_h);
			if (ret < 0)
				goto out;

			/* On the first pass, issue read-ahead on metadata. */
			if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
				unsigned int height = mp.mp_aheight - 1;

				/* No read-ahead for data blocks. */
				if (mp.mp_aheight - 1 == strip_h)
					height--;

				for (; height >= mp.mp_aheight - ret; height--) {
					metapointer_range(&mp, height,
							  start_list, start_aligned,
							  end_list, end_aligned,
							  &start, &end);
					gfs2_metapath_ra(ip->i_gl, start, end);
				}
			}

			/* If buffers found for the entire strip height */
			if (mp.mp_aheight - 1 == strip_h) {
				state = DEALLOC_MP_FULL;
				break;
			}
			if (mp.mp_aheight < ip->i_height) /* We have a partial height */
				mp_h = mp.mp_aheight - 1;

			/* If we find a non-null block pointer, crawl a bit
			   higher up in the metapath and try again, otherwise
			   we need to look lower for a new starting point. */
			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
				mp_h++;
			else
				state = DEALLOC_MP_LOWER;
			break;
		}
	}

	if (btotal) {
		if (current->journal_info == NULL) {
			ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
					       RES_QUOTA, 0);
			if (ret)
				goto out;
			down_write(&ip->i_rw_mutex);
		}
		gfs2_statfs_change(sdp, 0, +btotal, 0);
		gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
				  ip->i_inode.i_gid);
		ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
		gfs2_trans_add_meta(ip->i_gl, dibh);
		gfs2_dinode_out(ip, dibh->b_data);
		up_write(&ip->i_rw_mutex);
		gfs2_trans_end(sdp);
	}

out:
	if (gfs2_holder_initialized(&rd_gh))
		gfs2_glock_dq_uninit(&rd_gh);
	if (current->journal_info) {
		up_write(&ip->i_rw_mutex);
		gfs2_trans_end(sdp);
		cond_resched();
	}
	gfs2_quota_unhold(ip);
out_metapath:
	release_metapath(&mp);
	return ret;
}
2011
/**
 * trunc_end - finish off a truncate and clear the TRUNC_IN_PROG flag
 * @ip: the inode being truncated
 *
 * Runs under its own transaction and i_rw_mutex.  If the file has shrunk
 * to zero bytes, the metadata tail of the dinode is cleared and the inode
 * is removed from the ordered-write list.  In all cases the timestamps are
 * refreshed, GFS2_DIF_TRUNC_IN_PROG is cleared, and the dinode is written
 * back.
 *
 * Returns: errno
 */
static int trunc_end(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (!i_size_read(&ip->i_inode)) {
		/* File is now empty: collapse the metadata tree and reset
		   the allocation goal to the inode's own block. */
		ip->i_height = 0;
		ip->i_goal = ip->i_no_addr;
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
		gfs2_ordered_del_inode(ip);
	}
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	/* Truncate is complete; a crash from here on needs no resume. */
	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;

	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

out:
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}
2046
2047/**
2048 * do_shrink - make a file smaller
ff8f33c8 2049 * @inode: the inode
ff8f33c8 2050 * @newsize: the size to make the file
b3b94faa 2051 *
ff8f33c8
SW
2052 * Called with an exclusive lock on @inode. The @size must
2053 * be equal to or smaller than the current inode size.
b3b94faa
DT
2054 *
2055 * Returns: errno
2056 */
2057
8b5860a3 2058static int do_shrink(struct inode *inode, u64 newsize)
b3b94faa 2059{
ff8f33c8 2060 struct gfs2_inode *ip = GFS2_I(inode);
b3b94faa
DT
2061 int error;
2062
8b5860a3 2063 error = trunc_start(inode, newsize);
b3b94faa
DT
2064 if (error < 0)
2065 return error;
ff8f33c8 2066 if (gfs2_is_stuffed(ip))
b3b94faa
DT
2067 return 0;
2068
10d2cf94 2069 error = punch_hole(ip, newsize, 0);
ff8f33c8 2070 if (error == 0)
b3b94faa
DT
2071 error = trunc_end(ip);
2072
2073 return error;
2074}
2075
ff8f33c8 2076void gfs2_trim_blocks(struct inode *inode)
a13b8c5f 2077{
ff8f33c8
SW
2078 int ret;
2079
8b5860a3 2080 ret = do_shrink(inode, inode->i_size);
ff8f33c8
SW
2081 WARN_ON(ret != 0);
2082}
2083
/**
 * do_grow - Touch and update inode size
 * @inode: The inode
 * @size: The new size
 *
 * This function updates the timestamps on the inode and
 * may also increase the size of the inode. This function
 * must not be called with @size any smaller than the current
 * inode size.
 *
 * Although it is not strictly required to unstuff files here,
 * earlier versions of GFS2 have a bug in the stuffed file reading
 * code which will result in a buffer overrun if the size is larger
 * than the max stuffed file size. In order to prevent this from
 * occurring, such files are unstuffed, but in other cases we can
 * just update the inode size directly.
 *
 * Returns: 0 on success, or -ve on error
 */

static int do_grow(struct inode *inode, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_alloc_parms ap = { .target = 1, };
	struct buffer_head *dibh;
	int error;
	int unstuff = 0;

	/* Growing past the stuffed limit requires a real block, and hence
	   quota and resource-group reservations before the transaction. */
	if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
		error = gfs2_quota_lock_check(ip, &ap);
		if (error)
			return error;

		error = gfs2_inplace_reserve(ip, &ap);
		if (error)
			goto do_grow_qunlock;
		unstuff = 1;
	}

	/* Transaction size depends on whether we unstuff (jdata needs the
	   data block journaled too) and whether quotas are enabled. */
	error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
				 (unstuff &&
				  gfs2_is_jdata(ip) ? RES_JDATA : 0) +
				 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
				  0 : RES_QUOTA), 0);
	if (error)
		goto do_grow_release;

	if (unstuff) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (error)
			goto do_end_trans;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto do_end_trans;

	i_size_write(inode, size);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

	/* Note: the labels below nest inside the if (unstuff) block on
	   purpose, so the reservation/quota cleanup only runs when the
	   corresponding setup above ran. */
do_end_trans:
	gfs2_trans_end(sdp);
do_grow_release:
	if (unstuff) {
		gfs2_inplace_release(ip);
do_grow_qunlock:
		gfs2_quota_unlock(ip);
	}
	return error;
}
2158
b3b94faa 2159/**
ff8f33c8
SW
2160 * gfs2_setattr_size - make a file a given size
2161 * @inode: the inode
2162 * @newsize: the size to make the file
b3b94faa 2163 *
ff8f33c8 2164 * The file size can grow, shrink, or stay the same size. This
3e7aafc3 2165 * is called holding i_rwsem and an exclusive glock on the inode
ff8f33c8 2166 * in question.
b3b94faa
DT
2167 *
2168 * Returns: errno
2169 */
2170
ff8f33c8 2171int gfs2_setattr_size(struct inode *inode, u64 newsize)
b3b94faa 2172{
af5c2697 2173 struct gfs2_inode *ip = GFS2_I(inode);
ff8f33c8 2174 int ret;
b3b94faa 2175
ff8f33c8 2176 BUG_ON(!S_ISREG(inode->i_mode));
b3b94faa 2177
ff8f33c8
SW
2178 ret = inode_newsize_ok(inode, newsize);
2179 if (ret)
2180 return ret;
b3b94faa 2181
562c72aa
CH
2182 inode_dio_wait(inode);
2183
b54e9a0b 2184 ret = gfs2_rsqa_alloc(ip);
d2b47cfb 2185 if (ret)
2b3dcf35 2186 goto out;
d2b47cfb 2187
8b5860a3 2188 if (newsize >= inode->i_size) {
2b3dcf35
BP
2189 ret = do_grow(inode, newsize);
2190 goto out;
2191 }
ff8f33c8 2192
8b5860a3 2193 ret = do_shrink(inode, newsize);
2b3dcf35 2194out:
a097dc7e 2195 gfs2_rsqa_delete(ip, NULL);
2b3dcf35 2196 return ret;
b3b94faa
DT
2197}
2198
2199int gfs2_truncatei_resume(struct gfs2_inode *ip)
2200{
2201 int error;
10d2cf94 2202 error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
b3b94faa
DT
2203 if (!error)
2204 error = trunc_end(ip);
2205 return error;
2206}
2207
/* Deallocate all data and metadata blocks of the file: punching a hole
 * from offset 0 with length 0 means "to the end of the file". */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	return punch_hole(ip, 0, 0);
}
2212
b50f227b
SW
2213/**
2214 * gfs2_free_journal_extents - Free cached journal bmap info
2215 * @jd: The journal
2216 *
2217 */
2218
2219void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
2220{
2221 struct gfs2_journal_extent *jext;
2222
2223 while(!list_empty(&jd->extent_list)) {
2224 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
2225 list_del(&jext->list);
2226 kfree(jext);
2227 }
2228}
2229
2230/**
2231 * gfs2_add_jextent - Add or merge a new extent to extent cache
2232 * @jd: The journal descriptor
2233 * @lblock: The logical block at start of new extent
c62baf65 2234 * @dblock: The physical block at start of new extent
b50f227b
SW
2235 * @blocks: Size of extent in fs blocks
2236 *
2237 * Returns: 0 on success or -ENOMEM
2238 */
2239
2240static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
2241{
2242 struct gfs2_journal_extent *jext;
2243
2244 if (!list_empty(&jd->extent_list)) {
2245 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
2246 if ((jext->dblock + jext->blocks) == dblock) {
2247 jext->blocks += blocks;
2248 return 0;
2249 }
2250 }
2251
2252 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
2253 if (jext == NULL)
2254 return -ENOMEM;
2255 jext->dblock = dblock;
2256 jext->lblock = lblock;
2257 jext->blocks = blocks;
2258 list_add_tail(&jext->list, &jd->extent_list);
2259 jd->nr_extents++;
2260 return 0;
2261}
2262
2263/**
2264 * gfs2_map_journal_extents - Cache journal bmap info
2265 * @sdp: The super block
2266 * @jd: The journal to map
2267 *
2268 * Create a reusable "extent" mapping from all logical
2269 * blocks to all physical blocks for the given journal. This will save
2270 * us time when writing journal blocks. Most journals will have only one
2271 * extent that maps all their logical blocks. That's because gfs2.mkfs
2272 * arranges the journal blocks sequentially to maximize performance.
2273 * So the extent would map the first block for the entire file length.
2274 * However, gfs2_jadd can happen while file activity is happening, so
2275 * those journals may not be sequential. Less likely is the case where
2276 * the users created their own journals by mounting the metafs and
2277 * laying it out. But it's still possible. These journals might have
2278 * several extents.
2279 *
2280 * Returns: 0 on success, or error on failure
2281 */
2282
2283int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
2284{
2285 u64 lblock = 0;
2286 u64 lblock_stop;
2287 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
2288 struct buffer_head bh;
2289 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
2290 u64 size;
2291 int rc;
98583b3e 2292 ktime_t start, end;
b50f227b 2293
98583b3e 2294 start = ktime_get();
b50f227b
SW
2295 lblock_stop = i_size_read(jd->jd_inode) >> shift;
2296 size = (lblock_stop - lblock) << shift;
2297 jd->nr_extents = 0;
2298 WARN_ON(!list_empty(&jd->extent_list));
2299
2300 do {
2301 bh.b_state = 0;
2302 bh.b_blocknr = 0;
2303 bh.b_size = size;
2304 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
2305 if (rc || !buffer_mapped(&bh))
2306 goto fail;
2307 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
2308 if (rc)
2309 goto fail;
2310 size -= bh.b_size;
2311 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2312 } while(size > 0);
2313
98583b3e
AD
2314 end = ktime_get();
2315 fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid,
2316 jd->nr_extents, ktime_ms_delta(end, start));
b50f227b
SW
2317 return 0;
2318
2319fail:
2320 fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
2321 rc, jd->jd_jid,
2322 (unsigned long long)(i_size_read(jd->jd_inode) - size),
2323 jd->nr_extents);
2324 fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
2325 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
2326 bh.b_state, (unsigned long long)bh.b_size);
2327 gfs2_free_journal_extents(jd);
2328 return rc;
2329}
2330
b3b94faa
DT
2331/**
2332 * gfs2_write_alloc_required - figure out if a write will require an allocation
2333 * @ip: the file being written to
2334 * @offset: the offset to write to
2335 * @len: the number of bytes being written
b3b94faa 2336 *
461cb419 2337 * Returns: 1 if an alloc is required, 0 otherwise
b3b94faa
DT
2338 */
2339
cd915493 2340int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
461cb419 2341 unsigned int len)
b3b94faa 2342{
feaa7bba 2343 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
941e6d7d
SW
2344 struct buffer_head bh;
2345 unsigned int shift;
2346 u64 lblock, lblock_stop, size;
7ed122e4 2347 u64 end_of_file;
b3b94faa 2348
b3b94faa
DT
2349 if (!len)
2350 return 0;
2351
2352 if (gfs2_is_stuffed(ip)) {
235628c5 2353 if (offset + len > gfs2_max_stuffed_size(ip))
461cb419 2354 return 1;
b3b94faa
DT
2355 return 0;
2356 }
2357
941e6d7d 2358 shift = sdp->sd_sb.sb_bsize_shift;
7ed122e4 2359 BUG_ON(gfs2_is_dir(ip));
a2e0f799 2360 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
7ed122e4
SW
2361 lblock = offset >> shift;
2362 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
77612578 2363 if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
461cb419 2364 return 1;
b3b94faa 2365
941e6d7d
SW
2366 size = (lblock_stop - lblock) << shift;
2367 do {
2368 bh.b_state = 0;
2369 bh.b_size = size;
2370 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
2371 if (!buffer_mapped(&bh))
461cb419 2372 return 1;
941e6d7d
SW
2373 size -= bh.b_size;
2374 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2375 } while(size > 0);
b3b94faa
DT
2376
2377 return 0;
2378}
2379
4e56a641
AG
2380static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2381{
2382 struct gfs2_inode *ip = GFS2_I(inode);
2383 struct buffer_head *dibh;
2384 int error;
2385
2386 if (offset >= inode->i_size)
2387 return 0;
2388 if (offset + length > inode->i_size)
2389 length = inode->i_size - offset;
2390
2391 error = gfs2_meta_inode_buffer(ip, &dibh);
2392 if (error)
2393 return error;
2394 gfs2_trans_add_meta(ip->i_gl, dibh);
2395 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2396 length);
2397 brelse(dibh);
2398 return 0;
2399}
2400
2401static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2402 loff_t length)
2403{
2404 struct gfs2_sbd *sdp = GFS2_SB(inode);
2405 loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2406 int error;
2407
2408 while (length) {
2409 struct gfs2_trans *tr;
2410 loff_t chunk;
2411 unsigned int offs;
2412
2413 chunk = length;
2414 if (chunk > max_chunk)
2415 chunk = max_chunk;
2416
2417 offs = offset & ~PAGE_MASK;
2418 if (offs && chunk > PAGE_SIZE)
2419 chunk = offs + ((chunk - offs) & PAGE_MASK);
2420
2421 truncate_pagecache_range(inode, offset, chunk);
2422 offset += chunk;
2423 length -= chunk;
2424
2425 tr = current->journal_info;
2426 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2427 continue;
2428
2429 gfs2_trans_end(sdp);
2430 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2431 if (error)
2432 return error;
2433 }
2434 return 0;
2435}
2436
2437int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2438{
2439 struct inode *inode = file_inode(file);
2440 struct gfs2_inode *ip = GFS2_I(inode);
2441 struct gfs2_sbd *sdp = GFS2_SB(inode);
2442 int error;
2443
2444 if (gfs2_is_jdata(ip))
2445 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2446 GFS2_JTRUNC_REVOKES);
2447 else
2448 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2449 if (error)
2450 return error;
2451
2452 if (gfs2_is_stuffed(ip)) {
2453 error = stuffed_zero_range(inode, offset, length);
2454 if (error)
2455 goto out;
2456 } else {
00251a16 2457 unsigned int start_off, end_len, blocksize;
4e56a641
AG
2458
2459 blocksize = i_blocksize(inode);
2460 start_off = offset & (blocksize - 1);
00251a16 2461 end_len = (offset + length) & (blocksize - 1);
4e56a641
AG
2462 if (start_off) {
2463 unsigned int len = length;
2464 if (length > blocksize - start_off)
2465 len = blocksize - start_off;
2466 error = gfs2_block_zero_range(inode, offset, len);
2467 if (error)
2468 goto out;
2469 if (start_off + length < blocksize)
00251a16 2470 end_len = 0;
4e56a641 2471 }
00251a16 2472 if (end_len) {
4e56a641 2473 error = gfs2_block_zero_range(inode,
00251a16 2474 offset + length - end_len, end_len);
4e56a641
AG
2475 if (error)
2476 goto out;
2477 }
2478 }
2479
2480 if (gfs2_is_jdata(ip)) {
2481 BUG_ON(!current->journal_info);
2482 gfs2_journaled_truncate_range(inode, offset, length);
2483 } else
2484 truncate_pagecache_range(inode, offset, offset + length - 1);
2485
2486 file_update_time(file);
2487 mark_inode_dirty(inode);
2488
2489 if (current->journal_info)
2490 gfs2_trans_end(sdp);
2491
2492 if (!gfs2_is_stuffed(ip))
2493 error = punch_hole(ip, offset, length);
2494
2495out:
2496 if (current->journal_info)
2497 gfs2_trans_end(sdp);
2498 return error;
2499}