]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/gfs2/bmap.c
Linux 5.3-rc3
[mirror_ubuntu-jammy-kernel.git] / fs / gfs2 / bmap.c
CommitLineData
7336d0e6 1// SPDX-License-Identifier: GPL-2.0-only
b3b94faa
DT
2/*
3 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3a8a9a10 4 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
b3b94faa
DT
5 */
6
b3b94faa
DT
7#include <linux/spinlock.h>
8#include <linux/completion.h>
9#include <linux/buffer_head.h>
64dd153c 10#include <linux/blkdev.h>
5c676f6d 11#include <linux/gfs2_ondisk.h>
71b86f56 12#include <linux/crc32.h>
3974320c 13#include <linux/iomap.h>
98583b3e 14#include <linux/ktime.h>
b3b94faa
DT
15
16#include "gfs2.h"
5c676f6d 17#include "incore.h"
b3b94faa
DT
18#include "bmap.h"
19#include "glock.h"
20#include "inode.h"
b3b94faa 21#include "meta_io.h"
b3b94faa
DT
22#include "quota.h"
23#include "rgrp.h"
45138990 24#include "log.h"
4c16c36a 25#include "super.h"
b3b94faa 26#include "trans.h"
18ec7d5c 27#include "dir.h"
5c676f6d 28#include "util.h"
64bc06bb 29#include "aops.h"
63997775 30#include "trace_gfs2.h"
b3b94faa
DT
31
/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	/* Buffers along the path; gfs2_iomap_get() stores the dinode buffer in [0] */
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	/* Pointer index to follow at each height, filled in by find_metapath() */
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};
42
64bc06bb
AG
43static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
44
f25ef0c1
SW
45/**
46 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
47 * @ip: the inode
48 * @dibh: the dinode buffer
49 * @block: the block number that was allocated
ff8f33c8 50 * @page: The (optional) page. This is looked up if @page is NULL
f25ef0c1
SW
51 *
52 * Returns: errno
53 */
54
55static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
cd915493 56 u64 block, struct page *page)
f25ef0c1 57{
f25ef0c1
SW
58 struct inode *inode = &ip->i_inode;
59 struct buffer_head *bh;
60 int release = 0;
61
62 if (!page || page->index) {
220cca2a 63 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
f25ef0c1
SW
64 if (!page)
65 return -ENOMEM;
66 release = 1;
67 }
68
69 if (!PageUptodate(page)) {
70 void *kaddr = kmap(page);
602c89d2
SW
71 u64 dsize = i_size_read(inode);
72
235628c5
AG
73 if (dsize > gfs2_max_stuffed_size(ip))
74 dsize = gfs2_max_stuffed_size(ip);
f25ef0c1 75
602c89d2 76 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
09cbfeaf 77 memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
f25ef0c1
SW
78 kunmap(page);
79
80 SetPageUptodate(page);
81 }
82
83 if (!page_has_buffers(page))
47a9a527
FF
84 create_empty_buffers(page, BIT(inode->i_blkbits),
85 BIT(BH_Uptodate));
f25ef0c1
SW
86
87 bh = page_buffers(page);
88
89 if (!buffer_mapped(bh))
90 map_bh(bh, inode->i_sb, block);
91
92 set_buffer_uptodate(bh);
845802b1 93 if (gfs2_is_jdata(ip))
350a9b0a 94 gfs2_trans_add_data(ip->i_gl, bh);
845802b1
AG
95 else {
96 mark_buffer_dirty(bh);
97 gfs2_ordered_add_inode(ip);
98 }
f25ef0c1
SW
99
100 if (release) {
101 unlock_page(page);
09cbfeaf 102 put_page(page);
f25ef0c1
SW
103 }
104
105 return 0;
106}
107
b3b94faa
DT
108/**
109 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
110 * @ip: The GFS2 inode to unstuff
ff8f33c8 111 * @page: The (optional) page. This is looked up if the @page is NULL
b3b94faa
DT
112 *
113 * This routine unstuffs a dinode and returns it to a "normal" state such
114 * that the height can be grown in the traditional way.
115 *
116 * Returns: errno
117 */
118
f25ef0c1 119int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
b3b94faa
DT
120{
121 struct buffer_head *bh, *dibh;
48516ced 122 struct gfs2_dinode *di;
cd915493 123 u64 block = 0;
18ec7d5c 124 int isdir = gfs2_is_dir(ip);
b3b94faa
DT
125 int error;
126
127 down_write(&ip->i_rw_mutex);
128
129 error = gfs2_meta_inode_buffer(ip, &dibh);
130 if (error)
131 goto out;
907b9bce 132
a2e0f799 133 if (i_size_read(&ip->i_inode)) {
b3b94faa
DT
134 /* Get a free block, fill it with the stuffed data,
135 and write it out to disk */
136
b45e41d7 137 unsigned int n = 1;
6e87ed0f 138 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
09010978
SW
139 if (error)
140 goto out_brelse;
18ec7d5c 141 if (isdir) {
fbb27873 142 gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1);
61e085a8 143 error = gfs2_dir_get_new_buffer(ip, block, &bh);
b3b94faa
DT
144 if (error)
145 goto out_brelse;
48516ced 146 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
b3b94faa
DT
147 dibh, sizeof(struct gfs2_dinode));
148 brelse(bh);
149 } else {
f25ef0c1 150 error = gfs2_unstuffer_page(ip, dibh, block, page);
b3b94faa
DT
151 if (error)
152 goto out_brelse;
153 }
154 }
155
156 /* Set up the pointer to the new block */
157
350a9b0a 158 gfs2_trans_add_meta(ip->i_gl, dibh);
48516ced 159 di = (struct gfs2_dinode *)dibh->b_data;
b3b94faa
DT
160 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
161
a2e0f799 162 if (i_size_read(&ip->i_inode)) {
48516ced 163 *(__be64 *)(di + 1) = cpu_to_be64(block);
77658aad
SW
164 gfs2_add_inode_blocks(&ip->i_inode, 1);
165 di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
b3b94faa
DT
166 }
167
ecc30c79 168 ip->i_height = 1;
48516ced 169 di->di_height = cpu_to_be16(1);
b3b94faa 170
a91ea69f 171out_brelse:
b3b94faa 172 brelse(dibh);
a91ea69f 173out:
b3b94faa 174 up_write(&ip->i_rw_mutex);
b3b94faa
DT
175 return error;
176}
177
b3b94faa
DT
178
179/**
180 * find_metapath - Find path through the metadata tree
9b8c81d1 181 * @sdp: The superblock
b3b94faa 182 * @block: The disk block to look up
07e23d68 183 * @mp: The metapath to return the result in
9b8c81d1 184 * @height: The pre-calculated height of the metadata tree
b3b94faa
DT
185 *
186 * This routine returns a struct metapath structure that defines a path
187 * through the metadata of inode "ip" to get to block "block".
188 *
189 * Example:
190 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
191 * filesystem with a blocksize of 4096.
192 *
193 * find_metapath() would return a struct metapath structure set to:
07e23d68 194 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
b3b94faa
DT
195 *
196 * That means that in order to get to the block containing the byte at
197 * offset 101342453, we would load the indirect block pointed to by pointer
198 * 0 in the dinode. We would then load the indirect block pointed to by
199 * pointer 48 in that indirect block. We would then load the data block
200 * pointed to by pointer 165 in that indirect block.
201 *
202 * ----------------------------------------
203 * | Dinode | |
204 * | | 4|
205 * | |0 1 2 3 4 5 9|
206 * | | 6|
207 * ----------------------------------------
208 * |
209 * |
210 * V
211 * ----------------------------------------
212 * | Indirect Block |
213 * | 5|
214 * | 4 4 4 4 4 5 5 1|
215 * |0 5 6 7 8 9 0 1 2|
216 * ----------------------------------------
217 * |
218 * |
219 * V
220 * ----------------------------------------
221 * | Indirect Block |
222 * | 1 1 1 1 1 5|
223 * | 6 6 6 6 6 1|
224 * |0 3 4 5 6 7 2|
225 * ----------------------------------------
226 * |
227 * |
228 * V
229 * ----------------------------------------
230 * | Data block containing offset |
231 * | 101342453 |
232 * | |
233 * | |
234 * ----------------------------------------
235 *
236 */
237
9b8c81d1
SW
238static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
239 struct metapath *mp, unsigned int height)
b3b94faa 240{
b3b94faa
DT
241 unsigned int i;
242
5f8bd444 243 mp->mp_fheight = height;
9b8c81d1 244 for (i = height; i--;)
7eabb77e 245 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
b3b94faa
DT
246}
247
5af4e7a0 248static inline unsigned int metapath_branch_start(const struct metapath *mp)
9b8c81d1 249{
5af4e7a0
BM
250 if (mp->mp_list[0] == 0)
251 return 2;
252 return 1;
9b8c81d1
SW
253}
254
d552a2b9 255/**
20cdc193 256 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
d552a2b9
BP
257 * @height: The metadata height (0 = dinode)
258 * @mp: The metapath
259 */
260static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
261{
262 struct buffer_head *bh = mp->mp_bh[height];
263 if (height == 0)
264 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
265 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
266}
267
b3b94faa
DT
268/**
269 * metapointer - Return pointer to start of metadata in a buffer
b3b94faa
DT
270 * @height: The metadata height (0 = dinode)
271 * @mp: The metapath
272 *
273 * Return a pointer to the block number of the next height of the metadata
274 * tree given a buffer containing the pointer to the current height of the
275 * metadata tree.
276 */
277
9b8c81d1 278static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
b3b94faa 279{
d552a2b9
BP
280 __be64 *p = metaptr1(height, mp);
281 return p + mp->mp_list[height];
b3b94faa
DT
282}
283
7841b9f0
AG
284static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
285{
286 const struct buffer_head *bh = mp->mp_bh[height];
287 return (const __be64 *)(bh->b_data + bh->b_size);
288}
289
290static void clone_metapath(struct metapath *clone, struct metapath *mp)
291{
292 unsigned int hgt;
293
294 *clone = *mp;
295 for (hgt = 0; hgt < mp->mp_aheight; hgt++)
296 get_bh(clone->mp_bh[hgt]);
297}
298
5cf26b1e 299static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
b99b98dc 300{
b99b98dc
SW
301 const __be64 *t;
302
5cf26b1e 303 for (t = start; t < end; t++) {
c3ce5aa9
AG
304 struct buffer_head *rabh;
305
b99b98dc
SW
306 if (!*t)
307 continue;
308
309 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
310 if (trylock_buffer(rabh)) {
311 if (!buffer_uptodate(rabh)) {
312 rabh->b_end_io = end_buffer_read_sync;
e477b24b
CL
313 submit_bh(REQ_OP_READ,
314 REQ_RAHEAD | REQ_META | REQ_PRIO,
315 rabh);
b99b98dc
SW
316 continue;
317 }
318 unlock_buffer(rabh);
319 }
320 brelse(rabh);
321 }
322}
323
e8b43fe0
AG
324static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
325 unsigned int x, unsigned int h)
d552a2b9 326{
e8b43fe0
AG
327 for (; x < h; x++) {
328 __be64 *ptr = metapointer(x, mp);
329 u64 dblock = be64_to_cpu(*ptr);
330 int ret;
d552a2b9 331
e8b43fe0
AG
332 if (!dblock)
333 break;
334 ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
335 if (ret)
336 return ret;
337 }
338 mp->mp_aheight = x + 1;
339 return 0;
d552a2b9
BP
340}
341
b3b94faa 342/**
9b8c81d1
SW
343 * lookup_metapath - Walk the metadata tree to a specific point
344 * @ip: The inode
b3b94faa 345 * @mp: The metapath
b3b94faa 346 *
9b8c81d1
SW
347 * Assumes that the inode's buffer has already been looked up and
348 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
349 * by find_metapath().
350 *
351 * If this function encounters part of the tree which has not been
352 * allocated, it returns the current height of the tree at the point
353 * at which it found the unallocated block. Blocks which are found are
354 * added to the mp->mp_bh[] list.
b3b94faa 355 *
e8b43fe0 356 * Returns: error
b3b94faa
DT
357 */
358
9b8c81d1 359static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
11707ea0 360{
e8b43fe0 361 return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
dbac6710
SW
362}
363
d552a2b9
BP
364/**
365 * fillup_metapath - fill up buffers for the metadata path to a specific height
366 * @ip: The inode
367 * @mp: The metapath
368 * @h: The height to which it should be mapped
369 *
370 * Similar to lookup_metapath, but does lookups for a range of heights
371 *
c3ce5aa9 372 * Returns: error or the number of buffers filled
d552a2b9
BP
373 */
374
375static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
376{
e8b43fe0 377 unsigned int x = 0;
c3ce5aa9 378 int ret;
d552a2b9
BP
379
380 if (h) {
381 /* find the first buffer we need to look up. */
e8b43fe0
AG
382 for (x = h - 1; x > 0; x--) {
383 if (mp->mp_bh[x])
384 break;
d552a2b9
BP
385 }
386 }
c3ce5aa9
AG
387 ret = __fillup_metapath(ip, mp, x, h);
388 if (ret)
389 return ret;
390 return mp->mp_aheight - x - 1;
d552a2b9
BP
391}
392
64bc06bb 393static void release_metapath(struct metapath *mp)
dbac6710
SW
394{
395 int i;
396
9b8c81d1
SW
397 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
398 if (mp->mp_bh[i] == NULL)
399 break;
400 brelse(mp->mp_bh[i]);
64bc06bb 401 mp->mp_bh[i] = NULL;
9b8c81d1 402 }
11707ea0
SW
403}
404
30cbf189
SW
405/**
406 * gfs2_extent_length - Returns length of an extent of blocks
bcfe9413
AG
407 * @bh: The metadata block
408 * @ptr: Current position in @bh
409 * @limit: Max extent length to return
30cbf189
SW
410 * @eob: Set to 1 if we hit "end of block"
411 *
30cbf189
SW
412 * Returns: The length of the extent (minimum of one block)
413 */
414
bcfe9413 415static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
30cbf189 416{
bcfe9413 417 const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
30cbf189
SW
418 const __be64 *first = ptr;
419 u64 d = be64_to_cpu(*ptr);
420
421 *eob = 0;
422 do {
423 ptr++;
424 if (ptr >= end)
425 break;
bcfe9413 426 d++;
30cbf189
SW
427 } while(be64_to_cpu(*ptr) == d);
428 if (ptr >= end)
429 *eob = 1;
bcfe9413 430 return ptr - first;
30cbf189
SW
431}
432
7841b9f0
AG
/*
 * Callback used by gfs2_walk_metadata().  It is handed the metapath, the
 * pointer range [start, end) to examine at the current height, the
 * per-pointer @factor computed by the walk, and the caller's opaque @data.
 *
 * Return value, as interpreted by gfs2_walk_metadata():
 *   WALK_STOP - end the walk;
 *   WALK_NEXT - this range is done, continue with the following range;
 *   any other pointer - a non-zero entry within the range; the walk
 *                       moves the metapath to it and fills in the
 *                       heights below it.
 */
typedef const __be64 *(*gfs2_metadata_walker)(
		struct metapath *mp,
		const __be64 *start, const __be64 *end,
		u64 factor, void *data);

#define WALK_STOP ((__be64 *)0)
#define WALK_NEXT ((__be64 *)1)
440
/*
 * gfs2_walk_metadata - Walk the metadata tree starting at a metapath
 * @inode: The inode
 * @lblock: Logical start block (not referenced by this function's body)
 * @len: How far to walk, in the same units the @factor scaling produces
 *       (blocks, for the hole walker)
 * @mp: The metapath to start from
 * @walker: Callback invoked on each pointer range visited
 * @data: Opaque pointer passed through to @walker
 *
 * Buffers are only released or looked up on a private clone of @mp,
 * which is created the first time that becomes necessary and released
 * before returning.  Note that mp->mp_list[] of the caller's metapath
 * can still be adjusted once before the clone is made.
 *
 * Returns: 0, or the negative error from fillup_metapath()
 */
static int gfs2_walk_metadata(struct inode *inode, sector_t lblock,
		u64 len, struct metapath *mp, gfs2_metadata_walker walker,
		void *data)
{
	struct metapath clone;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *start, *end, *ptr;
	u64 factor = 1;
	unsigned int hgt;
	int ret = 0;

	/*
	 * One factor of sd_inptrs per height missing between the bottom of
	 * the tree and the metapath's actual height.  The loop leaves hgt
	 * at mp->mp_aheight - 1, the deepest height the metapath has.
	 */
	for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--)
		factor *= sdp->sd_inptrs;

	for (;;) {
		u64 step;

		/* Walk indirect block. */
		start = metapointer(hgt, mp);
		end = metaend(hgt, mp);

		/* Clamp the range so that no more than len is covered. */
		step = (end - start) * factor;
		if (step > len)
			end = start + DIV_ROUND_UP_ULL(len, factor);

		ptr = walker(mp, start, end, factor, data);
		if (ptr == WALK_STOP)
			break;
		if (step >= len)
			break;
		len -= step;
		if (ptr != WALK_NEXT) {
			/* The walker pointed at an entry: descend below it. */
			BUG_ON(!*ptr);
			mp->mp_list[hgt] += ptr - start;
			goto fill_up_metapath;
		}

lower_metapath:
		/* Decrease height of metapath. */
		if (mp != &clone) {
			clone_metapath(&clone, mp);
			mp = &clone;
		}
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		start = metapointer(hgt, mp);
		end = metaend(hgt, mp);
		if (start >= end) {
			/* Ran off the end of this block: go up another level. */
			mp->mp_list[hgt] = 0;
			if (!hgt)
				break;
			goto lower_metapath;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		if (mp != &clone) {
			clone_metapath(&clone, mp);
			mp = &clone;
		}
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			break;
		/* ret is the number of heights gained; undo one factor for each. */
		hgt += ret;
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	if (mp == &clone)
		release_metapath(mp);
	return ret;
}
521
/* State shared between gfs2_hole_size() and gfs2_hole_walker(). */
struct gfs2_hole_walker_args {
	u64 blocks;	/* running total accumulated by gfs2_hole_walker() */
};
525
526static const __be64 *gfs2_hole_walker(struct metapath *mp,
527 const __be64 *start, const __be64 *end,
528 u64 factor, void *data)
529{
530 struct gfs2_hole_walker_args *args = data;
531 const __be64 *ptr;
532
533 for (ptr = start; ptr < end; ptr++) {
534 if (*ptr) {
535 args->blocks += (ptr - start) * factor;
536 if (mp->mp_aheight == mp->mp_fheight)
537 return WALK_STOP;
538 return ptr; /* increase height */
539 }
540 }
541 args->blocks += (end - start) * factor;
542 return WALK_NEXT;
543}
544
545/**
546 * gfs2_hole_size - figure out the size of a hole
547 * @inode: The inode
548 * @lblock: The logical starting block number
549 * @len: How far to look (in blocks)
550 * @mp: The metapath at lblock
551 * @iomap: The iomap to store the hole size in
552 *
553 * This function modifies @mp.
554 *
555 * Returns: errno on error
556 */
557static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
558 struct metapath *mp, struct iomap *iomap)
559{
560 struct gfs2_hole_walker_args args = { };
561 int ret = 0;
562
563 ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args);
564 if (!ret)
565 iomap->length = args.blocks << inode->i_blkbits;
566 return ret;
567}
568
9b8c81d1
SW
569static inline __be64 *gfs2_indirect_init(struct metapath *mp,
570 struct gfs2_glock *gl, unsigned int i,
571 unsigned offset, u64 bn)
572{
573 __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
574 ((i > 1) ? sizeof(struct gfs2_meta_header) :
575 sizeof(struct gfs2_dinode)));
576 BUG_ON(i < 1);
577 BUG_ON(mp->mp_bh[i] != NULL);
578 mp->mp_bh[i] = gfs2_meta_new(gl, bn);
350a9b0a 579 gfs2_trans_add_meta(gl, mp->mp_bh[i]);
9b8c81d1
SW
580 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
581 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
582 ptr += offset;
583 *ptr = cpu_to_be64(bn);
584 return ptr;
585}
586
/* States of the allocation state machine in gfs2_iomap_alloc(). */
enum alloc_state {
	ALLOC_DATA = 0,		/* bottom indirect block exists; add data blocks */
	ALLOC_GROW_DEPTH = 1,	/* extend the existing tree downwards */
	ALLOC_GROW_HEIGHT = 2,	/* build up the height of the tree */
	/* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
};
593
/**
 * gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap structure; iomap->length on entry gives the number of
 *         data blocks wanted, and addr, type, flags and length are
 *         filled in on success
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 *   i) Indirect blocks to grow the metadata tree height
 *  ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * This function is called after gfs2_iomap_get, which works out the
 * total number of blocks which we need via gfs2_alloc_size.
 *
 * We then do the actual allocation asking for an extent at a time (if
 * enough contiguous free blocks are available, there will only be one
 * allocation request per call) and use the state machine to initialise
 * the blocks in order.
 *
 * Right now, this function will allocate at most one indirect block
 * worth of data -- with a default block size of 4K, that's slightly
 * less than 2M.  If this limitation is ever removed to allow huge
 * allocations, we would probably still want to limit the iomap size we
 * return to avoid stalling other tasks during huge writes; the next
 * iomap iteration would then find the blocks already allocated.
 *
 * The inode's i_rw_mutex is held for writing while the tree is modified.
 *
 * Returns: errno on error
 */

static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			    struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	size_t dblks = iomap->length >> inode->i_blkbits;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	int ret;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);
	BUG_ON(dblks < 1);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	down_write(&ip->i_rw_mutex);

	/* First part: pick the starting state and count indirect blocks. */
	if (mp->mp_fheight == mp->mp_aheight) {
		/* Bottom indirect block exists */
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		/* Ask for everything still missing; n comes back as the count granted. */
		n = blks - alloced;
		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (ret)
			goto out;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_remove_revoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				/* Remember the dinode's first pointer slot. */
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				/*
				 * All new top levels exist: move the dinode's
				 * old pointer array into the lowest new block
				 * and leave only the first pointer in the
				 * dinode.
				 */
				i--;
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				/* Drop buffers from branch_start down before re-branching. */
				for(i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
		/* fall through - To branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
		/* fall through - To tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			/* Only the n blocks of this extent are mapped. */
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	/* iomap->addr is only set in ALLOC_DATA, so loop until data blocks are placed. */
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->type = IOMAP_MAPPED;
	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, dibh->b_data);
out:
	up_write(&ip->i_rw_mutex);
	return ret;
}
744
7ee66c03
CH
/*
 * GFS2's use of the filesystem-private iomap flag: gfs2_iomap_get() sets
 * it when the mapped extent runs up to the end of its metadata block.
 */
#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
746
64bc06bb
AG
747/**
748 * gfs2_alloc_size - Compute the maximum allocation size
749 * @inode: The inode
750 * @mp: The metapath
751 * @size: Requested size in blocks
752 *
753 * Compute the maximum size of the next allocation at @mp.
754 *
755 * Returns: size in blocks
756 */
757static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
3974320c
BP
758{
759 struct gfs2_inode *ip = GFS2_I(inode);
64bc06bb
AG
760 struct gfs2_sbd *sdp = GFS2_SB(inode);
761 const __be64 *first, *ptr, *end;
762
763 /*
764 * For writes to stuffed files, this function is called twice via
765 * gfs2_iomap_get, before and after unstuffing. The size we return the
766 * first time needs to be large enough to get the reservation and
767 * allocation sizes right. The size we return the second time must
768 * be exact or else gfs2_iomap_alloc won't do the right thing.
769 */
770
771 if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
772 unsigned int maxsize = mp->mp_fheight > 1 ?
773 sdp->sd_inptrs : sdp->sd_diptrs;
774 maxsize -= mp->mp_list[mp->mp_fheight - 1];
775 if (size > maxsize)
776 size = maxsize;
777 return size;
778 }
3974320c 779
64bc06bb
AG
780 first = metapointer(ip->i_height - 1, mp);
781 end = metaend(ip->i_height - 1, mp);
782 if (end - first > size)
783 end = first + size;
784 for (ptr = first; ptr < end; ptr++) {
785 if (*ptr)
786 break;
787 }
788 return ptr - first;
3974320c
BP
789}
790
/**
 * gfs2_iomap_get - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 * @mp: The metapath
 *
 * Takes the inode's i_rw_mutex for reading for the duration of the call.
 * The dinode buffer is stored in mp->mp_bh[0] and is not released here,
 * neither on success nor on error; callers drop it with
 * release_metapath(), as gfs2_lblk_to_dblk() does.
 *
 * Returns: errno (-EINVAL when @length is zero)
 */
static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			  unsigned flags, struct iomap *iomap,
			  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t size = i_size_read(inode);
	__be64 *ptr;
	sector_t lblock;
	sector_t lblock_stop;
	int ret;
	int eob;
	u64 len;
	struct buffer_head *dibh = NULL, *bh;
	u8 height;

	if (!length)
		return -EINVAL;

	down_read(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		goto unlock;
	mp->mp_bh[0] = dibh;

	if (gfs2_is_stuffed(ip)) {
		if (flags & IOMAP_WRITE) {
			loff_t max_size = gfs2_max_stuffed_size(ip);

			/* The write will not fit inline: map it as block-based. */
			if (pos + length > max_size)
				goto unstuff;
			iomap->length = max_size;
		} else {
			if (pos >= size) {
				if (flags & IOMAP_REPORT) {
					ret = -ENOENT;
					goto unlock;
				} else {
					/* report a hole */
					iomap->offset = pos;
					iomap->length = length;
					/*
					 * lblock, len and height are not set
					 * on this path.  With neither
					 * IOMAP_REPORT nor IOMAP_WRITE set,
					 * do_alloc reaches its final branch,
					 * where the "pos < size" test fails
					 * before height is read.
					 */
					goto do_alloc;
				}
			}
			iomap->length = size;
		}
		/* Inline data lives in the dinode block, after the header. */
		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
			      sizeof(struct gfs2_dinode);
		iomap->type = IOMAP_INLINE;
		iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
		goto out;
	}

unstuff:
	/* Round the byte range out to whole blocks. */
	lblock = pos >> inode->i_blkbits;
	iomap->offset = lblock << inode->i_blkbits;
	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
	len = lblock_stop - lblock + 1;
	iomap->length = len << inode->i_blkbits;

	/* Find the tree height needed to reach lblock. */
	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, mp, height);
	/* Tree too short, or still stuffed: nothing to look up. */
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, mp);
	if (ret)
		goto unlock;

	/* The lookup stopped early: part of the path is unallocated. */
	if (mp->mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, mp);
	if (*ptr == 0)
		goto do_alloc;

	bh = mp->mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh, ptr, len, &eob);

	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_MAPPED;
	iomap->flags |= IOMAP_F_MERGED;
	if (eob)
		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
	iomap->bdev = inode->i_sb->s_bdev;
unlock:
	up_read(&ip->i_rw_mutex);
	return ret;

do_alloc:
	/* Nothing is mapped at pos: describe the hole. */
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	if (flags & IOMAP_REPORT) {
		if (pos >= size)
			ret = -ENOENT;
		else if (height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
		else
			iomap->length = size - pos;
	} else if (flags & IOMAP_WRITE) {
		u64 alloc_size;

		if (flags & IOMAP_DIRECT)
			goto out;  /* (see gfs2_file_direct_write) */

		/* Never report more than the next allocation can cover. */
		len = gfs2_alloc_size(inode, mp, len);
		alloc_size = len << inode->i_blkbits;
		if (alloc_size < iomap->length)
			iomap->length = alloc_size;
	} else {
		if (pos < size && height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
	}
	goto out;
}
923
7c70b896
BP
924/**
925 * gfs2_lblk_to_dblk - convert logical block to disk block
926 * @inode: the inode of the file we're mapping
927 * @lblock: the block relative to the start of the file
928 * @dblock: the returned dblock, if no error
929 *
930 * This function maps a single block from a file logical block (relative to
931 * the start of the file) to a file system absolute block using iomap.
932 *
933 * Returns: the absolute file system block, or an error
934 */
935int gfs2_lblk_to_dblk(struct inode *inode, u32 lblock, u64 *dblock)
936{
937 struct iomap iomap = { };
938 struct metapath mp = { .mp_aheight = 1, };
939 loff_t pos = (loff_t)lblock << inode->i_blkbits;
940 int ret;
941
942 ret = gfs2_iomap_get(inode, pos, i_blocksize(inode), 0, &iomap, &mp);
943 release_metapath(&mp);
944 if (ret == 0)
945 *dblock = iomap.addr >> inode->i_blkbits;
946
947 return ret;
948}
949
64bc06bb
AG
950static int gfs2_write_lock(struct inode *inode)
951{
952 struct gfs2_inode *ip = GFS2_I(inode);
953 struct gfs2_sbd *sdp = GFS2_SB(inode);
954 int error;
955
956 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
957 error = gfs2_glock_nq(&ip->i_gh);
958 if (error)
959 goto out_uninit;
960 if (&ip->i_inode == sdp->sd_rindex) {
961 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
962
963 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
964 GL_NOCACHE, &m_ip->i_gh);
965 if (error)
966 goto out_unlock;
967 }
968 return 0;
969
970out_unlock:
971 gfs2_glock_dq(&ip->i_gh);
972out_uninit:
973 gfs2_holder_uninit(&ip->i_gh);
974 return error;
975}
976
977static void gfs2_write_unlock(struct inode *inode)
978{
979 struct gfs2_inode *ip = GFS2_I(inode);
980 struct gfs2_sbd *sdp = GFS2_SB(inode);
981
982 if (&ip->i_inode == sdp->sd_rindex) {
983 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
984
985 gfs2_glock_dq_uninit(&m_ip->i_gh);
986 }
987 gfs2_glock_dq_uninit(&ip->i_gh);
988}
989
d0a22a4b
AG
990static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
991 unsigned len, struct iomap *iomap)
992{
2741b672 993 unsigned int blockmask = i_blocksize(inode) - 1;
d0a22a4b 994 struct gfs2_sbd *sdp = GFS2_SB(inode);
2741b672 995 unsigned int blocks;
d0a22a4b 996
2741b672
AG
997 blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
998 return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
d0a22a4b
AG
999}
1000
df0db3ec
AG
1001static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
1002 unsigned copied, struct page *page,
1003 struct iomap *iomap)
64bc06bb 1004{
706cb549 1005 struct gfs2_trans *tr = current->journal_info;
64bc06bb 1006 struct gfs2_inode *ip = GFS2_I(inode);
d0a22a4b 1007 struct gfs2_sbd *sdp = GFS2_SB(inode);
64bc06bb 1008
d0a22a4b 1009 if (page && !gfs2_is_stuffed(ip))
df0db3ec 1010 gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
706cb549
AG
1011
1012 if (tr->tr_num_buf_new)
1013 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1014
d0a22a4b 1015 gfs2_trans_end(sdp);
64bc06bb
AG
1016}
1017
df0db3ec 1018static const struct iomap_page_ops gfs2_iomap_page_ops = {
d0a22a4b 1019 .page_prepare = gfs2_iomap_page_prepare,
df0db3ec
AG
1020 .page_done = gfs2_iomap_page_done,
1021};
1022
64bc06bb
AG
1023static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
1024 loff_t length, unsigned flags,
c26b5aa8
AG
1025 struct iomap *iomap,
1026 struct metapath *mp)
64bc06bb 1027{
64bc06bb
AG
1028 struct gfs2_inode *ip = GFS2_I(inode);
1029 struct gfs2_sbd *sdp = GFS2_SB(inode);
1030 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
1031 bool unstuff, alloc_required;
1032 int ret;
1033
1034 ret = gfs2_write_lock(inode);
1035 if (ret)
1036 return ret;
1037
1038 unstuff = gfs2_is_stuffed(ip) &&
1039 pos + length > gfs2_max_stuffed_size(ip);
1040
c26b5aa8 1041 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp);
64bc06bb 1042 if (ret)
c26b5aa8 1043 goto out_unlock;
64bc06bb
AG
1044
1045 alloc_required = unstuff || iomap->type == IOMAP_HOLE;
1046
1047 if (alloc_required || gfs2_is_jdata(ip))
1048 gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
1049 &ind_blocks);
1050
1051 if (alloc_required) {
1052 struct gfs2_alloc_parms ap = {
1053 .target = data_blocks + ind_blocks
1054 };
1055
1056 ret = gfs2_quota_lock_check(ip, &ap);
1057 if (ret)
c26b5aa8 1058 goto out_unlock;
64bc06bb
AG
1059
1060 ret = gfs2_inplace_reserve(ip, &ap);
1061 if (ret)
1062 goto out_qunlock;
1063 }
1064
1065 rblocks = RES_DINODE + ind_blocks;
1066 if (gfs2_is_jdata(ip))
1067 rblocks += data_blocks;
1068 if (ind_blocks || data_blocks)
1069 rblocks += RES_STATFS + RES_QUOTA;
1070 if (inode == sdp->sd_rindex)
1071 rblocks += 2 * RES_STATFS;
1072 if (alloc_required)
1073 rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
1074
d0a22a4b
AG
1075 if (unstuff || iomap->type == IOMAP_HOLE) {
1076 struct gfs2_trans *tr;
64bc06bb 1077
d0a22a4b
AG
1078 ret = gfs2_trans_begin(sdp, rblocks,
1079 iomap->length >> inode->i_blkbits);
64bc06bb 1080 if (ret)
d0a22a4b
AG
1081 goto out_trans_fail;
1082
1083 if (unstuff) {
1084 ret = gfs2_unstuff_dinode(ip, NULL);
1085 if (ret)
1086 goto out_trans_end;
1087 release_metapath(mp);
1088 ret = gfs2_iomap_get(inode, iomap->offset,
1089 iomap->length, flags, iomap, mp);
1090 if (ret)
1091 goto out_trans_end;
1092 }
64bc06bb 1093
d0a22a4b 1094 if (iomap->type == IOMAP_HOLE) {
bb4cb25d 1095 ret = gfs2_iomap_alloc(inode, iomap, mp);
d0a22a4b
AG
1096 if (ret) {
1097 gfs2_trans_end(sdp);
1098 gfs2_inplace_release(ip);
1099 punch_hole(ip, iomap->offset, iomap->length);
1100 goto out_qunlock;
1101 }
64bc06bb 1102 }
d0a22a4b
AG
1103
1104 tr = current->journal_info;
1105 if (tr->tr_num_buf_new)
1106 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
d0a22a4b
AG
1107
1108 gfs2_trans_end(sdp);
64bc06bb 1109 }
d0a22a4b
AG
1110
1111 if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
df0db3ec 1112 iomap->page_ops = &gfs2_iomap_page_ops;
64bc06bb
AG
1113 return 0;
1114
1115out_trans_end:
1116 gfs2_trans_end(sdp);
1117out_trans_fail:
1118 if (alloc_required)
1119 gfs2_inplace_release(ip);
1120out_qunlock:
1121 if (alloc_required)
1122 gfs2_quota_unlock(ip);
c26b5aa8 1123out_unlock:
64bc06bb
AG
1124 gfs2_write_unlock(inode);
1125 return ret;
1126}
1127
628e366d
AG
1128static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
1129 unsigned flags, struct iomap *iomap)
1130{
1131 struct gfs2_inode *ip = GFS2_I(inode);
1132 struct metapath mp = { .mp_aheight = 1, };
1133 int ret;
1134
0ed91eca
AG
1135 iomap->flags |= IOMAP_F_BUFFER_HEAD;
1136
628e366d 1137 trace_gfs2_iomap_start(ip, pos, length, flags);
967bcc91 1138 if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
c26b5aa8 1139 ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
628e366d
AG
1140 } else {
1141 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
c26b5aa8 1142
967bcc91
AG
1143 /*
1144 * Silently fall back to buffered I/O for stuffed files or if
1145 * we've hot a hole (see gfs2_file_direct_write).
1146 */
1147 if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) &&
1148 iomap->type != IOMAP_MAPPED)
1149 ret = -ENOTBLK;
628e366d 1150 }
c26b5aa8 1151 release_metapath(&mp);
628e366d
AG
1152 trace_gfs2_iomap_end(ip, iomap, ret);
1153 return ret;
1154}
1155
64bc06bb
AG
1156static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
1157 ssize_t written, unsigned flags, struct iomap *iomap)
1158{
1159 struct gfs2_inode *ip = GFS2_I(inode);
1160 struct gfs2_sbd *sdp = GFS2_SB(inode);
64bc06bb 1161
967bcc91 1162 if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
64bc06bb
AG
1163 goto out;
1164
d0a22a4b 1165 if (!gfs2_is_stuffed(ip))
64bc06bb
AG
1166 gfs2_ordered_add_inode(ip);
1167
d0a22a4b 1168 if (inode == sdp->sd_rindex)
64bc06bb 1169 adjust_fs_space(inode);
64bc06bb 1170
64bc06bb
AG
1171 gfs2_inplace_release(ip);
1172
1173 if (length != written && (iomap->flags & IOMAP_F_NEW)) {
1174 /* Deallocate blocks that were just allocated. */
1175 loff_t blockmask = i_blocksize(inode) - 1;
1176 loff_t end = (pos + length) & ~blockmask;
1177
1178 pos = (pos + written + blockmask) & ~blockmask;
1179 if (pos < end) {
1180 truncate_pagecache_range(inode, pos, end - 1);
1181 punch_hole(ip, pos, end - pos);
1182 }
1183 }
1184
1185 if (ip->i_qadata && ip->i_qadata->qa_qd_num)
1186 gfs2_quota_unlock(ip);
706cb549
AG
1187
1188 if (unlikely(!written))
1189 goto out_unlock;
1190
8d3e72a1
AG
1191 if (iomap->flags & IOMAP_F_SIZE_CHANGED)
1192 mark_inode_dirty(inode);
706cb549 1193 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
64bc06bb 1194
706cb549
AG
1195out_unlock:
1196 gfs2_write_unlock(inode);
64bc06bb 1197out:
64bc06bb
AG
1198 return 0;
1199}
1200
628e366d
AG
1201const struct iomap_ops gfs2_iomap_ops = {
1202 .iomap_begin = gfs2_iomap_begin,
64bc06bb 1203 .iomap_end = gfs2_iomap_end,
628e366d
AG
1204};
1205
3974320c 1206/**
d39d18e0 1207 * gfs2_block_map - Map one or more blocks of an inode to a disk block
3974320c
BP
1208 * @inode: The inode
1209 * @lblock: The logical block number
1210 * @bh_map: The bh to be mapped
1211 * @create: True if its ok to alloc blocks to satify the request
1212 *
d39d18e0
AG
1213 * The size of the requested mapping is defined in bh_map->b_size.
1214 *
1215 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
1216 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
1217 * bh_map->b_size to indicate the size of the mapping when @lblock and
1218 * successive blocks are mapped, up to the requested size.
1219 *
1220 * Sets buffer_boundary() if a read of metadata will be required
1221 * before the next block can be mapped. Sets buffer_new() if new
1222 * blocks were allocated.
3974320c
BP
1223 *
1224 * Returns: errno
1225 */
1226
1227int gfs2_block_map(struct inode *inode, sector_t lblock,
1228 struct buffer_head *bh_map, int create)
1229{
1230 struct gfs2_inode *ip = GFS2_I(inode);
628e366d
AG
1231 loff_t pos = (loff_t)lblock << inode->i_blkbits;
1232 loff_t length = bh_map->b_size;
1233 struct metapath mp = { .mp_aheight = 1, };
1234 struct iomap iomap = { };
1235 int ret;
3974320c
BP
1236
1237 clear_buffer_mapped(bh_map);
1238 clear_buffer_new(bh_map);
1239 clear_buffer_boundary(bh_map);
1240 trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
1241
628e366d
AG
1242 if (create) {
1243 ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
1244 if (!ret && iomap.type == IOMAP_HOLE)
bb4cb25d 1245 ret = gfs2_iomap_alloc(inode, &iomap, &mp);
628e366d
AG
1246 release_metapath(&mp);
1247 } else {
1248 ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
1249 release_metapath(&mp);
3974320c 1250 }
628e366d
AG
1251 if (ret)
1252 goto out;
3974320c
BP
1253
1254 if (iomap.length > bh_map->b_size) {
1255 iomap.length = bh_map->b_size;
7ee66c03 1256 iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
5f8bd444 1257 }
3974320c
BP
1258 if (iomap.addr != IOMAP_NULL_ADDR)
1259 map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
1260 bh_map->b_size = iomap.length;
7ee66c03 1261 if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
3974320c
BP
1262 set_buffer_boundary(bh_map);
1263 if (iomap.flags & IOMAP_F_NEW)
1264 set_buffer_new(bh_map);
1265
1266out:
1267 trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
1268 return ret;
fd88de56
SW
1269}
1270
941e6d7d
SW
1271/*
1272 * Deprecated: do not use in new code
1273 */
fd88de56
SW
1274int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
1275{
23591256 1276 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
7a6bbacb 1277 int ret;
fd88de56
SW
1278 int create = *new;
1279
1280 BUG_ON(!extlen);
1281 BUG_ON(!dblock);
1282 BUG_ON(!new);
1283
47a9a527 1284 bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
e9e1ef2b 1285 ret = gfs2_block_map(inode, lblock, &bh, create);
7a6bbacb
SW
1286 *extlen = bh.b_size >> inode->i_blkbits;
1287 *dblock = bh.b_blocknr;
1288 if (buffer_new(&bh))
1289 *new = 1;
1290 else
1291 *new = 0;
1292 return ret;
b3b94faa
DT
1293}
1294
ba7f7290 1295/**
bdba0d5e 1296 * gfs2_block_zero_range - Deal with zeroing out data
ba7f7290
SW
1297 *
1298 * This is partly borrowed from ext3.
1299 */
bdba0d5e
AG
1300static int gfs2_block_zero_range(struct inode *inode, loff_t from,
1301 unsigned int length)
ba7f7290 1302{
bdba0d5e 1303 struct address_space *mapping = inode->i_mapping;
ba7f7290 1304 struct gfs2_inode *ip = GFS2_I(inode);
09cbfeaf
KS
1305 unsigned long index = from >> PAGE_SHIFT;
1306 unsigned offset = from & (PAGE_SIZE-1);
bdba0d5e 1307 unsigned blocksize, iblock, pos;
ba7f7290
SW
1308 struct buffer_head *bh;
1309 struct page *page;
ba7f7290
SW
1310 int err;
1311
220cca2a 1312 page = find_or_create_page(mapping, index, GFP_NOFS);
ba7f7290
SW
1313 if (!page)
1314 return 0;
1315
1316 blocksize = inode->i_sb->s_blocksize;
09cbfeaf 1317 iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
ba7f7290
SW
1318
1319 if (!page_has_buffers(page))
1320 create_empty_buffers(page, blocksize, 0);
1321
1322 /* Find the buffer that contains "offset" */
1323 bh = page_buffers(page);
1324 pos = blocksize;
1325 while (offset >= pos) {
1326 bh = bh->b_this_page;
1327 iblock++;
1328 pos += blocksize;
1329 }
1330
1331 err = 0;
1332
1333 if (!buffer_mapped(bh)) {
e9e1ef2b 1334 gfs2_block_map(inode, iblock, bh, 0);
ba7f7290
SW
1335 /* unmapped? It's a hole - nothing to do */
1336 if (!buffer_mapped(bh))
1337 goto unlock;
1338 }
1339
1340 /* Ok, it's mapped. Make sure it's up-to-date */
1341 if (PageUptodate(page))
1342 set_buffer_uptodate(bh);
1343
1344 if (!buffer_uptodate(bh)) {
1345 err = -EIO;
dfec8a14 1346 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
ba7f7290
SW
1347 wait_on_buffer(bh);
1348 /* Uhhuh. Read error. Complain and punt. */
1349 if (!buffer_uptodate(bh))
1350 goto unlock;
1875f2f3 1351 err = 0;
ba7f7290
SW
1352 }
1353
845802b1 1354 if (gfs2_is_jdata(ip))
350a9b0a 1355 gfs2_trans_add_data(ip->i_gl, bh);
845802b1
AG
1356 else
1357 gfs2_ordered_add_inode(ip);
ba7f7290 1358
eebd2aa3 1359 zero_user(page, offset, length);
40bc9a27 1360 mark_buffer_dirty(bh);
ba7f7290
SW
1361unlock:
1362 unlock_page(page);
09cbfeaf 1363 put_page(page);
ba7f7290
SW
1364 return err;
1365}
1366
c62baf65
FF
/*
 * Number of revokes reserved per transaction while truncating a jdata
 * file; also bounds, in blocks, how much is truncated per transaction.
 */
#define GFS2_JTRUNC_REVOKES 8192
1368
fa731fc4
SW
1369/**
1370 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
1371 * @inode: The inode being truncated
1372 * @oldsize: The original (larger) size
1373 * @newsize: The new smaller size
1374 *
1375 * With jdata files, we have to journal a revoke for each block which is
1376 * truncated. As a result, we need to split this into separate transactions
1377 * if the number of pages being truncated gets too large.
1378 */
1379
fa731fc4
SW
1380static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
1381{
1382 struct gfs2_sbd *sdp = GFS2_SB(inode);
1383 u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1384 u64 chunk;
1385 int error;
1386
1387 while (oldsize != newsize) {
e7fdf004
AG
1388 struct gfs2_trans *tr;
1389 unsigned int offs;
1390
fa731fc4
SW
1391 chunk = oldsize - newsize;
1392 if (chunk > max_chunk)
1393 chunk = max_chunk;
e7fdf004
AG
1394
1395 offs = oldsize & ~PAGE_MASK;
1396 if (offs && chunk > PAGE_SIZE)
1397 chunk = offs + ((chunk - offs) & PAGE_MASK);
1398
7caef267 1399 truncate_pagecache(inode, oldsize - chunk);
fa731fc4 1400 oldsize -= chunk;
e7fdf004
AG
1401
1402 tr = current->journal_info;
1403 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1404 continue;
1405
fa731fc4
SW
1406 gfs2_trans_end(sdp);
1407 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1408 if (error)
1409 return error;
1410 }
1411
1412 return 0;
1413}
1414
8b5860a3 1415static int trunc_start(struct inode *inode, u64 newsize)
b3b94faa 1416{
ff8f33c8
SW
1417 struct gfs2_inode *ip = GFS2_I(inode);
1418 struct gfs2_sbd *sdp = GFS2_SB(inode);
80990f40 1419 struct buffer_head *dibh = NULL;
b3b94faa 1420 int journaled = gfs2_is_jdata(ip);
8b5860a3 1421 u64 oldsize = inode->i_size;
b3b94faa
DT
1422 int error;
1423
fa731fc4
SW
1424 if (journaled)
1425 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1426 else
1427 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
b3b94faa
DT
1428 if (error)
1429 return error;
1430
1431 error = gfs2_meta_inode_buffer(ip, &dibh);
1432 if (error)
1433 goto out;
1434
350a9b0a 1435 gfs2_trans_add_meta(ip->i_gl, dibh);
ff8f33c8 1436
b3b94faa 1437 if (gfs2_is_stuffed(ip)) {
ff8f33c8 1438 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
b3b94faa 1439 } else {
bdba0d5e
AG
1440 unsigned int blocksize = i_blocksize(inode);
1441 unsigned int offs = newsize & (blocksize - 1);
1442 if (offs) {
1443 error = gfs2_block_zero_range(inode, newsize,
1444 blocksize - offs);
ff8f33c8 1445 if (error)
80990f40 1446 goto out;
b3b94faa 1447 }
ff8f33c8 1448 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
b3b94faa
DT
1449 }
1450
ff8f33c8 1451 i_size_write(inode, newsize);
078cd827 1452 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
ff8f33c8 1453 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa 1454
fa731fc4
SW
1455 if (journaled)
1456 error = gfs2_journaled_truncate(inode, oldsize, newsize);
1457 else
7caef267 1458 truncate_pagecache(inode, newsize);
fa731fc4 1459
a91ea69f 1460out:
80990f40
AG
1461 brelse(dibh);
1462 if (current->journal_info)
1463 gfs2_trans_end(sdp);
b3b94faa
DT
1464 return error;
1465}
1466
628e366d
AG
1467int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
1468 struct iomap *iomap)
1469{
1470 struct metapath mp = { .mp_aheight = 1, };
1471 int ret;
1472
1473 ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
1474 if (!ret && iomap->type == IOMAP_HOLE)
bb4cb25d 1475 ret = gfs2_iomap_alloc(inode, iomap, &mp);
628e366d
AG
1476 release_metapath(&mp);
1477 return ret;
1478}
1479
d552a2b9
BP
1480/**
1481 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1482 * @ip: inode
1483 * @rg_gh: holder of resource group glock
5cf26b1e
AG
1484 * @bh: buffer head to sweep
1485 * @start: starting point in bh
1486 * @end: end point in bh
1487 * @meta: true if bh points to metadata (rather than data)
d552a2b9 1488 * @btotal: place to keep count of total blocks freed
d552a2b9
BP
1489 *
1490 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1491 * free, and free them all. However, we do it one rgrp at a time. If this
1492 * block has references to multiple rgrps, we break it into individual
1493 * transactions. This allows other processes to use the rgrps while we're
1494 * focused on a single one, for better concurrency / performance.
1495 * At every transaction boundary, we rewrite the inode into the journal.
1496 * That way the bitmaps are kept consistent with the inode and we can recover
1497 * if we're interrupted by power-outages.
1498 *
1499 * Returns: 0, or return code if an error occurred.
1500 * *btotal has the total number of blocks freed
1501 */
1502static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
5cf26b1e
AG
1503 struct buffer_head *bh, __be64 *start, __be64 *end,
1504 bool meta, u32 *btotal)
b3b94faa 1505{
9b8c81d1 1506 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
d552a2b9
BP
1507 struct gfs2_rgrpd *rgd;
1508 struct gfs2_trans *tr;
5cf26b1e 1509 __be64 *p;
d552a2b9
BP
1510 int blks_outside_rgrp;
1511 u64 bn, bstart, isize_blks;
1512 s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
d552a2b9
BP
1513 int ret = 0;
1514 bool buf_in_tr = false; /* buffer was added to transaction */
1515
d552a2b9 1516more_rgrps:
5cf26b1e
AG
1517 rgd = NULL;
1518 if (gfs2_holder_initialized(rd_gh)) {
1519 rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
1520 gfs2_assert_withdraw(sdp,
1521 gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1522 }
d552a2b9
BP
1523 blks_outside_rgrp = 0;
1524 bstart = 0;
1525 blen = 0;
d552a2b9 1526
5cf26b1e 1527 for (p = start; p < end; p++) {
d552a2b9
BP
1528 if (!*p)
1529 continue;
1530 bn = be64_to_cpu(*p);
5cf26b1e
AG
1531
1532 if (rgd) {
1533 if (!rgrp_contains_block(rgd, bn)) {
1534 blks_outside_rgrp++;
1535 continue;
1536 }
d552a2b9 1537 } else {
90bcab99 1538 rgd = gfs2_blk2rgrpd(sdp, bn, true);
5cf26b1e
AG
1539 if (unlikely(!rgd)) {
1540 ret = -EIO;
1541 goto out;
1542 }
d552a2b9
BP
1543 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1544 0, rd_gh);
1545 if (ret)
1546 goto out;
1547
1548 /* Must be done with the rgrp glock held: */
1549 if (gfs2_rs_active(&ip->i_res) &&
1550 rgd == ip->i_res.rs_rbm.rgd)
1551 gfs2_rs_deltree(&ip->i_res);
1552 }
1553
d552a2b9
BP
1554 /* The size of our transactions will be unknown until we
1555 actually process all the metadata blocks that relate to
1556 the rgrp. So we estimate. We know it can't be more than
1557 the dinode's i_blocks and we don't want to exceed the
1558 journal flush threshold, sd_log_thresh2. */
1559 if (current->journal_info == NULL) {
1560 unsigned int jblocks_rqsted, revokes;
1561
1562 jblocks_rqsted = rgd->rd_length + RES_DINODE +
1563 RES_INDIRECT;
1564 isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
1565 if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
1566 jblocks_rqsted +=
1567 atomic_read(&sdp->sd_log_thresh2);
1568 else
1569 jblocks_rqsted += isize_blks;
1570 revokes = jblocks_rqsted;
1571 if (meta)
5cf26b1e 1572 revokes += end - start;
d552a2b9
BP
1573 else if (ip->i_depth)
1574 revokes += sdp->sd_inptrs;
1575 ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
1576 if (ret)
1577 goto out_unlock;
1578 down_write(&ip->i_rw_mutex);
1579 }
1580 /* check if we will exceed the transaction blocks requested */
1581 tr = current->journal_info;
1582 if (tr->tr_num_buf_new + RES_STATFS +
1583 RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
1584 /* We set blks_outside_rgrp to ensure the loop will
1585 be repeated for the same rgrp, but with a new
1586 transaction. */
1587 blks_outside_rgrp++;
1588 /* This next part is tricky. If the buffer was added
1589 to the transaction, we've already set some block
1590 pointers to 0, so we better follow through and free
1591 them, or we will introduce corruption (so break).
1592 This may be impossible, or at least rare, but I
1593 decided to cover the case regardless.
1594
1595 If the buffer was not added to the transaction
1596 (this call), doing so would exceed our transaction
1597 size, so we need to end the transaction and start a
1598 new one (so goto). */
1599
1600 if (buf_in_tr)
1601 break;
1602 goto out_unlock;
1603 }
1604
1605 gfs2_trans_add_meta(ip->i_gl, bh);
1606 buf_in_tr = true;
1607 *p = 0;
1608 if (bstart + blen == bn) {
1609 blen++;
1610 continue;
1611 }
1612 if (bstart) {
0ddeded4 1613 __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
d552a2b9
BP
1614 (*btotal) += blen;
1615 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1616 }
1617 bstart = bn;
1618 blen = 1;
1619 }
1620 if (bstart) {
0ddeded4 1621 __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
d552a2b9
BP
1622 (*btotal) += blen;
1623 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1624 }
1625out_unlock:
1626 if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
1627 outside the rgrp we just processed,
1628 do it all over again. */
1629 if (current->journal_info) {
5cf26b1e
AG
1630 struct buffer_head *dibh;
1631
1632 ret = gfs2_meta_inode_buffer(ip, &dibh);
1633 if (ret)
1634 goto out;
d552a2b9
BP
1635
1636 /* Every transaction boundary, we rewrite the dinode
1637 to keep its di_blocks current in case of failure. */
1638 ip->i_inode.i_mtime = ip->i_inode.i_ctime =
b32c8c76 1639 current_time(&ip->i_inode);
d552a2b9
BP
1640 gfs2_trans_add_meta(ip->i_gl, dibh);
1641 gfs2_dinode_out(ip, dibh->b_data);
5cf26b1e 1642 brelse(dibh);
d552a2b9
BP
1643 up_write(&ip->i_rw_mutex);
1644 gfs2_trans_end(sdp);
1645 }
1646 gfs2_glock_dq_uninit(rd_gh);
1647 cond_resched();
1648 goto more_rgrps;
1649 }
1650out:
1651 return ret;
1652}
1653
10d2cf94
AG
1654static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1655{
1656 if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1657 return false;
1658 return true;
1659}
1660
d552a2b9
BP
1661/**
1662 * find_nonnull_ptr - find a non-null pointer given a metapath and height
d552a2b9
BP
1663 * @mp: starting metapath
1664 * @h: desired height to search
1665 *
10d2cf94 1666 * Assumes the metapath is valid (with buffers) out to height h.
d552a2b9
BP
1667 * Returns: true if a non-null pointer was found in the metapath buffer
1668 * false if all remaining pointers are NULL in the buffer
1669 */
1670static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
10d2cf94
AG
1671 unsigned int h,
1672 __u16 *end_list, unsigned int end_aligned)
d552a2b9 1673{
10d2cf94
AG
1674 struct buffer_head *bh = mp->mp_bh[h];
1675 __be64 *first, *ptr, *end;
1676
1677 first = metaptr1(h, mp);
1678 ptr = first + mp->mp_list[h];
1679 end = (__be64 *)(bh->b_data + bh->b_size);
1680 if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
1681 bool keep_end = h < end_aligned;
1682 end = first + end_list[h] + keep_end;
1683 }
d552a2b9 1684
10d2cf94 1685 while (ptr < end) {
c4a9d189 1686 if (*ptr) { /* if we have a non-null pointer */
10d2cf94 1687 mp->mp_list[h] = ptr - first;
c4a9d189
BP
1688 h++;
1689 if (h < GFS2_MAX_META_HEIGHT)
10d2cf94 1690 mp->mp_list[h] = 0;
d552a2b9 1691 return true;
c4a9d189 1692 }
10d2cf94 1693 ptr++;
d552a2b9 1694 }
10d2cf94 1695 return false;
d552a2b9
BP
1696}
1697
/* States of the deallocation loop in punch_hole(). */
enum dealloc_states {
	/* All buffers of the metapath are read in: sweep at strip height */
	DEALLOC_MP_FULL = 0,
	/* Done with the current buffer: step back towards the dinode */
	DEALLOC_MP_LOWER = 1,
	/* Read in the metapath buffers up to the given height */
	DEALLOC_FILL_MP = 2,
	/* Nothing left to do */
	DEALLOC_DONE = 3,
};
b3b94faa 1704
5cf26b1e
AG
1705static inline void
1706metapointer_range(struct metapath *mp, int height,
1707 __u16 *start_list, unsigned int start_aligned,
10d2cf94 1708 __u16 *end_list, unsigned int end_aligned,
5cf26b1e
AG
1709 __be64 **start, __be64 **end)
1710{
1711 struct buffer_head *bh = mp->mp_bh[height];
1712 __be64 *first;
1713
1714 first = metaptr1(height, mp);
1715 *start = first;
1716 if (mp_eq_to_hgt(mp, start_list, height)) {
1717 bool keep_start = height < start_aligned;
1718 *start = first + start_list[height] + keep_start;
1719 }
1720 *end = (__be64 *)(bh->b_data + bh->b_size);
10d2cf94
AG
1721 if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1722 bool keep_end = height < end_aligned;
1723 *end = first + end_list[height] + keep_end;
1724 }
1725}
1726
1727static inline bool walk_done(struct gfs2_sbd *sdp,
1728 struct metapath *mp, int height,
1729 __u16 *end_list, unsigned int end_aligned)
1730{
1731 __u16 end;
1732
1733 if (end_list) {
1734 bool keep_end = height < end_aligned;
1735 if (!mp_eq_to_hgt(mp, end_list, height))
1736 return false;
1737 end = end_list[height] + keep_end;
1738 } else
1739 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1740 return mp->mp_list[height] >= end;
5cf26b1e
AG
1741}
1742
d552a2b9 1743/**
10d2cf94 1744 * punch_hole - deallocate blocks in a file
d552a2b9 1745 * @ip: inode to truncate
10d2cf94
AG
1746 * @offset: the start of the hole
1747 * @length: the size of the hole (or 0 for truncate)
1748 *
1749 * Punch a hole into a file or truncate a file at a given position. This
1750 * function operates in whole blocks (@offset and @length are rounded
1751 * accordingly); partially filled blocks must be cleared otherwise.
d552a2b9 1752 *
10d2cf94
AG
1753 * This function works from the bottom up, and from the right to the left. In
1754 * other words, it strips off the highest layer (data) before stripping any of
1755 * the metadata. Doing it this way is best in case the operation is interrupted
1756 * by power failure, etc. The dinode is rewritten in every transaction to
1757 * guarantee integrity.
d552a2b9 1758 */
10d2cf94 1759static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
d552a2b9
BP
1760{
1761 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
bb491ce6 1762 u64 maxsize = sdp->sd_heightsize[ip->i_height];
10d2cf94 1763 struct metapath mp = {};
d552a2b9
BP
1764 struct buffer_head *dibh, *bh;
1765 struct gfs2_holder rd_gh;
cb7f0903 1766 unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
10d2cf94
AG
1767 u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1768 __u16 start_list[GFS2_MAX_META_HEIGHT];
1769 __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
4e56a641 1770 unsigned int start_aligned, uninitialized_var(end_aligned);
d552a2b9
BP
1771 unsigned int strip_h = ip->i_height - 1;
1772 u32 btotal = 0;
1773 int ret, state;
1774 int mp_h; /* metapath buffers are read in to this height */
d552a2b9 1775 u64 prev_bnr = 0;
5cf26b1e 1776 __be64 *start, *end;
b3b94faa 1777
bb491ce6
AG
1778 if (offset >= maxsize) {
1779 /*
1780 * The starting point lies beyond the allocated meta-data;
1781 * there are no blocks do deallocate.
1782 */
1783 return 0;
1784 }
1785
10d2cf94
AG
1786 /*
1787 * The start position of the hole is defined by lblock, start_list, and
1788 * start_aligned. The end position of the hole is defined by lend,
1789 * end_list, and end_aligned.
1790 *
1791 * start_aligned and end_aligned define down to which height the start
1792 * and end positions are aligned to the metadata tree (i.e., the
1793 * position is a multiple of the metadata granularity at the height
1794 * above). This determines at which heights additional meta pointers
1795 * needs to be preserved for the remaining data.
1796 */
b3b94faa 1797
10d2cf94 1798 if (length) {
10d2cf94
AG
1799 u64 end_offset = offset + length;
1800 u64 lend;
1801
1802 /*
1803 * Clip the end at the maximum file size for the given height:
1804 * that's how far the metadata goes; files bigger than that
1805 * will have additional layers of indirection.
1806 */
1807 if (end_offset > maxsize)
1808 end_offset = maxsize;
1809 lend = end_offset >> bsize_shift;
1810
1811 if (lblock >= lend)
1812 return 0;
1813
1814 find_metapath(sdp, lend, &mp, ip->i_height);
1815 end_list = __end_list;
1816 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1817
1818 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1819 if (end_list[mp_h])
1820 break;
1821 }
1822 end_aligned = mp_h;
1823 }
1824
1825 find_metapath(sdp, lblock, &mp, ip->i_height);
cb7f0903
AG
1826 memcpy(start_list, mp.mp_list, sizeof(start_list));
1827
cb7f0903
AG
1828 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1829 if (start_list[mp_h])
1830 break;
1831 }
1832 start_aligned = mp_h;
d552a2b9
BP
1833
1834 ret = gfs2_meta_inode_buffer(ip, &dibh);
1835 if (ret)
1836 return ret;
b3b94faa 1837
d552a2b9
BP
1838 mp.mp_bh[0] = dibh;
1839 ret = lookup_metapath(ip, &mp);
e8b43fe0
AG
1840 if (ret)
1841 goto out_metapath;
c3ce5aa9
AG
1842
1843 /* issue read-ahead on metadata */
5cf26b1e
AG
1844 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1845 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1846 end_list, end_aligned, &start, &end);
5cf26b1e
AG
1847 gfs2_metapath_ra(ip->i_gl, start, end);
1848 }
c3ce5aa9 1849
e8b43fe0 1850 if (mp.mp_aheight == ip->i_height)
d552a2b9
BP
1851 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1852 else
1853 state = DEALLOC_FILL_MP; /* deal with partial metapath */
b3b94faa 1854
d552a2b9
BP
1855 ret = gfs2_rindex_update(sdp);
1856 if (ret)
1857 goto out_metapath;
1858
1859 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1860 if (ret)
1861 goto out_metapath;
1862 gfs2_holder_mark_uninitialized(&rd_gh);
1863
1864 mp_h = strip_h;
1865
1866 while (state != DEALLOC_DONE) {
1867 switch (state) {
1868 /* Truncate a full metapath at the given strip height.
1869 * Note that strip_h == mp_h in order to be in this state. */
1870 case DEALLOC_MP_FULL:
d552a2b9
BP
1871 bh = mp.mp_bh[mp_h];
1872 gfs2_assert_withdraw(sdp, bh);
1873 if (gfs2_assert_withdraw(sdp,
1874 prev_bnr != bh->b_blocknr)) {
f29e62ee
BP
1875 fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u,"
1876 "s_h:%u, mp_h:%u\n",
d552a2b9
BP
1877 (unsigned long long)ip->i_no_addr,
1878 prev_bnr, ip->i_height, strip_h, mp_h);
1879 }
1880 prev_bnr = bh->b_blocknr;
cb7f0903 1881
5cf26b1e
AG
1882 if (gfs2_metatype_check(sdp, bh,
1883 (mp_h ? GFS2_METATYPE_IN :
1884 GFS2_METATYPE_DI))) {
1885 ret = -EIO;
1886 goto out;
1887 }
1888
10d2cf94
AG
1889 /*
1890 * Below, passing end_aligned as 0 gives us the
1891 * metapointer range excluding the end point: the end
1892 * point is the first metapath we must not deallocate!
1893 */
1894
5cf26b1e 1895 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1896 end_list, 0 /* end_aligned */,
5cf26b1e
AG
1897 &start, &end);
1898 ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1899 start, end,
1900 mp_h != ip->i_height - 1,
1901 &btotal);
cb7f0903 1902
d552a2b9
BP
1903 /* If we hit an error or just swept dinode buffer,
1904 just exit. */
1905 if (ret || !mp_h) {
1906 state = DEALLOC_DONE;
1907 break;
1908 }
1909 state = DEALLOC_MP_LOWER;
1910 break;
1911
1912 /* lower the metapath strip height */
1913 case DEALLOC_MP_LOWER:
1914 /* We're done with the current buffer, so release it,
1915 unless it's the dinode buffer. Then back up to the
1916 previous pointer. */
1917 if (mp_h) {
1918 brelse(mp.mp_bh[mp_h]);
1919 mp.mp_bh[mp_h] = NULL;
1920 }
1921 /* If we can't get any lower in height, we've stripped
1922 off all we can. Next step is to back up and start
1923 stripping the previous level of metadata. */
1924 if (mp_h == 0) {
1925 strip_h--;
cb7f0903 1926 memcpy(mp.mp_list, start_list, sizeof(start_list));
d552a2b9
BP
1927 mp_h = strip_h;
1928 state = DEALLOC_FILL_MP;
1929 break;
1930 }
1931 mp.mp_list[mp_h] = 0;
1932 mp_h--; /* search one metadata height down */
d552a2b9 1933 mp.mp_list[mp_h]++;
10d2cf94
AG
1934 if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1935 break;
d552a2b9
BP
1936 /* Here we've found a part of the metapath that is not
1937 * allocated. We need to search at that height for the
1938 * next non-null pointer. */
10d2cf94 1939 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
d552a2b9
BP
1940 state = DEALLOC_FILL_MP;
1941 mp_h++;
1942 }
1943 /* No more non-null pointers at this height. Back up
1944 to the previous height and try again. */
1945 break; /* loop around in the same state */
1946
1947 /* Fill the metapath with buffers to the given height. */
1948 case DEALLOC_FILL_MP:
1949 /* Fill the buffers out to the current height. */
1950 ret = fillup_metapath(ip, &mp, mp_h);
c3ce5aa9 1951 if (ret < 0)
d552a2b9 1952 goto out;
c3ce5aa9 1953
e7445ced
AG
1954 /* On the first pass, issue read-ahead on metadata. */
1955 if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
1956 unsigned int height = mp.mp_aheight - 1;
1957
1958 /* No read-ahead for data blocks. */
1959 if (mp.mp_aheight - 1 == strip_h)
1960 height--;
1961
1962 for (; height >= mp.mp_aheight - ret; height--) {
1963 metapointer_range(&mp, height,
5cf26b1e 1964 start_list, start_aligned,
10d2cf94 1965 end_list, end_aligned,
5cf26b1e
AG
1966 &start, &end);
1967 gfs2_metapath_ra(ip->i_gl, start, end);
1968 }
c3ce5aa9 1969 }
d552a2b9
BP
1970
1971 /* If buffers found for the entire strip height */
e8b43fe0 1972 if (mp.mp_aheight - 1 == strip_h) {
d552a2b9
BP
1973 state = DEALLOC_MP_FULL;
1974 break;
1975 }
e8b43fe0
AG
1976 if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1977 mp_h = mp.mp_aheight - 1;
d552a2b9
BP
1978
1979 /* If we find a non-null block pointer, crawl a bit
1980 higher up in the metapath and try again, otherwise
1981 we need to look lower for a new starting point. */
10d2cf94 1982 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
d552a2b9
BP
1983 mp_h++;
1984 else
1985 state = DEALLOC_MP_LOWER;
b3b94faa 1986 break;
d552a2b9 1987 }
b3b94faa
DT
1988 }
1989
d552a2b9
BP
1990 if (btotal) {
1991 if (current->journal_info == NULL) {
1992 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1993 RES_QUOTA, 0);
1994 if (ret)
1995 goto out;
1996 down_write(&ip->i_rw_mutex);
1997 }
1998 gfs2_statfs_change(sdp, 0, +btotal, 0);
1999 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
2000 ip->i_inode.i_gid);
b32c8c76 2001 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
d552a2b9
BP
2002 gfs2_trans_add_meta(ip->i_gl, dibh);
2003 gfs2_dinode_out(ip, dibh->b_data);
2004 up_write(&ip->i_rw_mutex);
2005 gfs2_trans_end(sdp);
2006 }
b3b94faa 2007
d552a2b9
BP
2008out:
2009 if (gfs2_holder_initialized(&rd_gh))
2010 gfs2_glock_dq_uninit(&rd_gh);
2011 if (current->journal_info) {
2012 up_write(&ip->i_rw_mutex);
2013 gfs2_trans_end(sdp);
2014 cond_resched();
2015 }
2016 gfs2_quota_unhold(ip);
2017out_metapath:
2018 release_metapath(&mp);
2019 return ret;
b3b94faa
DT
2020}
2021
2022static int trunc_end(struct gfs2_inode *ip)
2023{
feaa7bba 2024 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
b3b94faa
DT
2025 struct buffer_head *dibh;
2026 int error;
2027
2028 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2029 if (error)
2030 return error;
2031
2032 down_write(&ip->i_rw_mutex);
2033
2034 error = gfs2_meta_inode_buffer(ip, &dibh);
2035 if (error)
2036 goto out;
2037
a2e0f799 2038 if (!i_size_read(&ip->i_inode)) {
ecc30c79 2039 ip->i_height = 0;
ce276b06 2040 ip->i_goal = ip->i_no_addr;
b3b94faa 2041 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
45138990 2042 gfs2_ordered_del_inode(ip);
b3b94faa 2043 }
078cd827 2044 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
383f01fb 2045 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
b3b94faa 2046
350a9b0a 2047 gfs2_trans_add_meta(ip->i_gl, dibh);
539e5d6b 2048 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa
DT
2049 brelse(dibh);
2050
a91ea69f 2051out:
b3b94faa 2052 up_write(&ip->i_rw_mutex);
b3b94faa 2053 gfs2_trans_end(sdp);
b3b94faa
DT
2054 return error;
2055}
2056
2057/**
2058 * do_shrink - make a file smaller
ff8f33c8 2059 * @inode: the inode
ff8f33c8 2060 * @newsize: the size to make the file
b3b94faa 2061 *
ff8f33c8
SW
2062 * Called with an exclusive lock on @inode. The @size must
2063 * be equal to or smaller than the current inode size.
b3b94faa
DT
2064 *
2065 * Returns: errno
2066 */
2067
8b5860a3 2068static int do_shrink(struct inode *inode, u64 newsize)
b3b94faa 2069{
ff8f33c8 2070 struct gfs2_inode *ip = GFS2_I(inode);
b3b94faa
DT
2071 int error;
2072
8b5860a3 2073 error = trunc_start(inode, newsize);
b3b94faa
DT
2074 if (error < 0)
2075 return error;
ff8f33c8 2076 if (gfs2_is_stuffed(ip))
b3b94faa
DT
2077 return 0;
2078
10d2cf94 2079 error = punch_hole(ip, newsize, 0);
ff8f33c8 2080 if (error == 0)
b3b94faa
DT
2081 error = trunc_end(ip);
2082
2083 return error;
2084}
2085
ff8f33c8 2086void gfs2_trim_blocks(struct inode *inode)
a13b8c5f 2087{
ff8f33c8
SW
2088 int ret;
2089
8b5860a3 2090 ret = do_shrink(inode, inode->i_size);
ff8f33c8
SW
2091 WARN_ON(ret != 0);
2092}
2093
2094/**
2095 * do_grow - Touch and update inode size
2096 * @inode: The inode
2097 * @size: The new size
2098 *
2099 * This function updates the timestamps on the inode and
2100 * may also increase the size of the inode. This function
2101 * must not be called with @size any smaller than the current
2102 * inode size.
2103 *
2104 * Although it is not strictly required to unstuff files here,
2105 * earlier versions of GFS2 have a bug in the stuffed file reading
2106 * code which will result in a buffer overrun if the size is larger
2107 * than the max stuffed file size. In order to prevent this from
25985edc 2108 * occurring, such files are unstuffed, but in other cases we can
ff8f33c8
SW
2109 * just update the inode size directly.
2110 *
2111 * Returns: 0 on success, or -ve on error
2112 */
2113
2114static int do_grow(struct inode *inode, u64 size)
2115{
2116 struct gfs2_inode *ip = GFS2_I(inode);
2117 struct gfs2_sbd *sdp = GFS2_SB(inode);
7b9cff46 2118 struct gfs2_alloc_parms ap = { .target = 1, };
a13b8c5f
WC
2119 struct buffer_head *dibh;
2120 int error;
2f7ee358 2121 int unstuff = 0;
a13b8c5f 2122
235628c5 2123 if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
b8fbf471 2124 error = gfs2_quota_lock_check(ip, &ap);
ff8f33c8 2125 if (error)
5407e242 2126 return error;
ff8f33c8 2127
7b9cff46 2128 error = gfs2_inplace_reserve(ip, &ap);
ff8f33c8
SW
2129 if (error)
2130 goto do_grow_qunlock;
2f7ee358 2131 unstuff = 1;
ff8f33c8
SW
2132 }
2133
a01aedfe 2134 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
bc020561
BP
2135 (unstuff &&
2136 gfs2_is_jdata(ip) ? RES_JDATA : 0) +
a01aedfe
BP
2137 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
2138 0 : RES_QUOTA), 0);
a13b8c5f 2139 if (error)
ff8f33c8 2140 goto do_grow_release;
a13b8c5f 2141
2f7ee358 2142 if (unstuff) {
ff8f33c8
SW
2143 error = gfs2_unstuff_dinode(ip, NULL);
2144 if (error)
2145 goto do_end_trans;
2146 }
a13b8c5f
WC
2147
2148 error = gfs2_meta_inode_buffer(ip, &dibh);
2149 if (error)
ff8f33c8 2150 goto do_end_trans;
a13b8c5f 2151
ff8f33c8 2152 i_size_write(inode, size);
078cd827 2153 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
350a9b0a 2154 gfs2_trans_add_meta(ip->i_gl, dibh);
a13b8c5f
WC
2155 gfs2_dinode_out(ip, dibh->b_data);
2156 brelse(dibh);
2157
ff8f33c8 2158do_end_trans:
a13b8c5f 2159 gfs2_trans_end(sdp);
ff8f33c8 2160do_grow_release:
2f7ee358 2161 if (unstuff) {
ff8f33c8
SW
2162 gfs2_inplace_release(ip);
2163do_grow_qunlock:
2164 gfs2_quota_unlock(ip);
ff8f33c8 2165 }
a13b8c5f
WC
2166 return error;
2167}
2168
b3b94faa 2169/**
ff8f33c8
SW
2170 * gfs2_setattr_size - make a file a given size
2171 * @inode: the inode
2172 * @newsize: the size to make the file
b3b94faa 2173 *
ff8f33c8 2174 * The file size can grow, shrink, or stay the same size. This
3e7aafc3 2175 * is called holding i_rwsem and an exclusive glock on the inode
ff8f33c8 2176 * in question.
b3b94faa
DT
2177 *
2178 * Returns: errno
2179 */
2180
ff8f33c8 2181int gfs2_setattr_size(struct inode *inode, u64 newsize)
b3b94faa 2182{
af5c2697 2183 struct gfs2_inode *ip = GFS2_I(inode);
ff8f33c8 2184 int ret;
b3b94faa 2185
ff8f33c8 2186 BUG_ON(!S_ISREG(inode->i_mode));
b3b94faa 2187
ff8f33c8
SW
2188 ret = inode_newsize_ok(inode, newsize);
2189 if (ret)
2190 return ret;
b3b94faa 2191
562c72aa
CH
2192 inode_dio_wait(inode);
2193
b54e9a0b 2194 ret = gfs2_rsqa_alloc(ip);
d2b47cfb 2195 if (ret)
2b3dcf35 2196 goto out;
d2b47cfb 2197
8b5860a3 2198 if (newsize >= inode->i_size) {
2b3dcf35
BP
2199 ret = do_grow(inode, newsize);
2200 goto out;
2201 }
ff8f33c8 2202
8b5860a3 2203 ret = do_shrink(inode, newsize);
2b3dcf35 2204out:
a097dc7e 2205 gfs2_rsqa_delete(ip, NULL);
2b3dcf35 2206 return ret;
b3b94faa
DT
2207}
2208
2209int gfs2_truncatei_resume(struct gfs2_inode *ip)
2210{
2211 int error;
10d2cf94 2212 error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
b3b94faa
DT
2213 if (!error)
2214 error = trunc_end(ip);
2215 return error;
2216}
2217
2218int gfs2_file_dealloc(struct gfs2_inode *ip)
2219{
10d2cf94 2220 return punch_hole(ip, 0, 0);
b3b94faa
DT
2221}
2222
b50f227b
SW
2223/**
2224 * gfs2_free_journal_extents - Free cached journal bmap info
2225 * @jd: The journal
2226 *
2227 */
2228
2229void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
2230{
2231 struct gfs2_journal_extent *jext;
2232
2233 while(!list_empty(&jd->extent_list)) {
2234 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
2235 list_del(&jext->list);
2236 kfree(jext);
2237 }
2238}
2239
2240/**
2241 * gfs2_add_jextent - Add or merge a new extent to extent cache
2242 * @jd: The journal descriptor
2243 * @lblock: The logical block at start of new extent
c62baf65 2244 * @dblock: The physical block at start of new extent
b50f227b
SW
2245 * @blocks: Size of extent in fs blocks
2246 *
2247 * Returns: 0 on success or -ENOMEM
2248 */
2249
2250static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
2251{
2252 struct gfs2_journal_extent *jext;
2253
2254 if (!list_empty(&jd->extent_list)) {
2255 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
2256 if ((jext->dblock + jext->blocks) == dblock) {
2257 jext->blocks += blocks;
2258 return 0;
2259 }
2260 }
2261
2262 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
2263 if (jext == NULL)
2264 return -ENOMEM;
2265 jext->dblock = dblock;
2266 jext->lblock = lblock;
2267 jext->blocks = blocks;
2268 list_add_tail(&jext->list, &jd->extent_list);
2269 jd->nr_extents++;
2270 return 0;
2271}
2272
2273/**
2274 * gfs2_map_journal_extents - Cache journal bmap info
2275 * @sdp: The super block
2276 * @jd: The journal to map
2277 *
2278 * Create a reusable "extent" mapping from all logical
2279 * blocks to all physical blocks for the given journal. This will save
2280 * us time when writing journal blocks. Most journals will have only one
2281 * extent that maps all their logical blocks. That's because gfs2.mkfs
2282 * arranges the journal blocks sequentially to maximize performance.
2283 * So the extent would map the first block for the entire file length.
2284 * However, gfs2_jadd can happen while file activity is happening, so
2285 * those journals may not be sequential. Less likely is the case where
2286 * the users created their own journals by mounting the metafs and
2287 * laying it out. But it's still possible. These journals might have
2288 * several extents.
2289 *
2290 * Returns: 0 on success, or error on failure
2291 */
2292
2293int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
2294{
2295 u64 lblock = 0;
2296 u64 lblock_stop;
2297 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
2298 struct buffer_head bh;
2299 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
2300 u64 size;
2301 int rc;
98583b3e 2302 ktime_t start, end;
b50f227b 2303
98583b3e 2304 start = ktime_get();
b50f227b
SW
2305 lblock_stop = i_size_read(jd->jd_inode) >> shift;
2306 size = (lblock_stop - lblock) << shift;
2307 jd->nr_extents = 0;
2308 WARN_ON(!list_empty(&jd->extent_list));
2309
2310 do {
2311 bh.b_state = 0;
2312 bh.b_blocknr = 0;
2313 bh.b_size = size;
2314 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
2315 if (rc || !buffer_mapped(&bh))
2316 goto fail;
2317 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
2318 if (rc)
2319 goto fail;
2320 size -= bh.b_size;
2321 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2322 } while(size > 0);
2323
98583b3e
AD
2324 end = ktime_get();
2325 fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid,
2326 jd->nr_extents, ktime_ms_delta(end, start));
b50f227b
SW
2327 return 0;
2328
2329fail:
2330 fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
2331 rc, jd->jd_jid,
2332 (unsigned long long)(i_size_read(jd->jd_inode) - size),
2333 jd->nr_extents);
2334 fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
2335 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
2336 bh.b_state, (unsigned long long)bh.b_size);
2337 gfs2_free_journal_extents(jd);
2338 return rc;
2339}
2340
b3b94faa
DT
2341/**
2342 * gfs2_write_alloc_required - figure out if a write will require an allocation
2343 * @ip: the file being written to
2344 * @offset: the offset to write to
2345 * @len: the number of bytes being written
b3b94faa 2346 *
461cb419 2347 * Returns: 1 if an alloc is required, 0 otherwise
b3b94faa
DT
2348 */
2349
cd915493 2350int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
461cb419 2351 unsigned int len)
b3b94faa 2352{
feaa7bba 2353 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
941e6d7d
SW
2354 struct buffer_head bh;
2355 unsigned int shift;
2356 u64 lblock, lblock_stop, size;
7ed122e4 2357 u64 end_of_file;
b3b94faa 2358
b3b94faa
DT
2359 if (!len)
2360 return 0;
2361
2362 if (gfs2_is_stuffed(ip)) {
235628c5 2363 if (offset + len > gfs2_max_stuffed_size(ip))
461cb419 2364 return 1;
b3b94faa
DT
2365 return 0;
2366 }
2367
941e6d7d 2368 shift = sdp->sd_sb.sb_bsize_shift;
7ed122e4 2369 BUG_ON(gfs2_is_dir(ip));
a2e0f799 2370 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
7ed122e4
SW
2371 lblock = offset >> shift;
2372 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
77612578 2373 if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
461cb419 2374 return 1;
b3b94faa 2375
941e6d7d
SW
2376 size = (lblock_stop - lblock) << shift;
2377 do {
2378 bh.b_state = 0;
2379 bh.b_size = size;
2380 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
2381 if (!buffer_mapped(&bh))
461cb419 2382 return 1;
941e6d7d
SW
2383 size -= bh.b_size;
2384 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2385 } while(size > 0);
b3b94faa
DT
2386
2387 return 0;
2388}
2389
4e56a641
AG
2390static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2391{
2392 struct gfs2_inode *ip = GFS2_I(inode);
2393 struct buffer_head *dibh;
2394 int error;
2395
2396 if (offset >= inode->i_size)
2397 return 0;
2398 if (offset + length > inode->i_size)
2399 length = inode->i_size - offset;
2400
2401 error = gfs2_meta_inode_buffer(ip, &dibh);
2402 if (error)
2403 return error;
2404 gfs2_trans_add_meta(ip->i_gl, dibh);
2405 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2406 length);
2407 brelse(dibh);
2408 return 0;
2409}
2410
2411static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2412 loff_t length)
2413{
2414 struct gfs2_sbd *sdp = GFS2_SB(inode);
2415 loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2416 int error;
2417
2418 while (length) {
2419 struct gfs2_trans *tr;
2420 loff_t chunk;
2421 unsigned int offs;
2422
2423 chunk = length;
2424 if (chunk > max_chunk)
2425 chunk = max_chunk;
2426
2427 offs = offset & ~PAGE_MASK;
2428 if (offs && chunk > PAGE_SIZE)
2429 chunk = offs + ((chunk - offs) & PAGE_MASK);
2430
2431 truncate_pagecache_range(inode, offset, chunk);
2432 offset += chunk;
2433 length -= chunk;
2434
2435 tr = current->journal_info;
2436 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2437 continue;
2438
2439 gfs2_trans_end(sdp);
2440 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2441 if (error)
2442 return error;
2443 }
2444 return 0;
2445}
2446
2447int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2448{
2449 struct inode *inode = file_inode(file);
2450 struct gfs2_inode *ip = GFS2_I(inode);
2451 struct gfs2_sbd *sdp = GFS2_SB(inode);
2452 int error;
2453
2454 if (gfs2_is_jdata(ip))
2455 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2456 GFS2_JTRUNC_REVOKES);
2457 else
2458 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2459 if (error)
2460 return error;
2461
2462 if (gfs2_is_stuffed(ip)) {
2463 error = stuffed_zero_range(inode, offset, length);
2464 if (error)
2465 goto out;
2466 } else {
00251a16 2467 unsigned int start_off, end_len, blocksize;
4e56a641
AG
2468
2469 blocksize = i_blocksize(inode);
2470 start_off = offset & (blocksize - 1);
00251a16 2471 end_len = (offset + length) & (blocksize - 1);
4e56a641
AG
2472 if (start_off) {
2473 unsigned int len = length;
2474 if (length > blocksize - start_off)
2475 len = blocksize - start_off;
2476 error = gfs2_block_zero_range(inode, offset, len);
2477 if (error)
2478 goto out;
2479 if (start_off + length < blocksize)
00251a16 2480 end_len = 0;
4e56a641 2481 }
00251a16 2482 if (end_len) {
4e56a641 2483 error = gfs2_block_zero_range(inode,
00251a16 2484 offset + length - end_len, end_len);
4e56a641
AG
2485 if (error)
2486 goto out;
2487 }
2488 }
2489
2490 if (gfs2_is_jdata(ip)) {
2491 BUG_ON(!current->journal_info);
2492 gfs2_journaled_truncate_range(inode, offset, length);
2493 } else
2494 truncate_pagecache_range(inode, offset, offset + length - 1);
2495
2496 file_update_time(file);
2497 mark_inode_dirty(inode);
2498
2499 if (current->journal_info)
2500 gfs2_trans_end(sdp);
2501
2502 if (!gfs2_is_stuffed(ip))
2503 error = punch_hole(ip, offset, length);
2504
2505out:
2506 if (current->journal_info)
2507 gfs2_trans_end(sdp);
2508 return error;
2509}