]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/gfs2/bmap.c
gfs2: Remove ordered write mode handling from gfs2_trans_add_data
[mirror_ubuntu-jammy-kernel.git] / fs / gfs2 / bmap.c
CommitLineData
b3b94faa
DT
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3a8a9a10 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
b3b94faa
DT
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
e9fc2aa0 7 * of the GNU General Public License version 2.
b3b94faa
DT
8 */
9
b3b94faa
DT
10#include <linux/spinlock.h>
11#include <linux/completion.h>
12#include <linux/buffer_head.h>
64dd153c 13#include <linux/blkdev.h>
5c676f6d 14#include <linux/gfs2_ondisk.h>
71b86f56 15#include <linux/crc32.h>
3974320c 16#include <linux/iomap.h>
b3b94faa
DT
17
18#include "gfs2.h"
5c676f6d 19#include "incore.h"
b3b94faa
DT
20#include "bmap.h"
21#include "glock.h"
22#include "inode.h"
b3b94faa 23#include "meta_io.h"
b3b94faa
DT
24#include "quota.h"
25#include "rgrp.h"
45138990 26#include "log.h"
4c16c36a 27#include "super.h"
b3b94faa 28#include "trans.h"
18ec7d5c 29#include "dir.h"
5c676f6d 30#include "util.h"
63997775 31#include "trace_gfs2.h"
b3b94faa
DT
/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	/* One buffer per level of the tree, [0] being the dinode buffer. */
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	/* Pointer index to follow at each level. */
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};
43
/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @page: The (optional) page. This is looked up if @page is NULL
 *
 * Copies the inline (stuffed) data out of the dinode into the first
 * page of the inode's address space and maps its buffer to @block,
 * dirtying it in jdata or ordered mode as appropriate.
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
{
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;
	int release = 0;

	/* Only page index 0 can hold stuffed data; look it up if needed. */
	if (!page || page->index) {
		page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
		if (!page)
			return -ENOMEM;
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);
		u64 dsize = i_size_read(inode);

		/* Inline data can never exceed the stuffed capacity. */
		if (dsize > gfs2_max_stuffed_size(ip))
			dsize = gfs2_max_stuffed_size(ip);

		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, BIT(inode->i_blkbits),
				     BIT(BH_Uptodate));

	bh = page_buffers(page);

	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
	else {
		/* Ordered mode: dirty the buffer and queue the inode. */
		mark_buffer_dirty(bh);
		gfs2_ordered_add_inode(ip);
	}

	if (release) {
		unlock_page(page);
		put_page(page);
	}

	return 0;
}
106
/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @page: The (optional) page. This is looked up if the @page is NULL
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			/* Directory data is journaled; revoke any stale
			   journal entries for the reused block first. */
			gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		/* First pointer slot follows the dinode header. */
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	/* The tree now has exactly one level of indirection. */
	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}
176
b3b94faa
DT
177
178/**
179 * find_metapath - Find path through the metadata tree
9b8c81d1 180 * @sdp: The superblock
b3b94faa 181 * @block: The disk block to look up
07e23d68 182 * @mp: The metapath to return the result in
9b8c81d1 183 * @height: The pre-calculated height of the metadata tree
b3b94faa
DT
184 *
185 * This routine returns a struct metapath structure that defines a path
186 * through the metadata of inode "ip" to get to block "block".
187 *
188 * Example:
189 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
190 * filesystem with a blocksize of 4096.
191 *
192 * find_metapath() would return a struct metapath structure set to:
07e23d68 193 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
b3b94faa
DT
194 *
195 * That means that in order to get to the block containing the byte at
196 * offset 101342453, we would load the indirect block pointed to by pointer
197 * 0 in the dinode. We would then load the indirect block pointed to by
198 * pointer 48 in that indirect block. We would then load the data block
199 * pointed to by pointer 165 in that indirect block.
200 *
201 * ----------------------------------------
202 * | Dinode | |
203 * | | 4|
204 * | |0 1 2 3 4 5 9|
205 * | | 6|
206 * ----------------------------------------
207 * |
208 * |
209 * V
210 * ----------------------------------------
211 * | Indirect Block |
212 * | 5|
213 * | 4 4 4 4 4 5 5 1|
214 * |0 5 6 7 8 9 0 1 2|
215 * ----------------------------------------
216 * |
217 * |
218 * V
219 * ----------------------------------------
220 * | Indirect Block |
221 * | 1 1 1 1 1 5|
222 * | 6 6 6 6 6 1|
223 * |0 3 4 5 6 7 2|
224 * ----------------------------------------
225 * |
226 * |
227 * V
228 * ----------------------------------------
229 * | Data block containing offset |
230 * | 101342453 |
231 * | |
232 * | |
233 * ----------------------------------------
234 *
235 */
236
9b8c81d1
SW
237static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
238 struct metapath *mp, unsigned int height)
b3b94faa 239{
b3b94faa
DT
240 unsigned int i;
241
5f8bd444 242 mp->mp_fheight = height;
9b8c81d1 243 for (i = height; i--;)
7eabb77e 244 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
b3b94faa
DT
245}
246
5af4e7a0 247static inline unsigned int metapath_branch_start(const struct metapath *mp)
9b8c81d1 248{
5af4e7a0
BM
249 if (mp->mp_list[0] == 0)
250 return 2;
251 return 1;
9b8c81d1
SW
252}
253
d552a2b9 254/**
20cdc193 255 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
d552a2b9
BP
256 * @height: The metadata height (0 = dinode)
257 * @mp: The metapath
258 */
259static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
260{
261 struct buffer_head *bh = mp->mp_bh[height];
262 if (height == 0)
263 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
264 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
265}
266
b3b94faa
DT
267/**
268 * metapointer - Return pointer to start of metadata in a buffer
b3b94faa
DT
269 * @height: The metadata height (0 = dinode)
270 * @mp: The metapath
271 *
272 * Return a pointer to the block number of the next height of the metadata
273 * tree given a buffer containing the pointer to the current height of the
274 * metadata tree.
275 */
276
9b8c81d1 277static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
b3b94faa 278{
d552a2b9
BP
279 __be64 *p = metaptr1(height, mp);
280 return p + mp->mp_list[height];
b3b94faa
DT
281}
282
7841b9f0
AG
283static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
284{
285 const struct buffer_head *bh = mp->mp_bh[height];
286 return (const __be64 *)(bh->b_data + bh->b_size);
287}
288
289static void clone_metapath(struct metapath *clone, struct metapath *mp)
290{
291 unsigned int hgt;
292
293 *clone = *mp;
294 for (hgt = 0; hgt < mp->mp_aheight; hgt++)
295 get_bh(clone->mp_bh[hgt]);
296}
297
/*
 * Issue read-ahead for the blocks referenced by the pointers in
 * [start, end).  Reads are fire-and-forget: the buffer reference is
 * handed to the end_io handler for buffers submitted for I/O.
 */
static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
	const __be64 *t;

	for (t = start; t < end; t++) {
		struct buffer_head *rabh;

		/* Skip unallocated (zero) pointers. */
		if (!*t)
			continue;

		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
		if (trylock_buffer(rabh)) {
			if (!buffer_uptodate(rabh)) {
				rabh->b_end_io = end_buffer_read_sync;
				submit_bh(REQ_OP_READ,
					  REQ_RAHEAD | REQ_META | REQ_PRIO,
					  rabh);
				/* Reference dropped by the end_io handler,
				   so no brelse() here. */
				continue;
			}
			unlock_buffer(rabh);
		}
		brelse(rabh);
	}
}
322
/*
 * Read indirect buffers for heights [x, h) into mp->mp_bh[], stopping
 * early at the first unallocated pointer.  On return mp_aheight is set
 * to one past the deepest buffer actually present.
 */
static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
			     unsigned int x, unsigned int h)
{
	for (; x < h; x++) {
		__be64 *ptr = metapointer(x, mp);
		u64 dblock = be64_to_cpu(*ptr);
		int ret;

		/* Hole: the tree is not allocated below this point. */
		if (!dblock)
			break;
		ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
		if (ret)
			return ret;
	}
	mp->mp_aheight = x + 1;
	return 0;
}
340
/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error
 */

static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	/* Fill every level from the dinode down to the pointer block. */
	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}
362
/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights
 *
 * Returns: error or the number of buffers filled
 */

static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int x = 0;
	int ret;

	if (h) {
		/* find the first buffer we need to look up. */
		for (x = h - 1; x > 0; x--) {
			if (mp->mp_bh[x])
				break;
		}
	}
	ret = __fillup_metapath(ip, mp, x, h);
	if (ret)
		return ret;
	/* Number of buffers newly added below the starting height. */
	return mp->mp_aheight - x - 1;
}
391
9b8c81d1 392static inline void release_metapath(struct metapath *mp)
dbac6710
SW
393{
394 int i;
395
9b8c81d1
SW
396 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
397 if (mp->mp_bh[i] == NULL)
398 break;
399 brelse(mp->mp_bh[i]);
400 }
11707ea0
SW
401}
402
/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @start: Start of the buffer
 * @len: Length of the buffer in bytes
 * @ptr: Current position in the buffer
 * @limit: Max extent length to return (0 = unlimited)
 * @eob: Set to 1 if we hit "end of block"
 *
 * If the first block is zero (unallocated) it will return the number of
 * unallocated blocks in the extent, otherwise it will return the number
 * of contiguous blocks in the extent.
 *
 * Returns: The length of the extent (minimum of one block)
 */

static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
{
	const __be64 *end = (start + len);
	const __be64 *first = ptr;
	u64 d = be64_to_cpu(*ptr);	/* 0 means we count a hole instead */

	*eob = 0;
	do {
		ptr++;
		if (ptr >= end)
			break;
		if (limit && --limit == 0)
			break;
		/* For an allocated extent, the next block must be d+1;
		   for a hole (d == 0), the next pointer must also be 0. */
		if (d)
			d++;
	} while(be64_to_cpu(*ptr) == d);
	if (ptr >= end)
		*eob = 1;
	return (ptr - first);
}
438
/*
 * Callback invoked for each run of pointers visited by
 * gfs2_walk_metadata().  It returns WALK_STOP to terminate the walk,
 * WALK_NEXT to continue at the next position, or a pointer within
 * [start, end) to descend into that subtree.
 */
typedef const __be64 *(*gfs2_metadata_walker)(
	struct metapath *mp,
	const __be64 *start, const __be64 *end,
	u64 factor, void *data);

#define WALK_STOP ((__be64 *)0)
#define WALK_NEXT ((__be64 *)1)

/*
 * Walk up to @len blocks of the metadata tree starting at @lblock,
 * calling @walker for each run of pointers at the current height.
 * @factor tracks how many data blocks each pointer at the current
 * height covers.  The metapath is cloned before being modified so the
 * caller's @mp is left untouched.
 */
static int gfs2_walk_metadata(struct inode *inode, sector_t lblock,
		u64 len, struct metapath *mp, gfs2_metadata_walker walker,
		void *data)
{
	struct metapath clone;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *start, *end, *ptr;
	u64 factor = 1;
	unsigned int hgt;
	int ret = 0;

	/* Blocks-per-pointer at the deepest allocated height. */
	for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--)
		factor *= sdp->sd_inptrs;

	for (;;) {
		u64 step;

		/* Walk indirect block. */
		start = metapointer(hgt, mp);
		end = metaend(hgt, mp);

		step = (end - start) * factor;
		if (step > len)
			end = start + DIV_ROUND_UP_ULL(len, factor);

		ptr = walker(mp, start, end, factor, data);
		if (ptr == WALK_STOP)
			break;
		if (step >= len)
			break;
		len -= step;
		if (ptr != WALK_NEXT) {
			/* Walker asked to descend at this pointer. */
			BUG_ON(!*ptr);
			mp->mp_list[hgt] += ptr - start;
			goto fill_up_metapath;
		}

lower_metapath:
		/* Decrease height of metapath. */
		if (mp != &clone) {
			clone_metapath(&clone, mp);
			mp = &clone;
		}
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		start = metapointer(hgt, mp);
		end = metaend(hgt, mp);
		if (start >= end) {
			/* Off the end of this block too; pop another level. */
			mp->mp_list[hgt] = 0;
			if (!hgt)
				break;
			goto lower_metapath;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		if (mp != &clone) {
			clone_metapath(&clone, mp);
			mp = &clone;
		}
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			break;
		hgt += ret;
		/* Each level descended shrinks the per-pointer coverage. */
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	if (mp == &clone)
		release_metapath(mp);
	return ret;
}
527
/* Accumulator for gfs2_hole_walker: blocks of hole seen so far. */
struct gfs2_hole_walker_args {
	u64 blocks;
};

/*
 * gfs2_walk_metadata() callback that measures the extent of a hole.
 * Zero pointers are counted (scaled by @factor); the first non-zero
 * pointer either ends the walk (if we are already at full height) or
 * requests a descent to look for the hole boundary more precisely.
 */
static const __be64 *gfs2_hole_walker(struct metapath *mp,
		const __be64 *start, const __be64 *end,
		u64 factor, void *data)
{
	struct gfs2_hole_walker_args *args = data;
	const __be64 *ptr;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr) {
			args->blocks += (ptr - start) * factor;
			if (mp->mp_aheight == mp->mp_fheight)
				return WALK_STOP;
			return ptr; /* increase height */
		}
	}
	args->blocks += (end - start) * factor;
	return WALK_NEXT;
}
550
551/**
552 * gfs2_hole_size - figure out the size of a hole
553 * @inode: The inode
554 * @lblock: The logical starting block number
555 * @len: How far to look (in blocks)
556 * @mp: The metapath at lblock
557 * @iomap: The iomap to store the hole size in
558 *
559 * This function modifies @mp.
560 *
561 * Returns: errno on error
562 */
563static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
564 struct metapath *mp, struct iomap *iomap)
565{
566 struct gfs2_hole_walker_args args = { };
567 int ret = 0;
568
569 ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args);
570 if (!ret)
571 iomap->length = args.blocks << inode->i_blkbits;
572 return ret;
573}
574
/* Take i_rw_mutex: exclusive when mapping may allocate, shared otherwise. */
static inline void bmap_lock(struct gfs2_inode *ip, int create)
{
	if (create)
		down_write(&ip->i_rw_mutex);
	else
		down_read(&ip->i_rw_mutex);
}
582
/* Release i_rw_mutex in the mode matching the earlier bmap_lock(). */
static inline void bmap_unlock(struct gfs2_inode *ip, int create)
{
	if (create)
		up_write(&ip->i_rw_mutex);
	else
		up_read(&ip->i_rw_mutex);
}
590
/*
 * Initialise a brand-new indirect block at height @i (block @bn),
 * add it to the transaction, and store its address at pointer slot
 * @offset of the parent buffer.  Returns the parent pointer slot.
 */
static inline __be64 *gfs2_indirect_init(struct metapath *mp,
					 struct gfs2_glock *gl, unsigned int i,
					 unsigned offset, u64 bn)
{
	/* Parent pointer area starts after the dinode header at the top
	   level, or after the metadata header below that. */
	__be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
				 ((i > 1) ? sizeof(struct gfs2_meta_header) :
				  sizeof(struct gfs2_dinode)));
	BUG_ON(i < 1);
	BUG_ON(mp->mp_bh[i] != NULL);
	mp->mp_bh[i] = gfs2_meta_new(gl, bn);
	gfs2_trans_add_meta(gl, mp->mp_bh[i]);
	gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
	ptr += offset;
	*ptr = cpu_to_be64(bn);
	return ptr;
}
608
/* Allocation state machine for gfs2_iomap_alloc(); states are worked
   through in descending order as the tree is built downwards. */
enum alloc_state {
	ALLOC_DATA = 0,		/* Allocating data blocks */
	ALLOC_GROW_DEPTH = 1,	/* Filling in lower levels of the tree */
	ALLOC_GROW_HEIGHT = 2,	/* Adding levels above the current top */
	/* ALLOC_UNSTUFF = 3, TBD and rather complicated */
};
615
/**
 * gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap describing the requested mapping; on success its
 *         addr, flags and length are updated for the new allocation
 * @flags: iomap flags passed down from the caller
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 *   i) Indirect blocks to grow the metadata tree height
 *  ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * The function is in two parts. The first part works out the total
 * number of blocks which we need. The second part does the actual
 * allocation asking for an extent at a time (if enough contiguous free
 * blocks are available, there will only be one request per bmap call)
 * and uses the state machine to initialise the blocks in order.
 *
 * Returns: errno on error
 */

static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			    unsigned flags, struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	unsigned dblks = 0;
	unsigned ptrs_per_blk;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;
	size_t maxlen = iomap->length >> inode->i_blkbits;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (mp->mp_fheight == mp->mp_aheight) {
		struct buffer_head *bh;
		int eob;

		/* Bottom indirect block exists, find unalloced extent size */
		ptr = metapointer(end_of_metadata, mp);
		bh = mp->mp_bh[end_of_metadata];
		dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
					   maxlen, &eob);
		BUG_ON(dblks < 1);
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
			sdp->sd_diptrs;
		dblks = min(maxlen, (size_t)(ptrs_per_blk -
			    mp->mp_list[end_of_metadata]));
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		int error;
		n = blks - alloced;
		error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (error)
			return error;
		alloced += n;
		/* Metadata (and jdata data) may reuse journaled blocks,
		   so cancel any pending revokes for them. */
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_add_unrevoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				/* Remember the old top-level pointer; it
				   moves into the new branch below. */
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				for(i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
		/* fall through - Branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
		/* fall through - Tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
	return 0;
}
770
3974320c
BP
771static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
772{
773 struct gfs2_inode *ip = GFS2_I(inode);
774
775 iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
776 sizeof(struct gfs2_dinode);
777 iomap->offset = 0;
778 iomap->length = i_size_read(inode);
779 iomap->type = IOMAP_MAPPED;
780 iomap->flags = IOMAP_F_DATA_INLINE;
781}
782
/**
 * gfs2_iomap_begin - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 *
 * Returns: errno
 */
int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
		     unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct metapath mp = { .mp_aheight = 1, };
	__be64 *ptr;
	sector_t lblock;
	sector_t lend;
	int ret = 0;
	int eob;
	unsigned int len;
	struct buffer_head *bh;
	u8 height;

	trace_gfs2_iomap_start(ip, pos, length, flags);
	if (!length) {
		ret = -EINVAL;
		goto out;
	}

	if (gfs2_is_stuffed(ip)) {
		/* Inline data can be reported directly; writes must have
		   unstuffed the inode before reaching this point. */
		if (flags & IOMAP_REPORT) {
			gfs2_stuffed_iomap(inode, iomap);
			if (pos >= iomap->length)
				ret = -ENOENT;
			goto out;
		}
		BUG_ON(!(flags & IOMAP_WRITE));
	}

	lblock = pos >> inode->i_blkbits;
	lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;
	len = lend - lblock;

	/* Default answer: a hole covering the whole request. */
	iomap->offset = lblock << inode->i_blkbits;
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
	iomap->flags = IOMAP_F_MERGED;
	bmap_lock(ip, flags & IOMAP_WRITE);

	ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
	if (ret)
		goto out_release;

	/* Minimum tree height needed to address lblock. */
	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, &mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, &mp);
	if (ret)
		goto out_release;

	/* Short lookup means we hit a hole in the tree. */
	if (mp.mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, &mp);
	if (*ptr == 0)
		goto do_alloc;

	iomap->type = IOMAP_MAPPED;
	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;

	bh = mp.mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
	if (eob)
		iomap->flags |= IOMAP_F_BOUNDARY;
	iomap->length = (u64)len << inode->i_blkbits;

out_release:
	release_metapath(&mp);
	bmap_unlock(ip, flags & IOMAP_WRITE);
out:
	trace_gfs2_iomap_end(ip, iomap, ret);
	return ret;

do_alloc:
	if (flags & IOMAP_WRITE) {
		ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
	} else if (flags & IOMAP_REPORT) {
		loff_t size = i_size_read(inode);
		if (pos >= size)
			ret = -ENOENT;
		else if (height <= ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, &mp, iomap);
		else
			/* Beyond the tree but within i_size: one big hole. */
			iomap->length = size - pos;
	}
	goto out_release;
}
887
/**
 * gfs2_block_map - Map one or more blocks of an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if its ok to alloc blocks to satify the request
 *
 * The size of the requested mapping is defined in bh_map->b_size.
 *
 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
 * bh_map->b_size to indicate the size of the mapping when @lblock and
 * successive blocks are mapped, up to the requested size.
 *
 * Sets buffer_boundary() if a read of metadata will be required
 * before the next block can be mapped. Sets buffer_new() if new
 * blocks were allocated.
 *
 * Returns: errno
 */

int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct iomap iomap;
	int ret, flags = 0;

	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);

	if (create)
		flags |= IOMAP_WRITE;
	/* Delegate the real work to the iomap-based implementation and
	   translate the result back into buffer_head terms. */
	ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
			       bh_map->b_size, flags, &iomap);
	if (ret) {
		if (!create && ret == -ENOENT) {
			/* Return unmapped buffer beyond the end of file. */
			ret = 0;
		}
		goto out;
	}

	if (iomap.length > bh_map->b_size) {
		iomap.length = bh_map->b_size;
		/* Truncated mapping no longer ends at a metadata boundary. */
		iomap.flags &= ~IOMAP_F_BOUNDARY;
	}
	if (iomap.addr != IOMAP_NULL_ADDR)
		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
	bh_map->b_size = iomap.length;
	if (iomap.flags & IOMAP_F_BOUNDARY)
		set_buffer_boundary(bh_map);
	if (iomap.flags & IOMAP_F_NEW)
		set_buffer_new(bh_map);

out:
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	return ret;
}
949
/*
 * Deprecated: do not use in new code
 *
 * Maps @lblock via a temporary on-stack buffer_head and reports the
 * extent length and disk block through the out parameters.
 */
int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
{
	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
	int ret;
	int create = *new;

	BUG_ON(!extlen);
	BUG_ON(!dblock);
	BUG_ON(!new);

	/* For read-only lookups ask for a 32-block extent in one go. */
	bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
	ret = gfs2_block_map(inode, lblock, &bh, create);
	*extlen = bh.b_size >> inode->i_blkbits;
	*dblock = bh.b_blocknr;
	if (buffer_new(&bh))
		*new = 1;
	else
		*new = 0;
	return ret;
}
973
/**
 * gfs2_block_zero_range - Deal with zeroing out data
 * @inode: The inode
 * @from: Byte offset at which zeroing starts
 * @length: Number of bytes to zero
 *
 * Zeroes @length bytes starting at @from within a single page,
 * journaling or ordering the affected buffer as needed.
 *
 * This is partly borrowed from ext3.
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	struct address_space *mapping = inode->i_mapping;
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned long index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize, iblock, pos;
	struct buffer_head *bh;
	struct page *page;
	int err;

	page = find_or_create_page(mapping, index, GFP_NOFS);
	if (!page)
		return 0;

	blocksize = inode->i_sb->s_blocksize;
	iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;

	if (!buffer_mapped(bh)) {
		gfs2_block_map(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		err = -EIO;
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
		err = 0;
	}

	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
	else
		gfs2_ordered_add_inode(ip);

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
unlock:
	unlock_page(page);
	put_page(page);
	return err;
}
1045
c62baf65
FF
1046#define GFS2_JTRUNC_REVOKES 8192
1047
fa731fc4
SW
1048/**
1049 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
1050 * @inode: The inode being truncated
1051 * @oldsize: The original (larger) size
1052 * @newsize: The new smaller size
1053 *
1054 * With jdata files, we have to journal a revoke for each block which is
1055 * truncated. As a result, we need to split this into separate transactions
1056 * if the number of pages being truncated gets too large.
1057 */
1058
fa731fc4
SW
1059static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
1060{
1061 struct gfs2_sbd *sdp = GFS2_SB(inode);
1062 u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1063 u64 chunk;
1064 int error;
1065
1066 while (oldsize != newsize) {
e7fdf004
AG
1067 struct gfs2_trans *tr;
1068 unsigned int offs;
1069
fa731fc4
SW
1070 chunk = oldsize - newsize;
1071 if (chunk > max_chunk)
1072 chunk = max_chunk;
e7fdf004
AG
1073
1074 offs = oldsize & ~PAGE_MASK;
1075 if (offs && chunk > PAGE_SIZE)
1076 chunk = offs + ((chunk - offs) & PAGE_MASK);
1077
7caef267 1078 truncate_pagecache(inode, oldsize - chunk);
fa731fc4 1079 oldsize -= chunk;
e7fdf004
AG
1080
1081 tr = current->journal_info;
1082 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1083 continue;
1084
fa731fc4
SW
1085 gfs2_trans_end(sdp);
1086 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1087 if (error)
1088 return error;
1089 }
1090
1091 return 0;
1092}
1093
8b5860a3 1094static int trunc_start(struct inode *inode, u64 newsize)
b3b94faa 1095{
ff8f33c8
SW
1096 struct gfs2_inode *ip = GFS2_I(inode);
1097 struct gfs2_sbd *sdp = GFS2_SB(inode);
80990f40 1098 struct buffer_head *dibh = NULL;
b3b94faa 1099 int journaled = gfs2_is_jdata(ip);
8b5860a3 1100 u64 oldsize = inode->i_size;
b3b94faa
DT
1101 int error;
1102
fa731fc4
SW
1103 if (journaled)
1104 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1105 else
1106 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
b3b94faa
DT
1107 if (error)
1108 return error;
1109
1110 error = gfs2_meta_inode_buffer(ip, &dibh);
1111 if (error)
1112 goto out;
1113
350a9b0a 1114 gfs2_trans_add_meta(ip->i_gl, dibh);
ff8f33c8 1115
b3b94faa 1116 if (gfs2_is_stuffed(ip)) {
ff8f33c8 1117 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
b3b94faa 1118 } else {
bdba0d5e
AG
1119 unsigned int blocksize = i_blocksize(inode);
1120 unsigned int offs = newsize & (blocksize - 1);
1121 if (offs) {
1122 error = gfs2_block_zero_range(inode, newsize,
1123 blocksize - offs);
ff8f33c8 1124 if (error)
80990f40 1125 goto out;
b3b94faa 1126 }
ff8f33c8 1127 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
b3b94faa
DT
1128 }
1129
ff8f33c8 1130 i_size_write(inode, newsize);
078cd827 1131 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
ff8f33c8 1132 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa 1133
fa731fc4
SW
1134 if (journaled)
1135 error = gfs2_journaled_truncate(inode, oldsize, newsize);
1136 else
7caef267 1137 truncate_pagecache(inode, newsize);
fa731fc4 1138
a91ea69f 1139out:
80990f40
AG
1140 brelse(dibh);
1141 if (current->journal_info)
1142 gfs2_trans_end(sdp);
b3b94faa
DT
1143 return error;
1144}
1145
d552a2b9
BP
1146/**
 1147 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 1148 * @ip: inode
 1149 * @rd_gh: holder of resource group glock
5cf26b1e
AG
 1150 * @bh: buffer head to sweep
 1151 * @start: starting point in bh
 1152 * @end: end point in bh
 1153 * @meta: true if bh points to metadata (rather than data)
d552a2b9 1154 * @btotal: place to keep count of total blocks freed
d552a2b9
BP
 1155 *
 1156 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 1157 * free, and free them all. However, we do it one rgrp at a time. If this
 1158 * block has references to multiple rgrps, we break it into individual
 1159 * transactions. This allows other processes to use the rgrps while we're
 1160 * focused on a single one, for better concurrency / performance.
 1161 * At every transaction boundary, we rewrite the inode into the journal.
 1162 * That way the bitmaps are kept consistent with the inode and we can recover
 1163 * if we're interrupted by power-outages.
 1164 *
 1165 * Returns: 0, or return code if an error occurred.
 1166 * *btotal has the total number of blocks freed
 1167 */
 1168static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
5cf26b1e
AG
 1169 struct buffer_head *bh, __be64 *start, __be64 *end,
 1170 bool meta, u32 *btotal)
b3b94faa 1171{
9b8c81d1 1172 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
d552a2b9
BP
 1173 struct gfs2_rgrpd *rgd;
 1174 struct gfs2_trans *tr;
5cf26b1e 1175 __be64 *p;
d552a2b9
BP
 1176 int blks_outside_rgrp;
 1177 u64 bn, bstart, isize_blks;
 1178 s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
d552a2b9
BP
 1179 int ret = 0;
 1180 bool buf_in_tr = false; /* buffer was added to transaction */
 1181
/* Restart point: taken once per resource group touched by this buffer. */
d552a2b9 1182more_rgrps:
5cf26b1e
AG
 1183 rgd = NULL;
 1184 if (gfs2_holder_initialized(rd_gh)) {
 1185 rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
 1186 gfs2_assert_withdraw(sdp,
 1187 gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
 1188 }
d552a2b9
BP
 1189 blks_outside_rgrp = 0;
 1190 bstart = 0;
 1191 blen = 0;
d552a2b9 1192
5cf26b1e 1193 for (p = start; p < end; p++) {
d552a2b9
BP
 1194 if (!*p)
 1195 continue;
 1196 bn = be64_to_cpu(*p);
5cf26b1e
AG
 1197
 1198 if (rgd) {
 1199 if (!rgrp_contains_block(rgd, bn)) {
 1200 blks_outside_rgrp++;
 1201 continue;
 1202 }
d552a2b9 1203 } else {
90bcab99 1204 rgd = gfs2_blk2rgrpd(sdp, bn, true);
5cf26b1e
AG
 1205 if (unlikely(!rgd)) {
 1206 ret = -EIO;
 1207 goto out;
 1208 }
d552a2b9
BP
 1209 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
 1210 0, rd_gh);
 1211 if (ret)
 1212 goto out;
 1213
 1214 /* Must be done with the rgrp glock held: */
 1215 if (gfs2_rs_active(&ip->i_res) &&
 1216 rgd == ip->i_res.rs_rbm.rgd)
 1217 gfs2_rs_deltree(&ip->i_res);
 1218 }
 1219
d552a2b9
BP
 1220 /* The size of our transactions will be unknown until we
 1221 actually process all the metadata blocks that relate to
 1222 the rgrp. So we estimate. We know it can't be more than
 1223 the dinode's i_blocks and we don't want to exceed the
 1224 journal flush threshold, sd_log_thresh2. */
 1225 if (current->journal_info == NULL) {
 1226 unsigned int jblocks_rqsted, revokes;
 1227
 1228 jblocks_rqsted = rgd->rd_length + RES_DINODE +
 1229 RES_INDIRECT;
 1230 isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
 1231 if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
 1232 jblocks_rqsted +=
 1233 atomic_read(&sdp->sd_log_thresh2);
 1234 else
 1235 jblocks_rqsted += isize_blks;
 1236 revokes = jblocks_rqsted;
 1237 if (meta)
5cf26b1e 1238 revokes += end - start;
d552a2b9
BP
 1239 else if (ip->i_depth)
 1240 revokes += sdp->sd_inptrs;
 1241 ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
 1242 if (ret)
 1243 goto out_unlock;
 1244 down_write(&ip->i_rw_mutex);
 1245 }
 1246 /* check if we will exceed the transaction blocks requested */
 1247 tr = current->journal_info;
 1248 if (tr->tr_num_buf_new + RES_STATFS +
 1249 RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
 1250 /* We set blks_outside_rgrp to ensure the loop will
 1251 be repeated for the same rgrp, but with a new
 1252 transaction. */
 1253 blks_outside_rgrp++;
 1254 /* This next part is tricky. If the buffer was added
 1255 to the transaction, we've already set some block
 1256 pointers to 0, so we better follow through and free
 1257 them, or we will introduce corruption (so break).
 1258 This may be impossible, or at least rare, but I
 1259 decided to cover the case regardless.
 1260
 1261 If the buffer was not added to the transaction
 1262 (this call), doing so would exceed our transaction
 1263 size, so we need to end the transaction and start a
 1264 new one (so goto). */
 1265
 1266 if (buf_in_tr)
 1267 break;
 1268 goto out_unlock;
 1269 }
 1270
 1271 gfs2_trans_add_meta(ip->i_gl, bh);
 1272 buf_in_tr = true;
 1273 *p = 0;
/* Extend the current run of contiguous blocks, or flush it and start anew. */
 1274 if (bstart + blen == bn) {
 1275 blen++;
 1276 continue;
 1277 }
 1278 if (bstart) {
 1279 __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
 1280 (*btotal) += blen;
 1281 gfs2_add_inode_blocks(&ip->i_inode, -blen);
 1282 }
 1283 bstart = bn;
 1284 blen = 1;
 1285 }
/* Free the final pending run, if any. */
 1286 if (bstart) {
 1287 __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
 1288 (*btotal) += blen;
 1289 gfs2_add_inode_blocks(&ip->i_inode, -blen);
 1290 }
 1291out_unlock:
 1292 if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
 1293 outside the rgrp we just processed,
 1294 do it all over again. */
 1295 if (current->journal_info) {
5cf26b1e
AG
 1296 struct buffer_head *dibh;
 1297
 1298 ret = gfs2_meta_inode_buffer(ip, &dibh);
 1299 if (ret)
 1300 goto out;
d552a2b9
BP
 1301
 1302 /* Every transaction boundary, we rewrite the dinode
 1303 to keep its di_blocks current in case of failure. */
 1304 ip->i_inode.i_mtime = ip->i_inode.i_ctime =
b32c8c76 1305 current_time(&ip->i_inode);
d552a2b9
BP
 1306 gfs2_trans_add_meta(ip->i_gl, dibh);
 1307 gfs2_dinode_out(ip, dibh->b_data);
5cf26b1e 1308 brelse(dibh);
d552a2b9
BP
 1309 up_write(&ip->i_rw_mutex);
 1310 gfs2_trans_end(sdp);
 1311 }
 1312 gfs2_glock_dq_uninit(rd_gh);
 1313 cond_resched();
 1314 goto more_rgrps;
 1315 }
 1316out:
 1317 return ret;
 1318}
1319
10d2cf94
AG
1320static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1321{
1322 if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1323 return false;
1324 return true;
1325}
1326
d552a2b9
BP
1327/**
1328 * find_nonnull_ptr - find a non-null pointer given a metapath and height
d552a2b9
BP
1329 * @mp: starting metapath
1330 * @h: desired height to search
1331 *
10d2cf94 1332 * Assumes the metapath is valid (with buffers) out to height h.
d552a2b9
BP
1333 * Returns: true if a non-null pointer was found in the metapath buffer
1334 * false if all remaining pointers are NULL in the buffer
1335 */
1336static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
10d2cf94
AG
1337 unsigned int h,
1338 __u16 *end_list, unsigned int end_aligned)
d552a2b9 1339{
10d2cf94
AG
1340 struct buffer_head *bh = mp->mp_bh[h];
1341 __be64 *first, *ptr, *end;
1342
1343 first = metaptr1(h, mp);
1344 ptr = first + mp->mp_list[h];
1345 end = (__be64 *)(bh->b_data + bh->b_size);
1346 if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
1347 bool keep_end = h < end_aligned;
1348 end = first + end_list[h] + keep_end;
1349 }
d552a2b9 1350
10d2cf94 1351 while (ptr < end) {
c4a9d189 1352 if (*ptr) { /* if we have a non-null pointer */
10d2cf94 1353 mp->mp_list[h] = ptr - first;
c4a9d189
BP
1354 h++;
1355 if (h < GFS2_MAX_META_HEIGHT)
10d2cf94 1356 mp->mp_list[h] = 0;
d552a2b9 1357 return true;
c4a9d189 1358 }
10d2cf94 1359 ptr++;
d552a2b9 1360 }
10d2cf94 1361 return false;
d552a2b9
BP
1362}
1363
/* States of the punch_hole() deallocation state machine. */
1364enum dealloc_states {
 1365 DEALLOC_MP_FULL = 0, /* Strip a metapath with all buffers read in */
 1366 DEALLOC_MP_LOWER = 1, /* lower the metapath strip height */
 1367 DEALLOC_FILL_MP = 2, /* Fill in the metapath to the given height. */
 1368 DEALLOC_DONE = 3, /* process complete */
 1369};
b3b94faa 1370
5cf26b1e
AG
1371static inline void
1372metapointer_range(struct metapath *mp, int height,
1373 __u16 *start_list, unsigned int start_aligned,
10d2cf94 1374 __u16 *end_list, unsigned int end_aligned,
5cf26b1e
AG
1375 __be64 **start, __be64 **end)
1376{
1377 struct buffer_head *bh = mp->mp_bh[height];
1378 __be64 *first;
1379
1380 first = metaptr1(height, mp);
1381 *start = first;
1382 if (mp_eq_to_hgt(mp, start_list, height)) {
1383 bool keep_start = height < start_aligned;
1384 *start = first + start_list[height] + keep_start;
1385 }
1386 *end = (__be64 *)(bh->b_data + bh->b_size);
10d2cf94
AG
1387 if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1388 bool keep_end = height < end_aligned;
1389 *end = first + end_list[height] + keep_end;
1390 }
1391}
1392
1393static inline bool walk_done(struct gfs2_sbd *sdp,
1394 struct metapath *mp, int height,
1395 __u16 *end_list, unsigned int end_aligned)
1396{
1397 __u16 end;
1398
1399 if (end_list) {
1400 bool keep_end = height < end_aligned;
1401 if (!mp_eq_to_hgt(mp, end_list, height))
1402 return false;
1403 end = end_list[height] + keep_end;
1404 } else
1405 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1406 return mp->mp_list[height] >= end;
5cf26b1e
AG
1407}
1408
d552a2b9 1409/**
10d2cf94 1410 * punch_hole - deallocate blocks in a file
d552a2b9 1411 * @ip: inode to truncate
10d2cf94
AG
 1412 * @offset: the start of the hole
 1413 * @length: the size of the hole (or 0 for truncate)
 1414 *
 1415 * Punch a hole into a file or truncate a file at a given position. This
 1416 * function operates in whole blocks (@offset and @length are rounded
 1417 * accordingly); partially filled blocks must be cleared otherwise.
d552a2b9 1418 *
10d2cf94
AG
 1419 * This function works from the bottom up, and from the right to the left. In
 1420 * other words, it strips off the highest layer (data) before stripping any of
 1421 * the metadata. Doing it this way is best in case the operation is interrupted
 1422 * by power failure, etc. The dinode is rewritten in every transaction to
 1423 * guarantee integrity.
 *
 * Returns: 0 on success, or -errno on failure.
d552a2b9 1424 */
10d2cf94 1425static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
d552a2b9
BP
 1426{
 1427 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
bb491ce6 1428 u64 maxsize = sdp->sd_heightsize[ip->i_height];
10d2cf94 1429 struct metapath mp = {};
d552a2b9
BP
 1430 struct buffer_head *dibh, *bh;
 1431 struct gfs2_holder rd_gh;
cb7f0903 1432 unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
10d2cf94
AG
 1433 u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
 1434 __u16 start_list[GFS2_MAX_META_HEIGHT];
 1435 __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
4e56a641 1436 unsigned int start_aligned, uninitialized_var(end_aligned);
d552a2b9
BP
 1437 unsigned int strip_h = ip->i_height - 1;
 1438 u32 btotal = 0;
 1439 int ret, state;
 1440 int mp_h; /* metapath buffers are read in to this height */
d552a2b9 1441 u64 prev_bnr = 0;
5cf26b1e 1442 __be64 *start, *end;
b3b94faa 1443
bb491ce6
AG
 1444 if (offset >= maxsize) {
 1445 /*
 1446 * The starting point lies beyond the allocated meta-data;
 1447 * there are no blocks to deallocate.
 1448 */
 1449 return 0;
 1450 }
 1451
10d2cf94
AG
 1452 /*
 1453 * The start position of the hole is defined by lblock, start_list, and
 1454 * start_aligned. The end position of the hole is defined by lend,
 1455 * end_list, and end_aligned.
 1456 *
 1457 * start_aligned and end_aligned define down to which height the start
 1458 * and end positions are aligned to the metadata tree (i.e., the
 1459 * position is a multiple of the metadata granularity at the height
 1460 * above). This determines at which heights additional meta pointers
 1461 * needs to be preserved for the remaining data.
 1462 */
b3b94faa 1463
10d2cf94 1464 if (length) {
10d2cf94
AG
 1465 u64 end_offset = offset + length;
 1466 u64 lend;
 1467
 1468 /*
 1469 * Clip the end at the maximum file size for the given height:
 1470 * that's how far the metadata goes; files bigger than that
 1471 * will have additional layers of indirection.
 1472 */
 1473 if (end_offset > maxsize)
 1474 end_offset = maxsize;
 1475 lend = end_offset >> bsize_shift;
 1476
 1477 if (lblock >= lend)
 1478 return 0;
 1479
 1480 find_metapath(sdp, lend, &mp, ip->i_height);
 1481 end_list = __end_list;
 1482 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
 1483
 1484 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
 1485 if (end_list[mp_h])
 1486 break;
 1487 }
 1488 end_aligned = mp_h;
 1489 }
 1490
 1491 find_metapath(sdp, lblock, &mp, ip->i_height);
cb7f0903
AG
 1492 memcpy(start_list, mp.mp_list, sizeof(start_list));
 1493
cb7f0903
AG
 1494 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
 1495 if (start_list[mp_h])
 1496 break;
 1497 }
 1498 start_aligned = mp_h;
d552a2b9
BP
 1499
 1500 ret = gfs2_meta_inode_buffer(ip, &dibh);
 1501 if (ret)
 1502 return ret;
b3b94faa 1503
d552a2b9
BP
 1504 mp.mp_bh[0] = dibh;
 1505 ret = lookup_metapath(ip, &mp);
e8b43fe0
AG
 1506 if (ret)
 1507 goto out_metapath;
c3ce5aa9
AG
 1508
 1509 /* issue read-ahead on metadata */
5cf26b1e
AG
 1510 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
 1511 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1512 end_list, end_aligned, &start, &end);
5cf26b1e
AG
 1513 gfs2_metapath_ra(ip->i_gl, start, end);
 1514 }
c3ce5aa9 1515
e8b43fe0 1516 if (mp.mp_aheight == ip->i_height)
d552a2b9
BP
 1517 state = DEALLOC_MP_FULL; /* We have a complete metapath */
 1518 else
 1519 state = DEALLOC_FILL_MP; /* deal with partial metapath */
b3b94faa 1520
d552a2b9
BP
 1521 ret = gfs2_rindex_update(sdp);
 1522 if (ret)
 1523 goto out_metapath;
 1524
 1525 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
 1526 if (ret)
 1527 goto out_metapath;
 1528 gfs2_holder_mark_uninitialized(&rd_gh);
 1529
 1530 mp_h = strip_h;
 1531
/* Main state machine: see enum dealloc_states for the three phases. */
 1532 while (state != DEALLOC_DONE) {
 1533 switch (state) {
 1534 /* Truncate a full metapath at the given strip height.
 1535 * Note that strip_h == mp_h in order to be in this state. */
 1536 case DEALLOC_MP_FULL:
d552a2b9
BP
 1537 bh = mp.mp_bh[mp_h];
 1538 gfs2_assert_withdraw(sdp, bh);
 1539 if (gfs2_assert_withdraw(sdp,
 1540 prev_bnr != bh->b_blocknr)) {
 1541 printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
 1542 "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
 1543 sdp->sd_fsname,
 1544 (unsigned long long)ip->i_no_addr,
 1545 prev_bnr, ip->i_height, strip_h, mp_h);
 1546 }
 1547 prev_bnr = bh->b_blocknr;
cb7f0903 1548
5cf26b1e
AG
 1549 if (gfs2_metatype_check(sdp, bh,
 1550 (mp_h ? GFS2_METATYPE_IN :
 1551 GFS2_METATYPE_DI))) {
 1552 ret = -EIO;
 1553 goto out;
 1554 }
 1555
10d2cf94
AG
 1556 /*
 1557 * Below, passing end_aligned as 0 gives us the
 1558 * metapointer range excluding the end point: the end
 1559 * point is the first metapath we must not deallocate!
 1560 */
 1561
5cf26b1e 1562 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1563 end_list, 0 /* end_aligned */,
5cf26b1e
AG
 1564 &start, &end);
 1565 ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
 1566 start, end,
 1567 mp_h != ip->i_height - 1,
 1568 &btotal);
cb7f0903 1569
d552a2b9
BP
 1570 /* If we hit an error or just swept dinode buffer,
 1571 just exit. */
 1572 if (ret || !mp_h) {
 1573 state = DEALLOC_DONE;
 1574 break;
 1575 }
 1576 state = DEALLOC_MP_LOWER;
 1577 break;
 1578
 1579 /* lower the metapath strip height */
 1580 case DEALLOC_MP_LOWER:
 1581 /* We're done with the current buffer, so release it,
 1582 unless it's the dinode buffer. Then back up to the
 1583 previous pointer. */
 1584 if (mp_h) {
 1585 brelse(mp.mp_bh[mp_h]);
 1586 mp.mp_bh[mp_h] = NULL;
 1587 }
 1588 /* If we can't get any lower in height, we've stripped
 1589 off all we can. Next step is to back up and start
 1590 stripping the previous level of metadata. */
 1591 if (mp_h == 0) {
 1592 strip_h--;
cb7f0903 1593 memcpy(mp.mp_list, start_list, sizeof(start_list));
d552a2b9
BP
 1594 mp_h = strip_h;
 1595 state = DEALLOC_FILL_MP;
 1596 break;
 1597 }
 1598 mp.mp_list[mp_h] = 0;
 1599 mp_h--; /* search one metadata height down */
d552a2b9 1600 mp.mp_list[mp_h]++;
10d2cf94
AG
 1601 if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
 1602 break;
d552a2b9
BP
 1603 /* Here we've found a part of the metapath that is not
 1604 * allocated. We need to search at that height for the
 1605 * next non-null pointer. */
10d2cf94 1606 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
d552a2b9
BP
 1607 state = DEALLOC_FILL_MP;
 1608 mp_h++;
 1609 }
 1610 /* No more non-null pointers at this height. Back up
 1611 to the previous height and try again. */
 1612 break; /* loop around in the same state */
 1613
 1614 /* Fill the metapath with buffers to the given height. */
 1615 case DEALLOC_FILL_MP:
 1616 /* Fill the buffers out to the current height. */
 1617 ret = fillup_metapath(ip, &mp, mp_h);
c3ce5aa9 1618 if (ret < 0)
d552a2b9 1619 goto out;
c3ce5aa9
AG
 1620
 1621 /* issue read-ahead on metadata */
 1622 if (mp.mp_aheight > 1) {
5cf26b1e
AG
 1623 for (; ret > 1; ret--) {
 1624 metapointer_range(&mp, mp.mp_aheight - ret,
 1625 start_list, start_aligned,
10d2cf94 1626 end_list, end_aligned,
5cf26b1e
AG
 1627 &start, &end);
 1628 gfs2_metapath_ra(ip->i_gl, start, end);
 1629 }
c3ce5aa9 1630 }
d552a2b9
BP
 1631
 1632 /* If buffers found for the entire strip height */
e8b43fe0 1633 if (mp.mp_aheight - 1 == strip_h) {
d552a2b9
BP
 1634 state = DEALLOC_MP_FULL;
 1635 break;
 1636 }
e8b43fe0
AG
 1637 if (mp.mp_aheight < ip->i_height) /* We have a partial height */
 1638 mp_h = mp.mp_aheight - 1;
d552a2b9
BP
 1639
 1640 /* If we find a non-null block pointer, crawl a bit
 1641 higher up in the metapath and try again, otherwise
 1642 we need to look lower for a new starting point. */
10d2cf94 1643 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
d552a2b9
BP
 1644 mp_h++;
 1645 else
 1646 state = DEALLOC_MP_LOWER;
b3b94faa 1647 break;
d552a2b9 1648 }
b3b94faa
DT
 1649 }
 1650
/* Account all blocks freed above: statfs, quota, and the dinode itself. */
d552a2b9
BP
 1651 if (btotal) {
 1652 if (current->journal_info == NULL) {
 1653 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
 1654 RES_QUOTA, 0);
 1655 if (ret)
 1656 goto out;
 1657 down_write(&ip->i_rw_mutex);
 1658 }
 1659 gfs2_statfs_change(sdp, 0, +btotal, 0);
 1660 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
 1661 ip->i_inode.i_gid);
b32c8c76 1662 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
d552a2b9
BP
 1663 gfs2_trans_add_meta(ip->i_gl, dibh);
 1664 gfs2_dinode_out(ip, dibh->b_data);
 1665 up_write(&ip->i_rw_mutex);
 1666 gfs2_trans_end(sdp);
 1667 }
b3b94faa 1668
d552a2b9
BP
 1669out:
 1670 if (gfs2_holder_initialized(&rd_gh))
 1671 gfs2_glock_dq_uninit(&rd_gh);
 1672 if (current->journal_info) {
 1673 up_write(&ip->i_rw_mutex);
 1674 gfs2_trans_end(sdp);
 1675 cond_resched();
 1676 }
 1677 gfs2_quota_unhold(ip);
 1678out_metapath:
 1679 release_metapath(&mp);
 1680 return ret;
b3b94faa
DT
 1681}
1682
1683static int trunc_end(struct gfs2_inode *ip)
1684{
feaa7bba 1685 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
b3b94faa
DT
1686 struct buffer_head *dibh;
1687 int error;
1688
1689 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1690 if (error)
1691 return error;
1692
1693 down_write(&ip->i_rw_mutex);
1694
1695 error = gfs2_meta_inode_buffer(ip, &dibh);
1696 if (error)
1697 goto out;
1698
a2e0f799 1699 if (!i_size_read(&ip->i_inode)) {
ecc30c79 1700 ip->i_height = 0;
ce276b06 1701 ip->i_goal = ip->i_no_addr;
b3b94faa 1702 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
45138990 1703 gfs2_ordered_del_inode(ip);
b3b94faa 1704 }
078cd827 1705 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
383f01fb 1706 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
b3b94faa 1707
350a9b0a 1708 gfs2_trans_add_meta(ip->i_gl, dibh);
539e5d6b 1709 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa
DT
1710 brelse(dibh);
1711
a91ea69f 1712out:
b3b94faa 1713 up_write(&ip->i_rw_mutex);
b3b94faa 1714 gfs2_trans_end(sdp);
b3b94faa
DT
1715 return error;
1716}
1717
1718/**
1719 * do_shrink - make a file smaller
ff8f33c8 1720 * @inode: the inode
ff8f33c8 1721 * @newsize: the size to make the file
b3b94faa 1722 *
ff8f33c8
SW
1723 * Called with an exclusive lock on @inode. The @size must
1724 * be equal to or smaller than the current inode size.
b3b94faa
DT
1725 *
1726 * Returns: errno
1727 */
1728
8b5860a3 1729static int do_shrink(struct inode *inode, u64 newsize)
b3b94faa 1730{
ff8f33c8 1731 struct gfs2_inode *ip = GFS2_I(inode);
b3b94faa
DT
1732 int error;
1733
8b5860a3 1734 error = trunc_start(inode, newsize);
b3b94faa
DT
1735 if (error < 0)
1736 return error;
ff8f33c8 1737 if (gfs2_is_stuffed(ip))
b3b94faa
DT
1738 return 0;
1739
10d2cf94 1740 error = punch_hole(ip, newsize, 0);
ff8f33c8 1741 if (error == 0)
b3b94faa
DT
1742 error = trunc_end(ip);
1743
1744 return error;
1745}
1746
ff8f33c8 1747void gfs2_trim_blocks(struct inode *inode)
a13b8c5f 1748{
ff8f33c8
SW
1749 int ret;
1750
8b5860a3 1751 ret = do_shrink(inode, inode->i_size);
ff8f33c8
SW
1752 WARN_ON(ret != 0);
1753}
1754
1755/**
 1756 * do_grow - Touch and update inode size
 1757 * @inode: The inode
 1758 * @size: The new size
 1759 *
 1760 * This function updates the timestamps on the inode and
 1761 * may also increase the size of the inode. This function
 1762 * must not be called with @size any smaller than the current
 1763 * inode size.
 1764 *
 1765 * Although it is not strictly required to unstuff files here,
 1766 * earlier versions of GFS2 have a bug in the stuffed file reading
 1767 * code which will result in a buffer overrun if the size is larger
 1768 * than the max stuffed file size. In order to prevent this from
25985edc 1769 * occurring, such files are unstuffed, but in other cases we can
ff8f33c8
SW
 1770 * just update the inode size directly.
 1771 *
 1772 * Returns: 0 on success, or -ve on error
 1773 */
 1774
 1775static int do_grow(struct inode *inode, u64 size)
 1776{
 1777 struct gfs2_inode *ip = GFS2_I(inode);
 1778 struct gfs2_sbd *sdp = GFS2_SB(inode);
7b9cff46 1779 struct gfs2_alloc_parms ap = { .target = 1, };
a13b8c5f
WC
 1780 struct buffer_head *dibh;
 1781 int error;
2f7ee358 1782 int unstuff = 0;
a13b8c5f 1783
/* Growing past the stuffed limit needs a quota check and a block reservation
   for the unstuff below. */
235628c5 1784 if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
b8fbf471 1785 error = gfs2_quota_lock_check(ip, &ap);
ff8f33c8 1786 if (error)
5407e242 1787 return error;
ff8f33c8 1788
7b9cff46 1789 error = gfs2_inplace_reserve(ip, &ap);
ff8f33c8
SW
 1790 if (error)
 1791 goto do_grow_qunlock;
2f7ee358 1792 unstuff = 1;
ff8f33c8
SW
 1793 }
 1794
a01aedfe
BP
 1795 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
 1796 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
 1797 0 : RES_QUOTA), 0);
a13b8c5f 1798 if (error)
ff8f33c8 1799 goto do_grow_release;
a13b8c5f 1800
2f7ee358 1801 if (unstuff) {
ff8f33c8
SW
 1802 error = gfs2_unstuff_dinode(ip, NULL);
 1803 if (error)
 1804 goto do_end_trans;
 1805 }
a13b8c5f
WC
 1806
 1807 error = gfs2_meta_inode_buffer(ip, &dibh);
 1808 if (error)
ff8f33c8 1809 goto do_end_trans;
a13b8c5f 1810
ff8f33c8 1811 i_size_write(inode, size);
078cd827 1812 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
350a9b0a 1813 gfs2_trans_add_meta(ip->i_gl, dibh);
a13b8c5f
WC
 1814 gfs2_dinode_out(ip, dibh->b_data);
 1815 brelse(dibh);
 1816
/* Unwind in reverse acquisition order. Note that do_grow_qunlock sits inside
   the if (unstuff) block: it is only reachable when quota was locked above. */
ff8f33c8 1817do_end_trans:
a13b8c5f 1818 gfs2_trans_end(sdp);
ff8f33c8 1819do_grow_release:
2f7ee358 1820 if (unstuff) {
ff8f33c8
SW
 1821 gfs2_inplace_release(ip);
 1822do_grow_qunlock:
 1823 gfs2_quota_unlock(ip);
ff8f33c8 1824 }
a13b8c5f
WC
 1825 return error;
 1826}
1827
b3b94faa 1828/**
ff8f33c8
SW
1829 * gfs2_setattr_size - make a file a given size
1830 * @inode: the inode
1831 * @newsize: the size to make the file
b3b94faa 1832 *
ff8f33c8 1833 * The file size can grow, shrink, or stay the same size. This
3e7aafc3 1834 * is called holding i_rwsem and an exclusive glock on the inode
ff8f33c8 1835 * in question.
b3b94faa
DT
1836 *
1837 * Returns: errno
1838 */
1839
ff8f33c8 1840int gfs2_setattr_size(struct inode *inode, u64 newsize)
b3b94faa 1841{
af5c2697 1842 struct gfs2_inode *ip = GFS2_I(inode);
ff8f33c8 1843 int ret;
b3b94faa 1844
ff8f33c8 1845 BUG_ON(!S_ISREG(inode->i_mode));
b3b94faa 1846
ff8f33c8
SW
1847 ret = inode_newsize_ok(inode, newsize);
1848 if (ret)
1849 return ret;
b3b94faa 1850
562c72aa
CH
1851 inode_dio_wait(inode);
1852
b54e9a0b 1853 ret = gfs2_rsqa_alloc(ip);
d2b47cfb 1854 if (ret)
2b3dcf35 1855 goto out;
d2b47cfb 1856
8b5860a3 1857 if (newsize >= inode->i_size) {
2b3dcf35
BP
1858 ret = do_grow(inode, newsize);
1859 goto out;
1860 }
ff8f33c8 1861
8b5860a3 1862 ret = do_shrink(inode, newsize);
2b3dcf35 1863out:
a097dc7e 1864 gfs2_rsqa_delete(ip, NULL);
2b3dcf35 1865 return ret;
b3b94faa
DT
1866}
1867
1868int gfs2_truncatei_resume(struct gfs2_inode *ip)
1869{
1870 int error;
10d2cf94 1871 error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
b3b94faa
DT
1872 if (!error)
1873 error = trunc_end(ip);
1874 return error;
1875}
1876
/* Deallocate every block of @ip by punching a whole-file hole. */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	return punch_hole(ip, 0, 0);
}
1881
b50f227b
SW
1882/**
1883 * gfs2_free_journal_extents - Free cached journal bmap info
1884 * @jd: The journal
1885 *
1886 */
1887
1888void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
1889{
1890 struct gfs2_journal_extent *jext;
1891
1892 while(!list_empty(&jd->extent_list)) {
1893 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
1894 list_del(&jext->list);
1895 kfree(jext);
1896 }
1897}
1898
1899/**
1900 * gfs2_add_jextent - Add or merge a new extent to extent cache
1901 * @jd: The journal descriptor
1902 * @lblock: The logical block at start of new extent
c62baf65 1903 * @dblock: The physical block at start of new extent
b50f227b
SW
1904 * @blocks: Size of extent in fs blocks
1905 *
1906 * Returns: 0 on success or -ENOMEM
1907 */
1908
1909static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
1910{
1911 struct gfs2_journal_extent *jext;
1912
1913 if (!list_empty(&jd->extent_list)) {
1914 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
1915 if ((jext->dblock + jext->blocks) == dblock) {
1916 jext->blocks += blocks;
1917 return 0;
1918 }
1919 }
1920
1921 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
1922 if (jext == NULL)
1923 return -ENOMEM;
1924 jext->dblock = dblock;
1925 jext->lblock = lblock;
1926 jext->blocks = blocks;
1927 list_add_tail(&jext->list, &jd->extent_list);
1928 jd->nr_extents++;
1929 return 0;
1930}
1931
/**
 * gfs2_map_journal_extents - Cache journal bmap info
 * @sdp: The super block
 * @jd: The journal to map
 *
 * Create a reusable "extent" mapping from all logical
 * blocks to all physical blocks for the given journal.  This will save
 * us time when writing journal blocks.  Most journals will have only one
 * extent that maps all their logical blocks.  That's because gfs2.mkfs
 * arranges the journal blocks sequentially to maximize performance.
 * So the extent would map the first block for the entire file length.
 * However, gfs2_jadd can happen while file activity is happening, so
 * those journals may not be sequential.  Less likely is the case where
 * the users created their own journals by mounting the metafs and
 * laying it out.  But it's still possible.  These journals might have
 * several extents.
 *
 * Returns: 0 on success, or error on failure
 */

int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
{
	u64 lblock = 0;
	u64 lblock_stop;
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct buffer_head bh;
	unsigned int shift = sdp->sd_sb.sb_bsize_shift;
	u64 size;
	int rc;

	/* Total number of bytes (in whole filesystem blocks) to map. */
	lblock_stop = i_size_read(jd->jd_inode) >> shift;
	size = (lblock_stop - lblock) << shift;
	jd->nr_extents = 0;
	/* Caller is expected to hand us a journal with no cached extents. */
	WARN_ON(!list_empty(&jd->extent_list));

	do {
		/*
		 * Reuse a stack buffer_head purely as a bmap query: b_size is
		 * the maximum span we want mapped; gfs2_block_map() shrinks it
		 * to the length of the contiguous run it actually found.
		 */
		bh.b_state = 0;
		bh.b_blocknr = 0;
		bh.b_size = size;
		rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
		/* A hole in a journal is invalid, hence !buffer_mapped fails too. */
		if (rc || !buffer_mapped(&bh))
			goto fail;
		rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
		if (rc)
			goto fail;
		size -= bh.b_size;
		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
	} while(size > 0);

	fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
		jd->nr_extents);
	return 0;

fail:
	/*
	 * Report where mapping stopped: `size` still holds the unmapped
	 * remainder, and `bh` the state of the query that failed.
	 */
	fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
		rc, jd->jd_jid,
		(unsigned long long)(i_size_read(jd->jd_inode) - size),
		jd->nr_extents);
	fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
		rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
		bh.b_state, (unsigned long long)bh.b_size);
	/* Drop any partially-built extent list so callers don't see half a map. */
	gfs2_free_journal_extents(jd);
	return rc;
}
1996
b3b94faa
DT
1997/**
1998 * gfs2_write_alloc_required - figure out if a write will require an allocation
1999 * @ip: the file being written to
2000 * @offset: the offset to write to
2001 * @len: the number of bytes being written
b3b94faa 2002 *
461cb419 2003 * Returns: 1 if an alloc is required, 0 otherwise
b3b94faa
DT
2004 */
2005
cd915493 2006int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
461cb419 2007 unsigned int len)
b3b94faa 2008{
feaa7bba 2009 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
941e6d7d
SW
2010 struct buffer_head bh;
2011 unsigned int shift;
2012 u64 lblock, lblock_stop, size;
7ed122e4 2013 u64 end_of_file;
b3b94faa 2014
b3b94faa
DT
2015 if (!len)
2016 return 0;
2017
2018 if (gfs2_is_stuffed(ip)) {
235628c5 2019 if (offset + len > gfs2_max_stuffed_size(ip))
461cb419 2020 return 1;
b3b94faa
DT
2021 return 0;
2022 }
2023
941e6d7d 2024 shift = sdp->sd_sb.sb_bsize_shift;
7ed122e4 2025 BUG_ON(gfs2_is_dir(ip));
a2e0f799 2026 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
7ed122e4
SW
2027 lblock = offset >> shift;
2028 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
2029 if (lblock_stop > end_of_file)
461cb419 2030 return 1;
b3b94faa 2031
941e6d7d
SW
2032 size = (lblock_stop - lblock) << shift;
2033 do {
2034 bh.b_state = 0;
2035 bh.b_size = size;
2036 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
2037 if (!buffer_mapped(&bh))
461cb419 2038 return 1;
941e6d7d
SW
2039 size -= bh.b_size;
2040 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2041 } while(size > 0);
b3b94faa
DT
2042
2043 return 0;
2044}
2045
4e56a641
AG
2046static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2047{
2048 struct gfs2_inode *ip = GFS2_I(inode);
2049 struct buffer_head *dibh;
2050 int error;
2051
2052 if (offset >= inode->i_size)
2053 return 0;
2054 if (offset + length > inode->i_size)
2055 length = inode->i_size - offset;
2056
2057 error = gfs2_meta_inode_buffer(ip, &dibh);
2058 if (error)
2059 return error;
2060 gfs2_trans_add_meta(ip->i_gl, dibh);
2061 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2062 length);
2063 brelse(dibh);
2064 return 0;
2065}
2066
2067static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2068 loff_t length)
2069{
2070 struct gfs2_sbd *sdp = GFS2_SB(inode);
2071 loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2072 int error;
2073
2074 while (length) {
2075 struct gfs2_trans *tr;
2076 loff_t chunk;
2077 unsigned int offs;
2078
2079 chunk = length;
2080 if (chunk > max_chunk)
2081 chunk = max_chunk;
2082
2083 offs = offset & ~PAGE_MASK;
2084 if (offs && chunk > PAGE_SIZE)
2085 chunk = offs + ((chunk - offs) & PAGE_MASK);
2086
2087 truncate_pagecache_range(inode, offset, chunk);
2088 offset += chunk;
2089 length -= chunk;
2090
2091 tr = current->journal_info;
2092 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2093 continue;
2094
2095 gfs2_trans_end(sdp);
2096 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2097 if (error)
2098 return error;
2099 }
2100 return 0;
2101}
2102
2103int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2104{
2105 struct inode *inode = file_inode(file);
2106 struct gfs2_inode *ip = GFS2_I(inode);
2107 struct gfs2_sbd *sdp = GFS2_SB(inode);
2108 int error;
2109
2110 if (gfs2_is_jdata(ip))
2111 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2112 GFS2_JTRUNC_REVOKES);
2113 else
2114 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2115 if (error)
2116 return error;
2117
2118 if (gfs2_is_stuffed(ip)) {
2119 error = stuffed_zero_range(inode, offset, length);
2120 if (error)
2121 goto out;
2122 } else {
2123 unsigned int start_off, end_off, blocksize;
2124
2125 blocksize = i_blocksize(inode);
2126 start_off = offset & (blocksize - 1);
2127 end_off = (offset + length) & (blocksize - 1);
2128 if (start_off) {
2129 unsigned int len = length;
2130 if (length > blocksize - start_off)
2131 len = blocksize - start_off;
2132 error = gfs2_block_zero_range(inode, offset, len);
2133 if (error)
2134 goto out;
2135 if (start_off + length < blocksize)
2136 end_off = 0;
2137 }
2138 if (end_off) {
2139 error = gfs2_block_zero_range(inode,
2140 offset + length - end_off, end_off);
2141 if (error)
2142 goto out;
2143 }
2144 }
2145
2146 if (gfs2_is_jdata(ip)) {
2147 BUG_ON(!current->journal_info);
2148 gfs2_journaled_truncate_range(inode, offset, length);
2149 } else
2150 truncate_pagecache_range(inode, offset, offset + length - 1);
2151
2152 file_update_time(file);
2153 mark_inode_dirty(inode);
2154
2155 if (current->journal_info)
2156 gfs2_trans_end(sdp);
2157
2158 if (!gfs2_is_stuffed(ip))
2159 error = punch_hole(ip, offset, length);
2160
2161out:
2162 if (current->journal_info)
2163 gfs2_trans_end(sdp);
2164 return error;
2165}