]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/gfs2/rgrp.c
gfs2: Turn gfs2_rbm_incr into gfs2_rbm_add
[mirror_ubuntu-jammy-kernel.git] / fs / gfs2 / rgrp.c
CommitLineData
7336d0e6 1// SPDX-License-Identifier: GPL-2.0-only
b3b94faa
DT
2/*
3 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
fe6c991c 4 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
b3b94faa
DT
5 */
6
d77d1b58
JP
7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
b3b94faa
DT
9#include <linux/slab.h>
10#include <linux/spinlock.h>
11#include <linux/completion.h>
12#include <linux/buffer_head.h>
f42faf4f 13#include <linux/fs.h>
5c676f6d 14#include <linux/gfs2_ondisk.h>
1f466a47 15#include <linux/prefetch.h>
f15ab561 16#include <linux/blkdev.h>
7c9ca621 17#include <linux/rbtree.h>
9dbe9610 18#include <linux/random.h>
b3b94faa
DT
19
20#include "gfs2.h"
5c676f6d 21#include "incore.h"
b3b94faa
DT
22#include "glock.h"
23#include "glops.h"
b3b94faa
DT
24#include "lops.h"
25#include "meta_io.h"
26#include "quota.h"
27#include "rgrp.h"
28#include "super.h"
29#include "trans.h"
5c676f6d 30#include "util.h"
172e045a 31#include "log.h"
c8cdf479 32#include "inode.h"
63997775 33#include "trace_gfs2.h"
850d2d91 34#include "dir.h"
b3b94faa 35
2c1e52aa 36#define BFITNOENT ((u32)~0)
6760bdcd 37#define NO_BLOCK ((u64)~0)
88c8ab1f
SW
38
39/*
40 * These routines are used by the resource group routines (rgrp.c)
41 * to keep track of block allocation. Each block is represented by two
feaa7bba
SW
42 * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks.
43 *
44 * 0 = Free
45 * 1 = Used (not metadata)
46 * 2 = Unlinked (still in use) inode
47 * 3 = Used (metadata)
88c8ab1f
SW
48 */
49
5ce13431
BP
/* A bitmap extent: a starting position plus a length in blocks. */
struct gfs2_extent {
	struct gfs2_rbm rbm;	/* start of the extent */
	u32 len;		/* number of blocks in the extent */
};

/*
 * valid_change[new_state * 4 + cur_state] is nonzero iff a bitmap entry
 * is allowed to move from cur_state to new_state.  Rows are the new
 * state, columns the current state (FREE, USED, UNLINKED, DINODE).
 */
static const char valid_change[16] = {
	        /* current */
	/* n */ 0, 1, 1, 1,
	/* e */ 1, 0, 0, 0,
	/* w */ 0, 0, 0, 1,
	        1, 0, 0, 0
};

/* Forward declaration: bitmap search used by the allocators below. */
static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
			 const struct gfs2_inode *ip, bool nowrap);
ff7f4cb4
SW
65
66
88c8ab1f
SW
/**
 * gfs2_setbit - Set a bit in the bitmaps
 * @rbm: The position of the bit to set
 * @do_clone: Also set the clone bitmap, if it exists
 * @new_state: the new state of the block
 *
 * Writes a two-bit block state into the on-disk bitmap buffer (and the
 * clone bitmap, if requested and present).  An illegal state transition
 * per valid_change[] marks the rgrp inconsistent instead of corrupting it.
 */

static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
			       unsigned char new_state)
{
	unsigned char *byte1, *byte2, *end, cur_state;
	struct gfs2_bitmap *bi = rbm_bi(rbm);
	unsigned int buflen = bi->bi_bytes;
	/* Bit position of this block's 2-bit entry within its byte. */
	const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

	byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
	end = bi->bi_bh->b_data + bi->bi_offset + buflen;

	BUG_ON(byte1 >= end);

	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;

	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
		/* Invalid transition: dump diagnostics and flag the rgrp. */
		struct gfs2_sbd *sdp = rbm->rgd->rd_sbd;

		fs_warn(sdp, "buf_blk = 0x%x old_state=%d, new_state=%d\n",
			rbm->offset, cur_state, new_state);
		fs_warn(sdp, "rgrp=0x%llx bi_start=0x%x biblk: 0x%llx\n",
			(unsigned long long)rbm->rgd->rd_addr, bi->bi_start,
			(unsigned long long)bi->bi_bh->b_blocknr);
		fs_warn(sdp, "bi_offset=0x%x bi_bytes=0x%x block=0x%llx\n",
			bi->bi_offset, bi->bi_bytes,
			(unsigned long long)gfs2_rbm_to_block(rbm));
		dump_stack();
		gfs2_consist_rgrpd(rbm->rgd);
		return;
	}
	/* XOR with (old ^ new) rewrites just this entry's two bits. */
	*byte1 ^= (cur_state ^ new_state) << bit;

	if (do_clone && bi->bi_clone) {
		byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
		*byte2 ^= (cur_state ^ new_state) << bit;
	}
}
113
114/**
115 * gfs2_testbit - test a bit in the bitmaps
c04a2ef3 116 * @rbm: The bit to test
dffe12a8
BP
117 * @use_clone: If true, test the clone bitmap, not the official bitmap.
118 *
119 * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps,
120 * not the "real" bitmaps, to avoid allocating recently freed blocks.
88c8ab1f 121 *
c04a2ef3 122 * Returns: The two bit block state of the requested bit
88c8ab1f
SW
123 */
124
dffe12a8 125static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
88c8ab1f 126{
e579ed4f 127 struct gfs2_bitmap *bi = rbm_bi(rbm);
dffe12a8 128 const u8 *buffer;
c04a2ef3 129 const u8 *byte;
88c8ab1f
SW
130 unsigned int bit;
131
dffe12a8
BP
132 if (use_clone && bi->bi_clone)
133 buffer = bi->bi_clone;
134 else
135 buffer = bi->bi_bh->b_data;
136 buffer += bi->bi_offset;
c04a2ef3
SW
137 byte = buffer + (rbm->offset / GFS2_NBBY);
138 bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
88c8ab1f 139
c04a2ef3 140 return (*byte >> bit) & GFS2_BIT_MASK;
88c8ab1f
SW
141}
142
223b2b88
SW
143/**
144 * gfs2_bit_search
145 * @ptr: Pointer to bitmap data
146 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
147 * @state: The state we are searching for
148 *
149 * We xor the bitmap data with a patter which is the bitwise opposite
150 * of what we are looking for, this gives rise to a pattern of ones
151 * wherever there is a match. Since we have two bits per entry, we
152 * take this pattern, shift it down by one place and then and it with
153 * the original. All the even bit positions (0,2,4, etc) then represent
154 * successful matches, so we mask with 0x55555..... to remove the unwanted
155 * odd bit positions.
156 *
157 * This allows searching of a whole u64 at once (32 blocks) with a
158 * single test (on 64 bit arches).
159 */
160
161static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
162{
163 u64 tmp;
164 static const u64 search[] = {
075ac448
HE
165 [0] = 0xffffffffffffffffULL,
166 [1] = 0xaaaaaaaaaaaaaaaaULL,
167 [2] = 0x5555555555555555ULL,
168 [3] = 0x0000000000000000ULL,
223b2b88
SW
169 };
170 tmp = le64_to_cpu(*ptr) ^ search[state];
171 tmp &= (tmp >> 1);
172 tmp &= mask;
173 return tmp;
174}
175
8e2e0047
BP
176/**
177 * rs_cmp - multi-block reservation range compare
178 * @blk: absolute file system block number of the new reservation
179 * @len: number of blocks in the new reservation
180 * @rs: existing reservation to compare against
181 *
182 * returns: 1 if the block range is beyond the reach of the reservation
183 * -1 if the block range is before the start of the reservation
184 * 0 if the block range overlaps with the reservation
185 */
186static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
187{
4a993fb1 188 u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
8e2e0047
BP
189
190 if (blk >= startblk + rs->rs_free)
191 return 1;
192 if (blk + len - 1 < startblk)
193 return -1;
194 return 0;
195}
196
88c8ab1f
SW
/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 *               a block in a given allocation state.
 * @buf: the buffer that holds the bitmaps
 * @len: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buffer)
 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem.  @buffer will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures, but
 * headers are always a multiple of 64 bits long so that the buffer is
 * always aligned to a 64 bit boundary.
 *
 * The size of the buffer is in bytes, but is it assumed that it is
 * always ok to read a complete multiple of 64 bits at the end
 * of the block in case the end is not aligned to a natural boundary.
 *
 * Return: the block number (bitmap buffer scope) that was found
 */

static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
		       u32 goal, u8 state)
{
	/* Bit offset of the goal entry within its 64-bit word (2 bits/entry). */
	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
	/* goal >> 5: 32 entries per u64 word. */
	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
	u64 tmp;
	u64 mask = 0x5555555555555555ULL;
	u32 bit;

	/* Mask off bits we don't care about at the start of the search */
	mask <<= spoint;
	tmp = gfs2_bit_search(ptr, mask, state);
	ptr++;
	while(tmp == 0 && ptr < end) {
		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
		ptr++;
	}
	/* Mask off any bits which are more than len bytes from the start */
	if (ptr == end && (len & (sizeof(u64) - 1)))
		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
	/* Didn't find anything, so return */
	if (tmp == 0)
		return BFITNOENT;
	ptr--;			/* step back to the word that produced a hit */
	bit = __ffs64(tmp);
	bit /= 2;	/* two bits per entry in the bitmap */
	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}
247
ff7f4cb4
SW
/**
 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 * @rbm: The rbm with rgd already set correctly
 * @block: The block number (filesystem relative)
 *
 * This sets the bi and offset members of an rbm based on a
 * resource group and a filesystem relative block number.  The
 * resource group must be set in the rbm on entry, the bi and
 * offset members will be set by this function.
 *
 * Returns: 0 on success, or an error code
 */

static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
{
	if (!rgrp_contains_block(rbm->rgd, block))
		return -E2BIG;
	rbm->bii = 0;
	rbm->offset = block - rbm->rgd->rd_data0;
	/* Check if the block is within the first block */
	if (rbm->offset < rbm_bi(rbm)->bi_blocks)
		return 0;

	/* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp.
	 * The first bitmap block carries the larger gfs2_rgrp header, so
	 * later blocks hold slightly more entries; biasing the offset lets
	 * a single divide by sd_blocks_per_bitmap pick the right block. */
	rbm->offset += (sizeof(struct gfs2_rgrp) -
			sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
	rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
	rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
	return 0;
}
278
149ed7f5 279/**
0eacdd16 280 * gfs2_rbm_add - add a number of blocks to an rbm
149ed7f5 281 * @rbm: The rbm with rgd already set correctly
0eacdd16 282 * @blocks: The number of blocks to add to rpm
149ed7f5 283 *
0eacdd16
AG
284 * This function takes an existing rbm structure and adds a number of blocks to
285 * it.
149ed7f5 286 *
0eacdd16 287 * Returns: True if the new rbm would point past the end of the rgrp.
149ed7f5
BP
288 */
289
0eacdd16 290static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks)
149ed7f5 291{
0eacdd16
AG
292 struct gfs2_rgrpd *rgd = rbm->rgd;
293 struct gfs2_bitmap *bi = rgd->rd_bits + rbm->bii;
294
295 if (rbm->offset + blocks < bi->bi_blocks) {
296 rbm->offset += blocks;
149ed7f5
BP
297 return false;
298 }
0eacdd16 299 blocks -= bi->bi_blocks - rbm->offset;
149ed7f5 300
0eacdd16
AG
301 for(;;) {
302 bi++;
303 if (bi == rgd->rd_bits + rgd->rd_length)
304 return true;
305 if (blocks < bi->bi_blocks) {
306 rbm->offset = blocks;
307 rbm->bii = bi - rgd->rd_bits;
308 return false;
309 }
310 blocks -= bi->bi_blocks;
311 }
149ed7f5
BP
312}
313
ff7f4cb4
SW
314/**
315 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
316 * @rbm: Position to search (value/result)
317 * @n_unaligned: Number of unaligned blocks to check
318 * @len: Decremented for each block found (terminate on zero)
319 *
320 * Returns: true if a non-free block is encountered
321 */
322
323static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
324{
ff7f4cb4
SW
325 u32 n;
326 u8 res;
327
328 for (n = 0; n < n_unaligned; n++) {
dffe12a8 329 res = gfs2_testbit(rbm, true);
ff7f4cb4
SW
330 if (res != GFS2_BLKST_FREE)
331 return true;
332 (*len)--;
333 if (*len == 0)
334 return true;
0eacdd16 335 if (gfs2_rbm_add(rbm, 1))
ff7f4cb4
SW
336 return true;
337 }
338
339 return false;
340}
341
/**
 * gfs2_free_extlen - Return extent length of free blocks
 * @rrbm: Starting position
 * @len: Max length to check
 *
 * Starting at the block specified by the rbm, see how many free blocks
 * there are, not reading more than len blocks ahead.  This can be done
 * using memchr_inv when the blocks are byte aligned, but has to be done
 * on a block by block basis in case of unaligned blocks.  Also this
 * function can cope with bitmap boundaries (although it must stop on
 * a resource group boundary)
 *
 * Returns: Number of free blocks in the extent
 */

static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
{
	struct gfs2_rbm rbm = *rrbm;		/* local copy; caller's rbm untouched */
	u32 n_unaligned = rbm.offset & 3;	/* entries before a byte boundary */
	u32 size = len;
	u32 bytes;
	u32 chunk_size;
	u8 *ptr, *start, *end;
	u64 block;
	struct gfs2_bitmap *bi;

	/* Step block-by-block until the position is byte aligned. */
	if (n_unaligned &&
	    gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
		goto out;

	n_unaligned = len & 3;
	/* Start is now byte aligned */
	while (len > 3) {
		bi = rbm_bi(&rbm);
		start = bi->bi_bh->b_data;
		if (bi->bi_clone)
			start = bi->bi_clone;	/* prefer clone: skips recently freed */
		start += bi->bi_offset;
		end = start + bi->bi_bytes;
		BUG_ON(rbm.offset & 3);
		start += (rbm.offset / GFS2_NBBY);
		bytes = min_t(u32, len / GFS2_NBBY, (end - start));
		/* A zero byte is 4 free blocks; find the first non-zero one. */
		ptr = memchr_inv(start, 0, bytes);
		chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
		chunk_size *= GFS2_NBBY;
		BUG_ON(len < chunk_size);
		len -= chunk_size;
		block = gfs2_rbm_to_block(&rbm);
		if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
			/* Fell off the end of the rgrp: nothing left to scan. */
			n_unaligned = 0;
			break;
		}
		if (ptr) {
			/* Hit a non-free entry inside this byte; finish the
			   remaining (up to 3) entries one at a time. */
			n_unaligned = 3;
			break;
		}
		n_unaligned = len & 3;
	}

	/* Deal with any bits left over at the end */
	if (n_unaligned)
		gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
out:
	return size - len;
}
407
88c8ab1f
SW
408/**
409 * gfs2_bitcount - count the number of bits in a certain state
886b1416 410 * @rgd: the resource group descriptor
88c8ab1f
SW
411 * @buffer: the buffer that holds the bitmaps
412 * @buflen: the length (in bytes) of the buffer
413 * @state: the state of the block we're looking for
414 *
415 * Returns: The number of bits
416 */
417
110acf38
SW
418static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
419 unsigned int buflen, u8 state)
88c8ab1f 420{
110acf38
SW
421 const u8 *byte = buffer;
422 const u8 *end = buffer + buflen;
423 const u8 state1 = state << 2;
424 const u8 state2 = state << 4;
425 const u8 state3 = state << 6;
cd915493 426 u32 count = 0;
88c8ab1f
SW
427
428 for (; byte < end; byte++) {
429 if (((*byte) & 0x03) == state)
430 count++;
431 if (((*byte) & 0x0C) == state1)
432 count++;
433 if (((*byte) & 0x30) == state2)
434 count++;
435 if (((*byte) & 0xC0) == state3)
436 count++;
437 }
438
439 return count;
440}
441
b3b94faa
DT
/**
 * gfs2_rgrp_verify - Verify that a resource group is consistent
 * @rgd: the rgrp
 *
 * Re-counts the bitmap entries in all bitmap blocks of @rgd and checks the
 * totals against the counters cached in the rgrp header.  On any mismatch
 * the rgrp is flagged inconsistent via gfs2_consist_rgrpd().
 */

void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi = NULL;
	u32 length = rgd->rd_length;
	u32 count[4], tmp;		/* one counter per 2-bit block state */
	int buf, x;

	memset(count, 0, 4 * sizeof(u32));

	/* Count # blocks in each of 4 possible allocation states */
	for (buf = 0; buf < length; buf++) {
		bi = rgd->rd_bits + buf;
		for (x = 0; x < 4; x++)
			count[x] += gfs2_bitcount(rgd,
						  bi->bi_bh->b_data +
						  bi->bi_offset,
						  bi->bi_bytes, x);
	}

	if (count[0] != rgd->rd_free) {
		gfs2_lm(sdp, "free data mismatch:  %u != %u\n",
			count[0], rgd->rd_free);
		gfs2_consist_rgrpd(rgd);
		return;
	}

	/* Used (non-metadata) blocks = total - free - dinodes. */
	tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
	if (count[1] != tmp) {
		gfs2_lm(sdp, "used data mismatch:  %u != %u\n",
			count[1], tmp);
		gfs2_consist_rgrpd(rgd);
		return;
	}

	/* Both UNLINKED and DINODE states count as dinodes. */
	if (count[2] + count[3] != rgd->rd_dinodes) {
		gfs2_lm(sdp, "used metadata mismatch:  %u != %u\n",
			count[2] + count[3], rgd->rd_dinodes);
		gfs2_consist_rgrpd(rgd);
		return;
	}
}
490
b3b94faa
DT
/**
 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 * @sdp: The GFS2 superblock
 * @blk: The data block number
 * @exact: True if this needs to be an exact match
 *
 * The @exact argument should be set to true by most callers.  The exception
 * is when we need to match blocks which are not represented by the rgrp
 * bitmap, but which are part of the rgrp (i.e. padding blocks) which are
 * there for alignment purposes.  Another way of looking at it is that @exact
 * matches only valid data/metadata blocks, but with @exact false, it will
 * match any block within the extent of the rgrp.
 *
 * Returns: The resource group, or NULL if not found
 */

struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
{
	struct rb_node *n, *next;
	struct gfs2_rgrpd *cur;

	spin_lock(&sdp->sd_rindex_spin);
	n = sdp->sd_rindex_tree.rb_node;
	while (n) {
		cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
		next = NULL;
		/* Tree is keyed by rd_addr; [rd_data0, rd_data0+rd_data) is
		   the bitmap-addressable extent of the rgrp. */
		if (blk < cur->rd_addr)
			next = n->rb_left;
		else if (blk >= cur->rd_data0 + cur->rd_data)
			next = n->rb_right;
		if (next == NULL) {
			spin_unlock(&sdp->sd_rindex_spin);
			if (exact) {
				/* Reject blocks in the rgrp's header/padding
				   area (before rd_addr or past the bitmap). */
				if (blk < cur->rd_addr)
					return NULL;
				if (blk >= cur->rd_data0 + cur->rd_data)
					return NULL;
			}
			return cur;
		}
		n = next;
	}
	spin_unlock(&sdp->sd_rindex_spin);

	return NULL;
}
537
538/**
539 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
540 * @sdp: The GFS2 superblock
541 *
542 * Returns: The first rgrp in the filesystem
543 */
544
545struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
546{
7c9ca621
BP
547 const struct rb_node *n;
548 struct gfs2_rgrpd *rgd;
549
8339ee54 550 spin_lock(&sdp->sd_rindex_spin);
7c9ca621
BP
551 n = rb_first(&sdp->sd_rindex_tree);
552 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
8339ee54 553 spin_unlock(&sdp->sd_rindex_spin);
7c9ca621
BP
554
555 return rgd;
b3b94faa
DT
556}
557
558/**
559 * gfs2_rgrpd_get_next - get the next RG
886b1416 560 * @rgd: the resource group descriptor
b3b94faa
DT
561 *
562 * Returns: The next rgrp
563 */
564
565struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
566{
7c9ca621
BP
567 struct gfs2_sbd *sdp = rgd->rd_sbd;
568 const struct rb_node *n;
569
570 spin_lock(&sdp->sd_rindex_spin);
571 n = rb_next(&rgd->rd_node);
572 if (n == NULL)
573 n = rb_first(&sdp->sd_rindex_tree);
574
575 if (unlikely(&rgd->rd_node == n)) {
576 spin_unlock(&sdp->sd_rindex_spin);
b3b94faa 577 return NULL;
7c9ca621
BP
578 }
579 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
580 spin_unlock(&sdp->sd_rindex_spin);
581 return rgd;
b3b94faa
DT
582}
583
00a158be
AD
584void check_and_update_goal(struct gfs2_inode *ip)
585{
586 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
587 if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
588 ip->i_goal = ip->i_no_addr;
589}
590
8339ee54
SW
591void gfs2_free_clones(struct gfs2_rgrpd *rgd)
592{
593 int x;
594
595 for (x = 0; x < rgd->rd_length; x++) {
596 struct gfs2_bitmap *bi = rgd->rd_bits + x;
597 kfree(bi->bi_clone);
598 bi->bi_clone = NULL;
599 }
600}
601
3792ce97
BP
/* Print one multi-block reservation (owner inode, start block, bitmap
 * offset, free count) to @seq, prefixed with @fs_id_buf. */
static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs,
		    const char *fs_id_buf)
{
	/* The reservation is embedded in its inode; recover the owner. */
	struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);

	gfs2_print_dbg(seq, "%s  B: n:%llu s:%llu b:%u f:%u\n", fs_id_buf,
		       (unsigned long long)ip->i_no_addr,
		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
		       rs->rs_rbm.offset, rs->rs_free);
}
612
/**
 * __rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 * Caller must hold rd_rsspin.  Unlinks @rs from the rgrp's reservation
 * tree and returns its unused blocks to the rgrp's reserved count.
 */
static void __rs_deltree(struct gfs2_blkreserv *rs)
{
	struct gfs2_rgrpd *rgd;

	if (!gfs2_rs_active(rs))
		return;

	rgd = rs->rs_rbm.rgd;
	trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
	rb_erase(&rs->rs_node, &rgd->rd_rstree);
	RB_CLEAR_NODE(&rs->rs_node);

	if (rs->rs_free) {
		u64 last_block = gfs2_rbm_to_block(&rs->rs_rbm) +
				 rs->rs_free - 1;
		struct gfs2_rbm last_rbm = { .rgd = rs->rs_rbm.rgd, };
		struct gfs2_bitmap *start, *last;

		/* return reserved blocks to the rgrp */
		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
		/* The rgrp extent failure point is likely not to increase;
		   it will only do so if the freed blocks are somehow
		   contiguous with a span of free blocks that follows.  Still,
		   it will force the number to be recalculated later. */
		rgd->rd_extfail_pt += rs->rs_free;
		rs->rs_free = 0;
		if (gfs2_rbm_from_block(&last_rbm, last_block))
			return;
		/* The freed span may make previously "full" bitmap blocks
		   searchable again; clear GBF_FULL on each one it touches. */
		start = rbm_bi(&rs->rs_rbm);
		last = rbm_bi(&last_rbm);
		do
			clear_bit(GBF_FULL, &start->bi_flags);
		while (start++ != last);
	}
}
654
655/**
656 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
657 * @rs: The reservation to remove
658 *
659 */
20095218 660void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
8e2e0047
BP
661{
662 struct gfs2_rgrpd *rgd;
663
4a993fb1
SW
664 rgd = rs->rs_rbm.rgd;
665 if (rgd) {
666 spin_lock(&rgd->rd_rsspin);
20095218 667 __rs_deltree(rs);
44f52122 668 BUG_ON(rs->rs_free);
4a993fb1
SW
669 spin_unlock(&rgd->rd_rsspin);
670 }
8e2e0047
BP
671}
672
673/**
1595548f 674 * gfs2_rs_delete - delete a multi-block reservation
0a305e49 675 * @ip: The inode for this reservation
af5c2697 676 * @wcount: The inode's write count, or NULL
0a305e49
BP
677 *
678 */
1595548f 679void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
0a305e49
BP
680{
681 down_write(&ip->i_rw_mutex);
44f52122 682 if ((wcount == NULL) || (atomic_read(wcount) <= 1))
a097dc7e 683 gfs2_rs_deltree(&ip->i_res);
0a305e49
BP
684 up_write(&ip->i_rw_mutex);
685}
686
8e2e0047
BP
687/**
688 * return_all_reservations - return all reserved blocks back to the rgrp.
689 * @rgd: the rgrp that needs its space back
690 *
691 * We previously reserved a bunch of blocks for allocation. Now we need to
692 * give them back. This leave the reservation structures in tact, but removes
693 * all of their corresponding "no-fly zones".
694 */
695static void return_all_reservations(struct gfs2_rgrpd *rgd)
696{
697 struct rb_node *n;
698 struct gfs2_blkreserv *rs;
699
700 spin_lock(&rgd->rd_rsspin);
701 while ((n = rb_first(&rgd->rd_rstree))) {
702 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
20095218 703 __rs_deltree(rs);
8e2e0047
BP
704 }
705 spin_unlock(&rgd->rd_rsspin);
706}
707
8339ee54 708void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
b3b94faa 709{
7c9ca621 710 struct rb_node *n;
b3b94faa
DT
711 struct gfs2_rgrpd *rgd;
712 struct gfs2_glock *gl;
713
7c9ca621
BP
714 while ((n = rb_first(&sdp->sd_rindex_tree))) {
715 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
b3b94faa
DT
716 gl = rgd->rd_gl;
717
7c9ca621 718 rb_erase(n, &sdp->sd_rindex_tree);
b3b94faa
DT
719
720 if (gl) {
b3422cac
BP
721 if (gl->gl_state != LM_ST_UNLOCKED) {
722 gfs2_glock_cb(gl, LM_ST_UNLOCKED);
723 flush_delayed_work(&gl->gl_work);
724 }
10283ea5 725 gfs2_rgrp_brelse(rgd);
b3422cac 726 glock_clear_object(gl, rgd);
b3b94faa
DT
727 gfs2_glock_put(gl);
728 }
729
8339ee54 730 gfs2_free_clones(rgd);
d0f17d38 731 return_all_reservations(rgd);
b3b94faa 732 kfree(rgd->rd_bits);
36e4ad03 733 rgd->rd_bits = NULL;
6bdd9be6 734 kmem_cache_free(gfs2_rgrpd_cachep, rgd);
b3b94faa
DT
735 }
736}
737
b3b94faa
DT
/**
 * gfs2_compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi;
	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
	u32 bytes_left, bytes;
	int x;

	if (!length)
		return -EINVAL;

	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
	if (!rgd->rd_bits)
		return -ENOMEM;

	bytes_left = rgd->rd_bitbytes;

	/* Four layouts: single-block rgrp, header block, last block, and
	   full interior blocks.  bi_offset skips the per-block header
	   (gfs2_rgrp for block 0, gfs2_meta_header otherwise). */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;

		bi->bi_flags = 0;
		/* small rgrp; bitmap stored completely in header block */
		if (length == 1) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_bytes = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* header block */
		} else if (x == 0) {
			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_bytes = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* last block */
		} else if (x + 1 == length) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_bytes = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* other blocks */
		} else {
			bytes = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header);
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_bytes = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		}

		bytes_left -= bytes;
	}

	/* All rd_bitbytes must be accounted for by the blocks above. */
	if (bytes_left) {
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}
	/* The last block's bitmap must end exactly at rd_data blocks. */
	bi = rgd->rd_bits + (length - 1);
	if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) {
		gfs2_lm(sdp,
			"ri_addr = %llu\n"
			"ri_length = %u\n"
			"ri_data0 = %llu\n"
			"ri_data = %u\n"
			"ri_bitbytes = %u\n"
			"start=%u len=%u offset=%u\n",
			(unsigned long long)rgd->rd_addr,
			rgd->rd_length,
			(unsigned long long)rgd->rd_data0,
			rgd->rd_data,
			rgd->rd_bitbytes,
			bi->bi_start, bi->bi_bytes, bi->bi_offset);
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}

	return 0;
}
827
7ae8fa84
RP
828/**
829 * gfs2_ri_total - Total up the file system space, according to the rindex.
886b1416 830 * @sdp: the filesystem
7ae8fa84
RP
831 *
832 */
833u64 gfs2_ri_total(struct gfs2_sbd *sdp)
834{
835 u64 total_data = 0;
836 struct inode *inode = sdp->sd_rindex;
837 struct gfs2_inode *ip = GFS2_I(inode);
7ae8fa84 838 char buf[sizeof(struct gfs2_rindex)];
7ae8fa84
RP
839 int error, rgrps;
840
7ae8fa84
RP
841 for (rgrps = 0;; rgrps++) {
842 loff_t pos = rgrps * sizeof(struct gfs2_rindex);
843
bcd7278d 844 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
7ae8fa84 845 break;
4306629e 846 error = gfs2_internal_read(ip, buf, &pos,
7ae8fa84
RP
847 sizeof(struct gfs2_rindex));
848 if (error != sizeof(struct gfs2_rindex))
849 break;
bb8d8a6f 850 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
7ae8fa84 851 }
7ae8fa84
RP
852 return total_data;
853}
854
6aad1c3d 855static int rgd_insert(struct gfs2_rgrpd *rgd)
7c9ca621
BP
856{
857 struct gfs2_sbd *sdp = rgd->rd_sbd;
858 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
859
860 /* Figure out where to put new node */
861 while (*newn) {
862 struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
863 rd_node);
864
865 parent = *newn;
866 if (rgd->rd_addr < cur->rd_addr)
867 newn = &((*newn)->rb_left);
868 else if (rgd->rd_addr > cur->rd_addr)
869 newn = &((*newn)->rb_right);
870 else
6aad1c3d 871 return -EEXIST;
7c9ca621
BP
872 }
873
874 rb_link_node(&rgd->rd_node, parent, newn);
875 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
6aad1c3d
BP
876 sdp->sd_rgrps++;
877 return 0;
7c9ca621
BP
878}
879
/**
 * read_rindex_entry - Pull in a new resource index entry from the disk
 * @ip: Pointer to the rindex inode
 *
 * Returns: 0 on success, > 0 on EOF, error code otherwise
 */

static int read_rindex_entry(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	/* Next unread entry: sd_rgrps counts entries already read in. */
	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
	struct gfs2_rindex buf;
	int error;
	struct gfs2_rgrpd *rgd;

	if (pos >= i_size_read(&ip->i_inode))
		return 1;	/* EOF: all entries consumed */

	error = gfs2_internal_read(ip, (char *)&buf, &pos,
				   sizeof(struct gfs2_rindex));

	if (error != sizeof(struct gfs2_rindex))
		return (error == 0) ? 1 : error;	/* short read = EOF */

	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
	error = -ENOMEM;
	if (!rgd)
		return error;

	rgd->rd_sbd = sdp;
	rgd->rd_addr = be64_to_cpu(buf.ri_addr);
	rgd->rd_length = be32_to_cpu(buf.ri_length);
	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
	rgd->rd_data = be32_to_cpu(buf.ri_data);
	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
	spin_lock_init(&rgd->rd_rsspin);

	error = compute_bitstructs(rgd);
	if (error)
		goto fail;

	error = gfs2_glock_get(sdp, rgd->rd_addr,
			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
	if (error)
		goto fail;

	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
	rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
	if (rgd->rd_data > sdp->sd_max_rg_data)
		sdp->sd_max_rg_data = rgd->rd_data;
	spin_lock(&sdp->sd_rindex_spin);
	error = rgd_insert(rgd);
	spin_unlock(&sdp->sd_rindex_spin);
	if (!error) {
		glock_set_object(rgd->rd_gl, rgd);
		return 0;
	}

	/* rgd_insert() found a duplicate: someone else read in the rgrp;
	   free ours and ignore it. */
	error = 0;
	gfs2_glock_put(rgd->rd_gl);

fail:
	kfree(rgd->rd_bits);
	rgd->rd_bits = NULL;
	kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	return error;
}
947
0e27c18c
BP
/**
 * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
 * @sdp: the GFS2 superblock
 *
 * The purpose of this function is to select a subset of the resource groups
 * and mark them as PREFERRED. We do it in such a way that each node prefers
 * to use a unique set of rgrps to minimize glock contention.
 */
static void set_rgrp_preferences(struct gfs2_sbd *sdp)
{
	struct gfs2_rgrpd *rgd, *first;
	int i;

	/* Skip an initial number of rgrps, based on this node's journal ID.
	   That should start each node out on its own set. */
	/* NOTE(review): if ls_jid >= number of rgrps, rgd can become NULL
	   here and the next gfs2_rgrpd_get_next() call would dereference
	   it — presumably jid is always small relative to the rgrp count;
	   verify against mount-time constraints. */
	rgd = gfs2_rgrpd_get_first(sdp);
	for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
		rgd = gfs2_rgrpd_get_next(rgd);
	first = rgd;

	do {
		/* Mark this rgrp preferred, then stride over one rgrp per
		   journal so each node lands on a disjoint subset. */
		rgd->rd_flags |= GFS2_RDF_PREFERRED;
		for (i = 0; i < sdp->sd_journals; i++) {
			rgd = gfs2_rgrpd_get_next(rgd);
			if (!rgd || rgd == first)
				break;
		}
	} while (rgd && rgd != first);
}
977
6c53267f
RP
978/**
979 * gfs2_ri_update - Pull in a new resource index from the disk
980 * @ip: pointer to the rindex inode
981 *
b3b94faa
DT
982 * Returns: 0 on successful update, error code otherwise
983 */
984
8339ee54 985static int gfs2_ri_update(struct gfs2_inode *ip)
b3b94faa 986{
feaa7bba 987 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
b3b94faa
DT
988 int error;
989
8339ee54 990 do {
4306629e 991 error = read_rindex_entry(ip);
8339ee54
SW
992 } while (error == 0);
993
994 if (error < 0)
995 return error;
b3b94faa 996
77872151
BP
997 if (RB_EMPTY_ROOT(&sdp->sd_rindex_tree)) {
998 fs_err(sdp, "no resource groups found in the file system.\n");
999 return -ENOENT;
1000 }
0e27c18c
BP
1001 set_rgrp_preferences(sdp);
1002
cf45b752 1003 sdp->sd_rindex_uptodate = 1;
6c53267f
RP
1004 return 0;
1005}
b3b94faa 1006
/**
 * gfs2_rindex_update - Update the rindex if required
 * @sdp: The GFS2 superblock
 *
 * We grab a lock on the rindex inode to make sure that it doesn't
 * change whilst we are performing an operation. We keep this lock
 * for quite long periods of time compared to other locks. This
 * doesn't matter, since it is shared and it is very, very rarely
 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 *
 * This makes sure that we're using the latest copy of the resource index
 * special file, which might have been updated if someone expanded the
 * filesystem (via gfs2_grow utility), which adds new resource groups.
 *
 * Returns: 0 on success, error code otherwise
 */

int gfs2_rindex_update(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
	struct gfs2_glock *gl = ip->i_gl;
	struct gfs2_holder ri_gh;
	int error = 0;
	int unlock_required = 0;

	/* Read new copy from disk if we don't have the latest */
	if (!sdp->sd_rindex_uptodate) {
		/* Take the rindex glock shared unless this task already
		   holds it (e.g. when called from within a grow). */
		if (!gfs2_glock_is_locked_by_me(gl)) {
			error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
			if (error)
				return error;
			unlock_required = 1;
		}
		/* Re-check under the glock: another task may have already
		   refreshed the rindex while we were acquiring it. */
		if (!sdp->sd_rindex_uptodate)
			error = gfs2_ri_update(ip);
		if (unlock_required)
			gfs2_glock_dq_uninit(&ri_gh);
	}

	return error;
}
1048
42d52e38 1049static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
bb8d8a6f
SW
1050{
1051 const struct gfs2_rgrp *str = buf;
42d52e38 1052 u32 rg_flags;
bb8d8a6f 1053
42d52e38 1054 rg_flags = be32_to_cpu(str->rg_flags);
09010978 1055 rg_flags &= ~GFS2_RDF_MASK;
1ce97e56
SW
1056 rgd->rd_flags &= GFS2_RDF_MASK;
1057 rgd->rd_flags |= rg_flags;
cfc8b549 1058 rgd->rd_free = be32_to_cpu(str->rg_free);
73f74948 1059 rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
d8b71f73 1060 rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
166725d9 1061 /* rd_data0, rd_data and rd_bitbytes already set from rindex */
bb8d8a6f
SW
1062}
1063
3f30f929
BP
1064static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1065{
1066 const struct gfs2_rgrp *str = buf;
1067
1068 rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1069 rgl->rl_flags = str->rg_flags;
1070 rgl->rl_free = str->rg_free;
1071 rgl->rl_dinodes = str->rg_dinodes;
1072 rgl->rl_igeneration = str->rg_igeneration;
1073 rgl->__pad = 0UL;
1074}
1075
/* Write the in-core rgrp state back into the on-disk header @buf and
 * mirror the result into the lock value block. */
static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
{
	struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd);
	struct gfs2_rgrp *str = buf;
	u32 crc;

	str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
	str->rg_free = cpu_to_be32(rgd->rd_free);
	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
	/* rg_skip: distance to the next rgrp header, to aid sequential
	   scans.  Note: when next exists but is not at a higher address,
	   neither branch runs and the buffer's existing value is kept. */
	if (next == NULL)
		str->rg_skip = 0;
	else if (next->rd_addr > rgd->rd_addr)
		str->rg_skip = cpu_to_be32(next->rd_addr - rgd->rd_addr);
	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
	str->rg_data0 = cpu_to_be64(rgd->rd_data0);
	str->rg_data = cpu_to_be32(rgd->rd_data);
	str->rg_bitbytes = cpu_to_be32(rgd->rd_bitbytes);
	/* CRC is computed over the header with rg_crc itself zeroed. */
	str->rg_crc = 0;
	crc = gfs2_disk_hash(buf, sizeof(struct gfs2_rgrp));
	str->rg_crc = cpu_to_be32(crc);

	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf);
}
1100
90306c41
BM
/* Cross-check the lock value block copy of the rgrp header against the
 * on-disk header.  Logs a warning for every mismatching field and returns
 * 1 if the two agree, 0 otherwise (caller treats 0 as inconsistency). */
static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
{
	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
	struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	int valid = 1;

	/* All comparisons are done in big-endian form; conversion to CPU
	   order is only needed for the log messages. */
	if (rgl->rl_flags != str->rg_flags) {
		fs_warn(sdp, "GFS2: rgd: %llu lvb flag mismatch %u/%u",
			(unsigned long long)rgd->rd_addr,
			be32_to_cpu(rgl->rl_flags), be32_to_cpu(str->rg_flags));
		valid = 0;
	}
	if (rgl->rl_free != str->rg_free) {
		fs_warn(sdp, "GFS2: rgd: %llu lvb free mismatch %u/%u",
			(unsigned long long)rgd->rd_addr,
			be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free));
		valid = 0;
	}
	if (rgl->rl_dinodes != str->rg_dinodes) {
		fs_warn(sdp, "GFS2: rgd: %llu lvb dinode mismatch %u/%u",
			(unsigned long long)rgd->rd_addr,
			be32_to_cpu(rgl->rl_dinodes),
			be32_to_cpu(str->rg_dinodes));
		valid = 0;
	}
	if (rgl->rl_igeneration != str->rg_igeneration) {
		fs_warn(sdp, "GFS2: rgd: %llu lvb igen mismatch %llu/%llu",
			(unsigned long long)rgd->rd_addr,
			(unsigned long long)be64_to_cpu(rgl->rl_igeneration),
			(unsigned long long)be64_to_cpu(str->rg_igeneration));
		valid = 0;
	}
	return valid;
}
1136
90306c41
BM
1137static u32 count_unlinked(struct gfs2_rgrpd *rgd)
1138{
1139 struct gfs2_bitmap *bi;
1140 const u32 length = rgd->rd_length;
1141 const u8 *buffer = NULL;
1142 u32 i, goal, count = 0;
1143
1144 for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
1145 goal = 0;
1146 buffer = bi->bi_bh->b_data + bi->bi_offset;
1147 WARN_ON(!buffer_uptodate(bi->bi_bh));
281b4952
AG
1148 while (goal < bi->bi_blocks) {
1149 goal = gfs2_bitfit(buffer, bi->bi_bytes, goal,
90306c41
BM
1150 GFS2_BLKST_UNLINKED);
1151 if (goal == BFITNOENT)
1152 break;
1153 count++;
1154 goal++;
1155 }
1156 }
1157
1158 return count;
1159}
1160
1161
/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
 *
 * Returns: errno
 */

static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	/* Already read in: bitmap 0 holds the rgrp header buffer. */
	if (rgd->rd_bits[0].bi_bh != NULL)
		return 0;

	/* Phase 1: submit async reads for every bitmap block. */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh);
		if (error)
			goto fail;
	}

	/* Phase 2: wait for completion (in reverse order) and verify the
	   metadata type: block 0 is the rgrp header, the rest are
	   resource-group bitmap continuation blocks. */
	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_wait(sdp, bi->bi_bh);
		if (error)
			goto fail;
		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
					GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	/* First read since the glock was acquired: refresh the in-core
	   state from the on-disk header. */
	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
		for (x = 0; x < length; x++)
			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
		rgd->rd_free_clone = rgd->rd_free;
		/* max out the rgrp allocation failure point */
		rgd->rd_extfail_pt = rgd->rd_free;
	}
	/* An uninitialized LVB (wrong magic) gets (re)populated here;
	   otherwise, with rgrplvb enabled, validate it against disk. */
	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
				     rgd->rd_bits[0].bi_bh->b_data);
	}
	else if (sdp->sd_args.ar_rgrplvb) {
		if (!gfs2_rgrp_lvb_valid(rgd)){
			gfs2_consist_rgrpd(rgd);
			error = -EIO;
			goto fail;
		}
		if (rgd->rd_rgl->rl_unlinked == 0)
			rgd->rd_flags &= ~GFS2_RDF_CHECK;
	}
	return 0;

fail:
	/* Release every buffer submitted so far.  After phase 1 completes,
	   x == length, so later failures release all of them. */
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}

	return error;
}
1238
/* Refresh the in-core rgrp state from the lock value block, avoiding a
 * disk read when possible.  Falls back to gfs2_rgrp_bh_get() when the LVB
 * has not been initialized (wrong magic).  Returns: errno. */
static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
{
	u32 rl_flags;

	if (rgd->rd_flags & GFS2_RDF_UPTODATE)
		return 0;

	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
		return gfs2_rgrp_bh_get(rgd);

	/* Merge the disk flags from the LVB with the in-core-only bits. */
	rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
	rl_flags &= ~GFS2_RDF_MASK;
	rgd->rd_flags &= GFS2_RDF_MASK;
	rgd->rd_flags |= (rl_flags | GFS2_RDF_CHECK);
	/* No unlinked inodes recorded: nothing to scan for. */
	if (rgd->rd_rgl->rl_unlinked == 0)
		rgd->rd_flags &= ~GFS2_RDF_CHECK;
	rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
	rgd->rd_free_clone = rgd->rd_free;
	rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
	rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
	return 0;
}
1261
1262int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1263{
1264 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1265 struct gfs2_sbd *sdp = rgd->rd_sbd;
1266
1267 if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1268 return 0;
8b127d04 1269 return gfs2_rgrp_bh_get(rgd);
90306c41
BM
1270}
1271
b3b94faa 1272/**
39b0f1e9
BP
1273 * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get()
1274 * @rgd: The resource group
b3b94faa
DT
1275 *
1276 */
1277
39b0f1e9 1278void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
b3b94faa 1279{
bb8d8a6f 1280 int x, length = rgd->rd_length;
b3b94faa 1281
b3b94faa
DT
1282 for (x = 0; x < length; x++) {
1283 struct gfs2_bitmap *bi = rgd->rd_bits + x;
90306c41
BM
1284 if (bi->bi_bh) {
1285 brelse(bi->bi_bh);
1286 bi->bi_bh = NULL;
1287 }
b3b94faa 1288 }
39b0f1e9
BP
1289}
1290
/* Issue discard requests for blocks that became free in bitmap @bi.
 * If @bh is given, only blocks that changed from used (in @bh, the
 * pre-transaction copy) to free (in the clone) are discarded; otherwise
 * all free blocks are.  Free extents are coalesced and only extents of at
 * least @minlen blocks are sent.  On success the number of blocks trimmed
 * is stored in *@ptrimmed (if non-NULL).  On discard failure, discards
 * are disabled for the filesystem and -EIO is returned. */
int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
			    struct buffer_head *bh,
			    const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
{
	struct super_block *sb = sdp->sd_vfs;
	u64 blk;
	sector_t start = 0;
	sector_t nr_blks = 0;
	int rv;
	unsigned int x;
	u32 trimmed = 0;
	u8 diff;

	for (x = 0; x < bi->bi_bytes; x++) {
		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
		clone += bi->bi_offset;
		clone += x;
		if (bh) {
			/* diff has a 1 in bit 2k iff 2-bit state k went
			   from non-free (in orig) to free (in clone). */
			const u8 *orig = bh->b_data + bi->bi_offset + x;
			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
		} else {
			/* No original: select every free block. */
			diff = ~(*clone | (*clone >> 1));
		}
		diff &= 0x55;	/* keep only the low bit of each 2-bit pair */
		if (diff == 0)
			continue;
		/* First block represented by this bitmap byte. */
		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
		while(diff) {
			if (diff & 1) {
				if (nr_blks == 0)
					goto start_new_extent;
				if ((start + nr_blks) != blk) {
					/* Extent ended: flush it if it is
					   long enough, then start anew. */
					if (nr_blks >= minlen) {
						rv = sb_issue_discard(sb,
							start, nr_blks,
							GFP_NOFS, 0);
						if (rv)
							goto fail;
						trimmed += nr_blks;
					}
					nr_blks = 0;
start_new_extent:
					start = blk;
				}
				nr_blks++;
			}
			diff >>= 2;
			blk++;
		}
	}
	/* Flush the final pending extent, if any. */
	if (nr_blks >= minlen) {
		rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
		if (rv)
			goto fail;
		trimmed += nr_blks;
	}
	if (ptrimmed)
		*ptrimmed = trimmed;
	return 0;

fail:
	/* A failed discard disables the discard mount option for the
	   rest of this mount rather than failing the caller repeatedly. */
	if (sdp->sd_args.ar_discard)
		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv);
	sdp->sd_args.ar_discard = 0;
	return -EIO;
}
1357
1358/**
1359 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1360 * @filp: Any file on the filesystem
1361 * @argp: Pointer to the arguments (also used to pass result)
1362 *
1363 * Returns: 0 on success, otherwise error code
1364 */
1365
1366int gfs2_fitrim(struct file *filp, void __user *argp)
1367{
496ad9aa 1368 struct inode *inode = file_inode(filp);
66fc061b
SW
1369 struct gfs2_sbd *sdp = GFS2_SB(inode);
1370 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1371 struct buffer_head *bh;
1372 struct gfs2_rgrpd *rgd;
1373 struct gfs2_rgrpd *rgd_end;
1374 struct gfs2_holder gh;
1375 struct fstrim_range r;
1376 int ret = 0;
1377 u64 amt;
1378 u64 trimmed = 0;
076f0faa 1379 u64 start, end, minlen;
66fc061b 1380 unsigned int x;
076f0faa 1381 unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
66fc061b
SW
1382
1383 if (!capable(CAP_SYS_ADMIN))
1384 return -EPERM;
1385
c5c68724
BP
1386 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
1387 return -EROFS;
1388
66fc061b
SW
1389 if (!blk_queue_discard(q))
1390 return -EOPNOTSUPP;
1391
3a238ade 1392 if (copy_from_user(&r, argp, sizeof(r)))
66fc061b
SW
1393 return -EFAULT;
1394
5e2f7d61
BP
1395 ret = gfs2_rindex_update(sdp);
1396 if (ret)
1397 return ret;
1398
076f0faa
LC
1399 start = r.start >> bs_shift;
1400 end = start + (r.len >> bs_shift);
1401 minlen = max_t(u64, r.minlen,
1402 q->limits.discard_granularity) >> bs_shift;
1403
6a98c333
AD
1404 if (end <= start || minlen > sdp->sd_max_rg_data)
1405 return -EINVAL;
1406
076f0faa 1407 rgd = gfs2_blk2rgrpd(sdp, start, 0);
6a98c333 1408 rgd_end = gfs2_blk2rgrpd(sdp, end, 0);
076f0faa 1409
6a98c333
AD
1410 if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
1411 && (start > rgd_end->rd_data0 + rgd_end->rd_data))
1412 return -EINVAL; /* start is beyond the end of the fs */
66fc061b
SW
1413
1414 while (1) {
1415
1416 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1417 if (ret)
1418 goto out;
1419
1420 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1421 /* Trim each bitmap in the rgrp */
1422 for (x = 0; x < rgd->rd_length; x++) {
1423 struct gfs2_bitmap *bi = rgd->rd_bits + x;
076f0faa
LC
1424 ret = gfs2_rgrp_send_discards(sdp,
1425 rgd->rd_data0, NULL, bi, minlen,
1426 &amt);
66fc061b
SW
1427 if (ret) {
1428 gfs2_glock_dq_uninit(&gh);
1429 goto out;
1430 }
1431 trimmed += amt;
1432 }
1433
1434 /* Mark rgrp as having been trimmed */
1435 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1436 if (ret == 0) {
1437 bh = rgd->rd_bits[0].bi_bh;
1438 rgd->rd_flags |= GFS2_RGF_TRIMMED;
350a9b0a 1439 gfs2_trans_add_meta(rgd->rd_gl, bh);
66fc061b
SW
1440 gfs2_rgrp_out(rgd, bh->b_data);
1441 gfs2_trans_end(sdp);
1442 }
1443 }
1444 gfs2_glock_dq_uninit(&gh);
1445
1446 if (rgd == rgd_end)
1447 break;
1448
1449 rgd = gfs2_rgrpd_get_next(rgd);
1450 }
1451
1452out:
6a98c333 1453 r.len = trimmed << bs_shift;
3a238ade 1454 if (copy_to_user(argp, &r, sizeof(r)))
66fc061b
SW
1455 return -EFAULT;
1456
1457 return ret;
f15ab561
SW
1458}
1459
8e2e0047
BP
/**
 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
 * @ip: the inode structure
 *
 */
static void rs_insert(struct gfs2_inode *ip)
{
	struct rb_node **newn, *parent = NULL;
	int rc;
	struct gfs2_blkreserv *rs = &ip->i_res;
	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
	u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);

	/* The reservation must not already be linked into any tree. */
	BUG_ON(gfs2_rs_active(rs));

	spin_lock(&rgd->rd_rsspin);
	newn = &rgd->rd_rstree.rb_node;
	while (*newn) {
		struct gfs2_blkreserv *cur =
			rb_entry(*newn, struct gfs2_blkreserv, rs_node);

		parent = *newn;
		rc = rs_cmp(fsblock, rs->rs_free, cur);
		if (rc > 0)
			newn = &((*newn)->rb_right);
		else if (rc < 0)
			newn = &((*newn)->rb_left);
		else {
			/* Overlap with an existing reservation: should not
			   happen; bail out without inserting. */
			spin_unlock(&rgd->rd_rsspin);
			WARN_ON(1);
			return;
		}
	}

	rb_link_node(&rs->rs_node, parent, newn);
	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);

	/* Do our rgrp accounting for the reservation */
	rgd->rd_reserved += rs->rs_free; /* blocks reserved */
	spin_unlock(&rgd->rd_rsspin);
	trace_gfs2_rs(rs, TRACE_RS_INSERT);
}
1502
f6753df3
BP
1503/**
1504 * rgd_free - return the number of free blocks we can allocate.
1505 * @rgd: the resource group
1506 *
1507 * This function returns the number of free blocks for an rgrp.
1508 * That's the clone-free blocks (blocks that are free, not including those
1509 * still being used for unlinked files that haven't been deleted.)
1510 *
1511 * It also subtracts any blocks reserved by someone else, but does not
1512 * include free blocks that are still part of our current reservation,
1513 * because obviously we can (and will) allocate them.
1514 */
1515static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
1516{
1517 u32 tot_reserved, tot_free;
1518
1519 if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free))
1520 return 0;
1521 tot_reserved = rgd->rd_reserved - rs->rs_free;
1522
1523 if (rgd->rd_free_clone < tot_reserved)
1524 tot_reserved = 0;
1525
1526 tot_free = rgd->rd_free_clone - tot_reserved;
1527
1528 return tot_free;
1529}
1530
/**
 * rg_mblk_search - find a group of multiple free blocks to form a reservation
 * @rgd: the resource group descriptor
 * @ip: pointer to the inode for which we're reserving blocks
 * @ap: the allocation parameters
 *
 */

static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
			   const struct gfs2_alloc_parms *ap)
{
	struct gfs2_rbm rbm = { .rgd = rgd, };
	u64 goal;
	struct gfs2_blkreserv *rs = &ip->i_res;
	u32 extlen;
	u32 free_blocks = rgd_free(rgd, rs);
	int ret;
	struct inode *inode = &ip->i_inode;

	/* Directories grow a block at a time; regular files reserve an
	   extent sized from the inode's write-size hint and the current
	   allocation target, clamped to what's actually available. */
	if (S_ISDIR(inode->i_mode))
		extlen = 1;
	else {
		extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target);
		extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks);
	}
	if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
		return;

	/* Find bitmap block that contains bits for goal block */
	if (rgrp_contains_block(rgd, ip->i_goal))
		goal = ip->i_goal;
	else
		goal = rgd->rd_last_alloc + rgd->rd_data0;

	if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
		return;

	ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true);
	if (ret == 0) {
		/* Found a free extent: record it and link the reservation
		   into the rgrp's tree. */
		rs->rs_rbm = rbm;
		rs->rs_free = extlen;
		rs_insert(ip);
	} else {
		/* Nothing past the goal: reset the per-rgrp allocation
		   goal so the next search starts from the beginning. */
		if (goal == rgd->rd_last_alloc + rgd->rd_data0)
			rgd->rd_last_alloc = 0;
	}
}
1578
5b924ae2
SW
/**
 * gfs2_next_unreserved_block - Return next block that is not reserved
 * @rgd: The resource group
 * @block: The starting block
 * @length: The required length
 * @ip: Ignore any reservations for this inode
 *
 * If the block does not appear in any reservation, then return the
 * block number unchanged. If it does appear in the reservation, then
 * keep looking through the tree of reservations in order to find the
 * first block number which is not reserved.
 */

static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
				      u32 length,
				      const struct gfs2_inode *ip)
{
	struct gfs2_blkreserv *rs;
	struct rb_node *n;
	int rc;

	spin_lock(&rgd->rd_rsspin);
	/* Binary search for a reservation overlapping [block, block+length). */
	n = rgd->rd_rstree.rb_node;
	while (n) {
		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		rc = rs_cmp(block, length, rs);
		if (rc < 0)
			n = n->rb_left;
		else if (rc > 0)
			n = n->rb_right;
		else
			break;
	}

	if (n) {
		/* Skip past each overlapping reservation in turn (our own
		   inode's reservation does not count as a conflict). */
		while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) {
			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
			n = n->rb_right;
			if (n == NULL)
				break;
			rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		}
	}

	spin_unlock(&rgd->rd_rsspin);
	return block;
}
1626
5b924ae2
SW
/**
 * gfs2_reservation_check_and_update - Check for reservations during block alloc
 * @rbm: The current position in the resource group
 * @ip: The inode for which we are searching for blocks
 * @minext: The minimum extent length
 * @maxext: A pointer to the maximum extent structure
 *
 * This checks the current position in the rgrp to see whether there is
 * a reservation covering this block. If not then this function is a
 * no-op. If there is, then the position is moved to the end of the
 * contiguous reservation(s) so that we are pointing at the first
 * non-reserved block.
 *
 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
 */

static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
					     const struct gfs2_inode *ip,
					     u32 minext,
					     struct gfs2_extent *maxext)
{
	u64 block = gfs2_rbm_to_block(rbm);
	u32 extlen = 1;
	u64 nblock;

	/*
	 * If we have a minimum extent length, then skip over any extent
	 * which is less than the min extent length in size.
	 */
	if (minext) {
		extlen = gfs2_free_extlen(rbm, minext);
		/* Not longer than the best undersized extent seen so far:
		   no point re-checking it against the reservations. */
		if (extlen <= maxext->len)
			goto fail;
	}

	/*
	 * Check the extent which has been found against the reservations
	 * and skip if parts of it are already reserved
	 */
	nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
	if (nblock == block) {
		/* Unreserved; success when it meets the length requirement. */
		if (!minext || extlen >= minext)
			return 0;

		/* Too short, but remember it as the best fallback so far. */
		if (extlen > maxext->len) {
			maxext->len = extlen;
			maxext->rbm = *rbm;
		}
	} else {
		/* Reserved: skip forward to the first unreserved block. */
		u64 len = nblock - block;
		if (len >= (u64)1 << 32)
			return -E2BIG;
		extlen = len;
	}
fail:
	/* Advance @rbm; -E2BIG tells the caller the skip ran off the end
	   of the rgrp. */
	if (gfs2_rbm_add(rbm, extlen))
		return -E2BIG;
	return 1;
}
1686
/**
 * gfs2_rbm_find - Look for blocks of a particular state
 * @rbm: Value/result starting position and final position
 * @state: The state which we want to find
 * @minext: Pointer to the requested extent length (NULL for a single block)
 *          This is updated to be the actual reservation size.
 * @ip: If set, check for reservations
 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
 *          around until we've reached the starting point.
 *
 * Side effects:
 * - If looking for free blocks, we set GBF_FULL on each bitmap which
 *   has no free blocks in it.
 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
 *   has come up short on a free block search.
 *
 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
 */

static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
			 const struct gfs2_inode *ip, bool nowrap)
{
	bool scan_from_start = rbm->bii == 0 && rbm->offset == 0;
	struct buffer_head *bh;
	int last_bii;
	u32 offset;
	u8 *buffer;
	bool wrapped = false;
	int ret;
	struct gfs2_bitmap *bi;
	struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };

	/*
	 * Determine the last bitmap to search.  If we're not starting at the
	 * beginning of a bitmap, we need to search that bitmap twice to scan
	 * the entire resource group.
	 */
	last_bii = rbm->bii - (rbm->offset == 0);

	while(1) {
		bi = rbm_bi(rbm);
		/* Skip bitmaps already known to have no free blocks, unless
		   we may need to look inside our own active reservation. */
		if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) &&
		    test_bit(GBF_FULL, &bi->bi_flags) &&
		    (state == GFS2_BLKST_FREE))
			goto next_bitmap;

		bh = bi->bi_bh;
		buffer = bh->b_data + bi->bi_offset;
		WARN_ON(!buffer_uptodate(bh));
		/* For allocations, search the clone bitmap (which includes
		   not-yet-committed frees) rather than the on-disk copy. */
		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
			buffer = bi->bi_clone + bi->bi_offset;
		offset = gfs2_bitfit(buffer, bi->bi_bytes, rbm->offset, state);
		if (offset == BFITNOENT) {
			/* A full scan of this bitmap found nothing free:
			   remember that so future searches can skip it. */
			if (state == GFS2_BLKST_FREE && rbm->offset == 0)
				set_bit(GBF_FULL, &bi->bi_flags);
			goto next_bitmap;
		}
		rbm->offset = offset;
		if (ip == NULL)
			return 0;

		/* 0: position is usable; 1: moved past a reservation or a
		   too-short extent; -E2BIG: skip wrapped the rgrp. */
		ret = gfs2_reservation_check_and_update(rbm, ip,
							minext ? *minext : 0,
							&maxext);
		if (ret == 0)
			return 0;
		if (ret > 0)
			goto next_iter;
		if (ret == -E2BIG) {
			rbm->bii = 0;
			rbm->offset = 0;
			goto res_covered_end_of_rgrp;
		}
		return ret;

next_bitmap:	/* Find next bitmap in the rgrp */
		rbm->offset = 0;
		rbm->bii++;
		if (rbm->bii == rbm->rgd->rd_length)
			rbm->bii = 0;
res_covered_end_of_rgrp:
		if (rbm->bii == 0) {
			if (wrapped)
				break;
			wrapped = true;
			if (nowrap)
				break;
		}
next_iter:
		/* Have we scanned the entire resource group? */
		if (wrapped && rbm->bii > last_bii)
			break;
	}

	if (minext == NULL || state != GFS2_BLKST_FREE)
		return -ENOSPC;

	/* If the extent was too small, and it's smaller than the smallest
	   to have failed before, remember for future reference that it's
	   useless to search this rgrp again for this amount or more. */
	if (wrapped && (scan_from_start || rbm->bii > last_bii) &&
	    *minext < rbm->rgd->rd_extfail_pt)
		rbm->rgd->rd_extfail_pt = *minext;

	/* If the maximum extent we found is big enough to fulfill the
	   minimum requirements, use it anyway. */
	if (maxext.len) {
		*rbm = maxext.rbm;
		*minext = maxext.len;
		return 0;
	}

	return -ENOSPC;
}
1801
c8cdf479
SW
/**
 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
 * @rgd: The rgrp
 * @last_unlinked: block address of the last dinode we unlinked
 * @skip: block address we should explicitly not unlink
 *
 * Returns: 0 if no error
 *          The inode, if one has been found, in inode.
 */

static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
{
	u64 block;
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl;
	struct gfs2_inode *ip;
	int error;
	int found = 0;
	struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };

	while (1) {
		/* Find the next UNLINKED block; -ENOSPC means none left. */
		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
				      true);
		if (error == -ENOSPC)
			break;
		if (WARN_ON_ONCE(error))
			break;

		block = gfs2_rbm_to_block(&rbm);
		/* Advance the search position past this block for the next
		   iteration; stop if that runs off the end of the rgrp. */
		if (gfs2_rbm_from_block(&rbm, block + 1))
			break;
		if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
			continue;
		if (block == skip)
			continue;
		*last_unlinked = block;

		error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl);
		if (error)
			continue;

		/* If the inode is already in cache, we can ignore it here
		 * because the existing inode disposal code will deal with
		 * it when all refs have gone away. Accessing gl_object like
		 * this is not safe in general. Here it is ok because we do
		 * not dereference the pointer, and we only need an approx
		 * answer to whether it is NULL or not.
		 */
		ip = gl->gl_object;

		/* Queue delete work for uncached inodes; the queued work
		   inherits our glock reference, otherwise drop it here. */
		if (ip || !gfs2_queue_delete_work(gl, 0))
			gfs2_glock_put(gl);
		else
			found++;

		/* Limit reclaim to sensible number of tasks */
		if (found > NR_CPUS)
			return;
	}

	/* Scanned the whole rgrp: no need to re-check it for unlinked
	   inodes until something changes. */
	rgd->rd_flags &= ~GFS2_RDF_CHECK;
	return;
}
1865
bcd97c06
SW
/**
 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
 * @rgd: The rgrp in question
 * @loops: An indication of how picky we can be (0=very, 1=less so)
 *
 * This function uses the recently added glock statistics in order to
 * figure out whether a particular resource group is suffering from
 * contention from multiple nodes. This is done purely on the basis
 * of timings, since this is the only data we have to work with and
 * our aim here is to reject a resource group which is highly contended
 * but (very important) not to do this too often in order to ensure that
 * we do not land up introducing fragmentation by changing resource
 * groups when not actually required.
 *
 * The calculation is fairly simple, we want to know whether the SRTTB
 * (i.e. smoothed round trip time for blocking operations) to acquire
 * the lock for this rgrp's glock is significantly greater than the
 * time taken for resource groups on average. We introduce a margin in
 * the form of the variable @var which is computed as the sum of the two
 * respective variances, and multiplied by a factor depending on @loops
 * and whether we have a lot of data to base the decision on. This is
 * then tested against the square difference of the means in order to
 * decide whether the result is statistically significant or not.
 *
 * Returns: A boolean verdict on the congestion status
 */

static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
{
	const struct gfs2_glock *gl = rgd->rd_gl;
	const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_lkstats *st;
	u64 r_dcount, l_dcount;
	u64 l_srttb, a_srttb = 0;
	s64 srttb_diff;
	u64 sqr_diff;
	u64 var;
	int cpu, nonzero = 0;

	preempt_disable();
	/* Average the per-CPU rgrp SRTTB over CPUs that have data. */
	for_each_present_cpu(cpu) {
		st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
		if (st->stats[GFS2_LKS_SRTTB]) {
			a_srttb += st->stats[GFS2_LKS_SRTTB];
			nonzero++;
		}
	}
	st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
	if (nonzero)
		do_div(a_srttb, nonzero);
	r_dcount = st->stats[GFS2_LKS_DCOUNT];
	var = st->stats[GFS2_LKS_SRTTVARB] +
	      gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
	preempt_enable();

	l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
	l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];

	/* Not enough samples to make a meaningful decision. */
	if ((l_dcount < 1) || (r_dcount < 1) || (a_srttb == 0))
		return false;

	srttb_diff = a_srttb - l_srttb;
	sqr_diff = srttb_diff * srttb_diff;

	/* Widen the significance margin when data is sparse or when the
	   caller is on a later, less picky pass. */
	var *= 2;
	if (l_dcount < 8 || r_dcount < 8)
		var *= 2;
	if (loops == 1)
		var *= 2;

	/* Congested iff this glock is slower than average (diff < 0) by a
	   statistically significant amount. */
	return ((srttb_diff < 0) && (sqr_diff > var));
}
1938
1939/**
1940 * gfs2_rgrp_used_recently
1941 * @rs: The block reservation with the rgrp to test
1942 * @msecs: The time limit in milliseconds
1943 *
1944 * Returns: True if the rgrp glock has been used within the time limit
1945 */
1946static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1947 u64 msecs)
1948{
1949 u64 tdiff;
1950
1951 tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1952 rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1953
1954 return tdiff > (msecs * 1000 * 1000);
1955}
1956
9dbe9610
SW
1957static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1958{
1959 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1960 u32 skip;
1961
1962 get_random_bytes(&skip, sizeof(skip));
1963 return skip % sdp->sd_rgrps;
1964}
1965
c743ffd0
SW
1966static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1967{
1968 struct gfs2_rgrpd *rgd = *pos;
aa8920c9 1969 struct gfs2_sbd *sdp = rgd->rd_sbd;
c743ffd0
SW
1970
1971 rgd = gfs2_rgrpd_get_next(rgd);
1972 if (rgd == NULL)
aa8920c9 1973 rgd = gfs2_rgrpd_get_first(sdp);
c743ffd0
SW
1974 *pos = rgd;
1975 if (rgd != begin) /* If we didn't wrap */
1976 return true;
1977 return false;
1978}
1979
0e27c18c
BP
1980/**
1981 * fast_to_acquire - determine if a resource group will be fast to acquire
1982 *
1983 * If this is one of our preferred rgrps, it should be quicker to acquire,
1984 * because we tried to set ourselves up as dlm lock master.
1985 */
1986static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
1987{
1988 struct gfs2_glock *gl = rgd->rd_gl;
1989
1990 if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
1991 !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
1992 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1993 return 1;
1994 if (rgd->rd_flags & GFS2_RDF_PREFERRED)
1995 return 1;
1996 return 0;
1997}
1998
/**
 * gfs2_inplace_reserve - Reserve space in the filesystem
 * @ip: the inode to reserve space for
 * @ap: the allocation parameters
 *
 * We try our best to find an rgrp that has at least ap->target blocks
 * available. After a couple of passes (loops == 2), the prospects of finding
 * such an rgrp diminish. At this stage, we return the first rgrp that has
 * at least ap->min_target blocks available. Either way, we set ap->allowed to
 * the number of blocks available in the chosen rgrp.
 *
 * Returns: 0 on success,
 *   -ENOSPC if no suitable rgrp can be found after three passes
 *   errno otherwise
 */

int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *begin = NULL;
	struct gfs2_blkreserv *rs = &ip->i_res;
	int error = 0, rg_locked, flags = 0;
	u64 last_unlinked = NO_BLOCK;
	int loops = 0;
	u32 free_blocks, skip = 0;

	/* With rgrplvb, trust the lock value block instead of reading in
	   the rgrp header; GL_SKIP avoids the glops read. */
	if (sdp->sd_args.ar_rgrplvb)
		flags |= GL_SKIP;
	if (gfs2_assert_warn(sdp, ap->target))
		return -EINVAL;
	/* Choose a starting rgrp: active reservation > goal block > lookup. */
	if (gfs2_rs_active(rs)) {
		begin = rs->rs_rbm.rgd;
	} else if (rs->rs_rbm.rgd &&
		   rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) {
		begin = rs->rs_rbm.rgd;
	} else {
		check_and_update_goal(ip);
		rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
	}
	/* Orlov spreading: scatter directories across rgrps at random. */
	if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
		skip = gfs2_orlov_skip(ip);
	if (rs->rs_rbm.rgd == NULL)
		return -EBADSLT;

	/* Up to three passes over the rgrps, getting less picky each time. */
	while (loops < 3) {
		rg_locked = 1;

		if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
			rg_locked = 0;
			if (skip && skip--)
				goto next_rgrp;
			/* Without a reservation, avoid rgrps that look slow
			   or congested on the early passes. */
			if (!gfs2_rs_active(rs)) {
				if (loops == 0 &&
				    !fast_to_acquire(rs->rs_rbm.rgd))
					goto next_rgrp;
				if ((loops < 2) &&
				    gfs2_rgrp_used_recently(rs, 1000) &&
				    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
					goto next_rgrp;
			}
			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
						   LM_ST_EXCLUSIVE, flags,
						   &ip->i_rgd_gh);
			if (unlikely(error))
				return error;
			/* Re-check congestion now that we hold the glock. */
			if (!gfs2_rs_active(rs) && (loops < 2) &&
			    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
				goto skip_rgrp;
			if (sdp->sd_args.ar_rgrplvb) {
				error = update_rgrp_lvb(rs->rs_rbm.rgd);
				if (unlikely(error)) {
					gfs2_glock_dq_uninit(&ip->i_rgd_gh);
					return error;
				}
			}
		}

		/* Skip unusable resource groups */
		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
						 GFS2_RDF_ERROR)) ||
		    (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
			goto skip_rgrp;

		if (sdp->sd_args.ar_rgrplvb)
			gfs2_rgrp_bh_get(rs->rs_rbm.rgd);

		/* Get a reservation if we don't already have one */
		if (!gfs2_rs_active(rs))
			rg_mblk_search(rs->rs_rbm.rgd, ip, ap);

		/* Skip rgrps when we can't get a reservation on first pass */
		if (!gfs2_rs_active(rs) && (loops < 1))
			goto check_rgrp;

		/* If rgrp has enough free space, use it */
		free_blocks = rgd_free(rs->rs_rbm.rgd, rs);
		if (free_blocks >= ap->target ||
		    (loops == 2 && ap->min_target &&
		     free_blocks >= ap->min_target)) {
			ap->allowed = free_blocks;
			return 0;
		}
check_rgrp:
		/* Check for unlinked inodes which can be reclaimed */
		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
			try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
					ip->i_no_addr);
skip_rgrp:
		/* Drop reservation, if we couldn't use reserved rgrp */
		if (gfs2_rs_active(rs))
			gfs2_rs_deltree(rs);

		/* Unlock rgrp if required */
		if (!rg_locked)
			gfs2_glock_dq_uninit(&ip->i_rgd_gh);
next_rgrp:
		/* Find the next rgrp, and continue looking */
		if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
			continue;
		if (skip)
			continue;

		/* If we've scanned all the rgrps, but found no free blocks
		 * then this checks for some less likely conditions before
		 * trying again.
		 */
		loops++;
		/* Check that fs hasn't grown if writing to rindex */
		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
			error = gfs2_ri_update(ip);
			if (error)
				return error;
		}
		/* Flushing the log may release space */
		if (loops == 2)
			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
				       GFS2_LFC_INPLACE_RESERVE);
	}

	return -ENOSPC;
}
2140
2141/**
2142 * gfs2_inplace_release - release an inplace reservation
2143 * @ip: the inode the reservation was taken out on
2144 *
2145 * Release a reservation made by gfs2_inplace_reserve().
2146 */
2147
2148void gfs2_inplace_release(struct gfs2_inode *ip)
2149{
21f09c43
AG
2150 if (gfs2_holder_initialized(&ip->i_rgd_gh))
2151 gfs2_glock_dq_uninit(&ip->i_rgd_gh);
b3b94faa
DT
2152}
2153
b3e47ca0
BP
2154/**
2155 * gfs2_alloc_extent - allocate an extent from a given bitmap
4a993fb1 2156 * @rbm: the resource group information
b3e47ca0 2157 * @dinode: TRUE if the first block we allocate is for a dinode
c04a2ef3 2158 * @n: The extent length (value/result)
b3e47ca0 2159 *
c04a2ef3 2160 * Add the bitmap buffer to the transaction.
b3e47ca0 2161 * Set the found bits to @new_state to change block's allocation state.
b3e47ca0 2162 */
c04a2ef3 2163static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
4a993fb1 2164 unsigned int *n)
b3e47ca0 2165{
c04a2ef3 2166 struct gfs2_rbm pos = { .rgd = rbm->rgd, };
b3e47ca0 2167 const unsigned int elen = *n;
c04a2ef3
SW
2168 u64 block;
2169 int ret;
b3e47ca0 2170
c04a2ef3
SW
2171 *n = 1;
2172 block = gfs2_rbm_to_block(rbm);
e579ed4f 2173 gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
3e6339dd 2174 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
c04a2ef3 2175 block++;
60a0b8f9 2176 while (*n < elen) {
c04a2ef3 2177 ret = gfs2_rbm_from_block(&pos, block);
dffe12a8 2178 if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE)
60a0b8f9 2179 break;
e579ed4f 2180 gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
3e6339dd 2181 gfs2_setbit(&pos, true, GFS2_BLKST_USED);
60a0b8f9 2182 (*n)++;
c04a2ef3 2183 block++;
c8cdf479 2184 }
b3b94faa
DT
2185}
2186
2187/**
2188 * rgblk_free - Change alloc state of given block(s)
2189 * @sdp: the filesystem
0ddeded4 2190 * @rgd: the resource group the blocks are in
b3b94faa
DT
2191 * @bstart: the start of a run of blocks to free
2192 * @blen: the length of the block run (all must lie within ONE RG!)
2193 * @new_state: GFS2_BLKST_XXX the after-allocation block state
b3b94faa
DT
2194 */
2195
0ddeded4
AG
2196static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
2197 u64 bstart, u32 blen, unsigned char new_state)
b3b94faa 2198{
3b1d0b9d 2199 struct gfs2_rbm rbm;
d24e0569 2200 struct gfs2_bitmap *bi, *bi_prev = NULL;
b3b94faa 2201
0ddeded4 2202 rbm.rgd = rgd;
f654683d 2203 if (WARN_ON_ONCE(gfs2_rbm_from_block(&rbm, bstart)))
0ddeded4 2204 return;
b3b94faa 2205 while (blen--) {
e579ed4f 2206 bi = rbm_bi(&rbm);
d24e0569
BP
2207 if (bi != bi_prev) {
2208 if (!bi->bi_clone) {
2209 bi->bi_clone = kmalloc(bi->bi_bh->b_size,
2210 GFP_NOFS | __GFP_NOFAIL);
2211 memcpy(bi->bi_clone + bi->bi_offset,
2212 bi->bi_bh->b_data + bi->bi_offset,
281b4952 2213 bi->bi_bytes);
d24e0569
BP
2214 }
2215 gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
2216 bi_prev = bi;
b3b94faa 2217 }
3e6339dd 2218 gfs2_setbit(&rbm, false, new_state);
0eacdd16 2219 gfs2_rbm_add(&rbm, 1);
b3b94faa 2220 }
b3b94faa
DT
2221}
2222
2223/**
09010978
SW
2224 * gfs2_rgrp_dump - print out an rgrp
2225 * @seq: The iterator
0e539ca1 2226 * @rgd: The rgrp in question
3792ce97 2227 * @fs_id_buf: pointer to file system id (if requested)
09010978
SW
2228 *
2229 */
2230
0e539ca1 2231void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
3792ce97 2232 const char *fs_id_buf)
09010978 2233{
8e2e0047
BP
2234 struct gfs2_blkreserv *trs;
2235 const struct rb_node *n;
2236
3792ce97
BP
2237 gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2238 fs_id_buf,
09010978 2239 (unsigned long long)rgd->rd_addr, rgd->rd_flags,
8e2e0047 2240 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
5ea5050c 2241 rgd->rd_reserved, rgd->rd_extfail_pt);
72244b6b
BP
2242 if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
2243 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
2244
3792ce97 2245 gfs2_print_dbg(seq, "%s L: f:%02x b:%u i:%u\n", fs_id_buf,
72244b6b
BP
2246 be32_to_cpu(rgl->rl_flags),
2247 be32_to_cpu(rgl->rl_free),
2248 be32_to_cpu(rgl->rl_dinodes));
2249 }
8e2e0047
BP
2250 spin_lock(&rgd->rd_rsspin);
2251 for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2252 trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
3792ce97 2253 dump_rs(seq, trs, fs_id_buf);
8e2e0047
BP
2254 }
2255 spin_unlock(&rgd->rd_rsspin);
09010978
SW
2256}
2257
/*
 * gfs2_rgrp_error - flag a resource group as in error
 * @rgd: the rgrp on which an inconsistency was detected
 *
 * Warns, dumps the rgrp state for debugging, and sets GFS2_RDF_ERROR
 * so allocators skip this rgrp until the filesystem is unmounted and
 * repaired with fsck.gfs2.
 */
static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	/* +7 covers the "fsid=" prefix, ": " suffix and the NUL. */
	char fs_id_buf[sizeof(sdp->sd_fsname) + 7];

	fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
		(unsigned long long)rgd->rd_addr);
	fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
	sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
	gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
	rgd->rd_flags |= GFS2_RDF_ERROR;
}
2270
/**
 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
 * @ip: The inode we have just allocated blocks for
 * @rbm: The start of the allocated blocks
 * @len: The extent length
 *
 * Adjusts a reservation after an allocation has taken place. If the
 * reservation does not match the allocation, or if it is now empty
 * then it is removed.
 */

static void gfs2_adjust_reservation(struct gfs2_inode *ip,
				    const struct gfs2_rbm *rbm, unsigned len)
{
	struct gfs2_blkreserv *rs = &ip->i_res;
	struct gfs2_rgrpd *rgd = rbm->rgd;
	unsigned rlen;
	u64 block;
	int ret;

	/* rd_rsspin protects the reservation tree and rd_reserved. */
	spin_lock(&rgd->rd_rsspin);
	if (gfs2_rs_active(rs)) {
		if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
			/* Allocation starts exactly at the reservation:
			   slide its start past the allocated extent. */
			block = gfs2_rbm_to_block(rbm);
			ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
			rlen = min(rs->rs_free, len);
			rs->rs_free -= rlen;
			rgd->rd_reserved -= rlen;
			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
			/* Keep the reservation while blocks remain and the
			   new start position is valid. */
			if (rs->rs_free && !ret)
				goto out;
			/* We used up our block reservation, so we should
			   reserve more blocks next time. */
			atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint);
		}
		/* Mismatched or exhausted reservation: drop it. */
		__rs_deltree(rs);
	}
out:
	spin_unlock(&rgd->rd_rsspin);
}
2311
9e07f2cb
SW
2312/**
2313 * gfs2_set_alloc_start - Set starting point for block allocation
2314 * @rbm: The rbm which will be set to the required location
2315 * @ip: The gfs2 inode
2316 * @dinode: Flag to say if allocation includes a new inode
2317 *
2318 * This sets the starting point from the reservation if one is active
2319 * otherwise it falls back to guessing a start point based on the
2320 * inode's goal block or the last allocation point in the rgrp.
2321 */
2322
2323static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2324 const struct gfs2_inode *ip, bool dinode)
2325{
2326 u64 goal;
2327
a097dc7e
BP
2328 if (gfs2_rs_active(&ip->i_res)) {
2329 *rbm = ip->i_res.rs_rbm;
9e07f2cb
SW
2330 return;
2331 }
2332
2333 if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2334 goal = ip->i_goal;
2335 else
2336 goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2337
f654683d
AG
2338 if (WARN_ON_ONCE(gfs2_rbm_from_block(rbm, goal))) {
2339 rbm->bii = 0;
2340 rbm->offset = 0;
2341 }
9e07f2cb
SW
2342}
2343
/**
 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
 * @ip: the inode to allocate the block for
 * @bn: Used to return the starting block number
 * @nblocks: requested number of blocks/extent length (value/result)
 * @dinode: 1 if we're allocating a dinode block, else 0
 * @generation: the generation number of the inode
 *
 * Returns: 0 or error
 */

int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
		      bool dinode, u64 *generation)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, };
	unsigned int ndata;
	u64 block; /* block, within the file system scope */
	int error;

	/* First attempt honours the inode's reservation (ip passed in). */
	gfs2_set_alloc_start(&rbm, ip, dinode);
	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false);

	/* Retry ignoring reservations if no free block was found. */
	if (error == -ENOSPC) {
		gfs2_set_alloc_start(&rbm, ip, dinode);
		error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false);
	}

	/* Since all blocks are reserved in advance, this shouldn't happen */
	if (error) {
		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
			(unsigned long long)ip->i_no_addr, error, *nblocks,
			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
			rbm.rgd->rd_extfail_pt);
		goto rgrp_error;
	}

	/* Mark the extent in the bitmap; *nblocks becomes the real length. */
	gfs2_alloc_extent(&rbm, dinode, nblocks);
	block = gfs2_rbm_to_block(&rbm);
	rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
	if (gfs2_rs_active(&ip->i_res))
		gfs2_adjust_reservation(ip, &rbm, *nblocks);
	/* ndata = data blocks only (the first block may be the dinode). */
	ndata = *nblocks;
	if (dinode)
		ndata--;

	if (!dinode) {
		/* Update the allocation goal in the on-disk dinode. */
		ip->i_goal = block + ndata - 1;
		error = gfs2_meta_inode_buffer(ip, &dibh);
		if (error == 0) {
			struct gfs2_dinode *di =
				(struct gfs2_dinode *)dibh->b_data;
			gfs2_trans_add_meta(ip->i_gl, dibh);
			di->di_goal_meta = di->di_goal_data =
				cpu_to_be64(ip->i_goal);
			brelse(dibh);
		}
	}
	/* Consistency check: the rgrp must have had this many free blocks. */
	if (rbm.rgd->rd_free < *nblocks) {
		fs_warn(sdp, "nblocks=%u\n", *nblocks);
		goto rgrp_error;
	}

	rbm.rgd->rd_free -= *nblocks;
	if (dinode) {
		rbm.rgd->rd_dinodes++;
		*generation = rbm.rgd->rd_igeneration++;
		/* Generation 0 is reserved; skip it. */
		if (*generation == 0)
			*generation = rbm.rgd->rd_igeneration++;
	}

	/* Write the updated rgrp header into the transaction. */
	gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);

	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
	if (dinode)
		gfs2_trans_remove_revoke(sdp, block, *nblocks);

	gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);

	rbm.rgd->rd_free_clone -= *nblocks;
	trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
	*bn = block;
	return 0;

rgrp_error:
	gfs2_rgrp_error(rbm.rgd);
	return -EIO;
}
2435
2436/**
46fcb2ed 2437 * __gfs2_free_blocks - free a contiguous run of block(s)
b3b94faa 2438 * @ip: the inode these blocks are being freed from
0ddeded4 2439 * @rgd: the resource group the blocks are in
b3b94faa
DT
2440 * @bstart: first block of a run of contiguous blocks
2441 * @blen: the length of the block run
46fcb2ed 2442 * @meta: 1 if the blocks represent metadata
b3b94faa
DT
2443 *
2444 */
2445
0ddeded4
AG
2446void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
2447 u64 bstart, u32 blen, int meta)
b3b94faa 2448{
feaa7bba 2449 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
b3b94faa 2450
0ddeded4 2451 rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE);
41db1ab9 2452 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
cfc8b549 2453 rgd->rd_free += blen;
66fc061b 2454 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
350a9b0a 2455 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
42d52e38 2456 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
b3b94faa 2457
6d3117b4 2458 /* Directories keep their data in the metadata address space */
68942870
BP
2459 if (meta || ip->i_depth || gfs2_is_jdata(ip))
2460 gfs2_journal_wipe(ip, bstart, blen);
4c16c36a 2461}
b3b94faa 2462
4c16c36a
BP
2463/**
2464 * gfs2_free_meta - free a contiguous run of data block(s)
2465 * @ip: the inode these blocks are being freed from
0ddeded4 2466 * @rgd: the resource group the blocks are in
4c16c36a
BP
2467 * @bstart: first block of a run of contiguous blocks
2468 * @blen: the length of the block run
2469 *
2470 */
2471
0ddeded4
AG
2472void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
2473 u64 bstart, u32 blen)
4c16c36a
BP
2474{
2475 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2476
0ddeded4 2477 __gfs2_free_blocks(ip, rgd, bstart, blen, 1);
b3b94faa 2478 gfs2_statfs_change(sdp, 0, +blen, 0);
2933f925 2479 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
b3b94faa
DT
2480}
2481
feaa7bba
SW
2482void gfs2_unlink_di(struct inode *inode)
2483{
2484 struct gfs2_inode *ip = GFS2_I(inode);
2485 struct gfs2_sbd *sdp = GFS2_SB(inode);
2486 struct gfs2_rgrpd *rgd;
dbb7cae2 2487 u64 blkno = ip->i_no_addr;
feaa7bba 2488
0ddeded4 2489 rgd = gfs2_blk2rgrpd(sdp, blkno, true);
feaa7bba
SW
2490 if (!rgd)
2491 return;
0ddeded4 2492 rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
41db1ab9 2493 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
350a9b0a 2494 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
42d52e38 2495 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
f5580d0f 2496 be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
feaa7bba
SW
2497}
2498
a18c78c5 2499void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
b3b94faa
DT
2500{
2501 struct gfs2_sbd *sdp = rgd->rd_sbd;
b3b94faa 2502
0ddeded4 2503 rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
73f74948 2504 if (!rgd->rd_dinodes)
b3b94faa 2505 gfs2_consist_rgrpd(rgd);
73f74948 2506 rgd->rd_dinodes--;
cfc8b549 2507 rgd->rd_free++;
b3b94faa 2508
350a9b0a 2509 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
42d52e38 2510 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
f5580d0f 2511 be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
b3b94faa
DT
2512
2513 gfs2_statfs_change(sdp, 0, +1, -1);
41db1ab9 2514 trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2933f925 2515 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
68942870 2516 gfs2_journal_wipe(ip, ip->i_no_addr, 1);
b3b94faa
DT
2517}
2518
acf7e244
SW
2519/**
2520 * gfs2_check_blk_type - Check the type of a block
2521 * @sdp: The superblock
2522 * @no_addr: The block number to check
2523 * @type: The block type we are looking for
2524 *
2525 * Returns: 0 if the block type matches the expected type
2526 * -ESTALE if it doesn't match
2527 * or -ve errno if something went wrong while checking
2528 */
2529
2530int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2531{
2532 struct gfs2_rgrpd *rgd;
8339ee54 2533 struct gfs2_holder rgd_gh;
dffe12a8 2534 struct gfs2_rbm rbm;
58884c4d 2535 int error = -EINVAL;
acf7e244 2536
66fc061b 2537 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
acf7e244 2538 if (!rgd)
8339ee54 2539 goto fail;
acf7e244
SW
2540
2541 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2542 if (error)
8339ee54 2543 goto fail;
acf7e244 2544
dffe12a8
BP
2545 rbm.rgd = rgd;
2546 error = gfs2_rbm_from_block(&rbm, no_addr);
bc923818
ZQ
2547 if (!WARN_ON_ONCE(error)) {
2548 if (gfs2_testbit(&rbm, false) != type)
2549 error = -ESTALE;
2550 }
acf7e244
SW
2551
2552 gfs2_glock_dq_uninit(&rgd_gh);
bc923818 2553
acf7e244
SW
2554fail:
2555 return error;
2556}
2557
b3b94faa
DT
2558/**
2559 * gfs2_rlist_add - add a RG to a list of RGs
70b0c365 2560 * @ip: the inode
b3b94faa
DT
2561 * @rlist: the list of resource groups
2562 * @block: the block
2563 *
2564 * Figure out what RG a block belongs to and add that RG to the list
2565 *
2566 * FIXME: Don't use NOFAIL
2567 *
2568 */
2569
70b0c365 2570void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
cd915493 2571 u64 block)
b3b94faa 2572{
70b0c365 2573 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
b3b94faa
DT
2574 struct gfs2_rgrpd *rgd;
2575 struct gfs2_rgrpd **tmp;
2576 unsigned int new_space;
2577 unsigned int x;
2578
2579 if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2580 return;
2581
03f8c41c
AG
2582 /*
2583 * The resource group last accessed is kept in the last position.
2584 */
2585
2586 if (rlist->rl_rgrps) {
2587 rgd = rlist->rl_rgd[rlist->rl_rgrps - 1];
2588 if (rgrp_contains_block(rgd, block))
2589 return;
66fc061b 2590 rgd = gfs2_blk2rgrpd(sdp, block, 1);
03f8c41c 2591 } else {
b7eba890 2592 rgd = ip->i_res.rs_rbm.rgd;
03f8c41c
AG
2593 if (!rgd || !rgrp_contains_block(rgd, block))
2594 rgd = gfs2_blk2rgrpd(sdp, block, 1);
2595 }
2596
b3b94faa 2597 if (!rgd) {
03f8c41c
AG
2598 fs_err(sdp, "rlist_add: no rgrp for block %llu\n",
2599 (unsigned long long)block);
b3b94faa
DT
2600 return;
2601 }
2602
03f8c41c
AG
2603 for (x = 0; x < rlist->rl_rgrps; x++) {
2604 if (rlist->rl_rgd[x] == rgd) {
2605 swap(rlist->rl_rgd[x],
2606 rlist->rl_rgd[rlist->rl_rgrps - 1]);
b3b94faa 2607 return;
03f8c41c
AG
2608 }
2609 }
b3b94faa
DT
2610
2611 if (rlist->rl_rgrps == rlist->rl_space) {
2612 new_space = rlist->rl_space + 10;
2613
2614 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
dd894be8 2615 GFP_NOFS | __GFP_NOFAIL);
b3b94faa
DT
2616
2617 if (rlist->rl_rgd) {
2618 memcpy(tmp, rlist->rl_rgd,
2619 rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2620 kfree(rlist->rl_rgd);
2621 }
2622
2623 rlist->rl_space = new_space;
2624 rlist->rl_rgd = tmp;
2625 }
2626
2627 rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2628}
2629
2630/**
2631 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2632 * and initialize an array of glock holders for them
2633 * @rlist: the list of resource groups
b3b94faa
DT
2634 *
2635 * FIXME: Don't use NOFAIL
2636 *
2637 */
2638
c3abc29e 2639void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist)
b3b94faa
DT
2640{
2641 unsigned int x;
2642
6da2ec56
KC
2643 rlist->rl_ghs = kmalloc_array(rlist->rl_rgrps,
2644 sizeof(struct gfs2_holder),
2645 GFP_NOFS | __GFP_NOFAIL);
b3b94faa
DT
2646 for (x = 0; x < rlist->rl_rgrps; x++)
2647 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
c3abc29e 2648 LM_ST_EXCLUSIVE, 0,
b3b94faa
DT
2649 &rlist->rl_ghs[x]);
2650}
2651
2652/**
2653 * gfs2_rlist_free - free a resource group list
27ff6a0f 2654 * @rlist: the list of resource groups
b3b94faa
DT
2655 *
2656 */
2657
2658void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2659{
2660 unsigned int x;
2661
2662 kfree(rlist->rl_rgd);
2663
2664 if (rlist->rl_ghs) {
2665 for (x = 0; x < rlist->rl_rgrps; x++)
2666 gfs2_holder_uninit(&rlist->rl_ghs[x]);
2667 kfree(rlist->rl_ghs);
8e2e0047 2668 rlist->rl_ghs = NULL;
b3b94faa
DT
2669 }
2670}
2671