]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/gfs2/rgrp.c
gfs2: Fix gfs2_testbit to use clone bitmaps
[mirror_ubuntu-jammy-kernel.git] / fs / gfs2 / rgrp.c
CommitLineData
b3b94faa
DT
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
fe6c991c 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
b3b94faa
DT
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
e9fc2aa0 7 * of the GNU General Public License version 2.
b3b94faa
DT
8 */
9
d77d1b58
JP
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
b3b94faa
DT
12#include <linux/slab.h>
13#include <linux/spinlock.h>
14#include <linux/completion.h>
15#include <linux/buffer_head.h>
f42faf4f 16#include <linux/fs.h>
5c676f6d 17#include <linux/gfs2_ondisk.h>
1f466a47 18#include <linux/prefetch.h>
f15ab561 19#include <linux/blkdev.h>
7c9ca621 20#include <linux/rbtree.h>
9dbe9610 21#include <linux/random.h>
b3b94faa
DT
22
23#include "gfs2.h"
5c676f6d 24#include "incore.h"
b3b94faa
DT
25#include "glock.h"
26#include "glops.h"
b3b94faa
DT
27#include "lops.h"
28#include "meta_io.h"
29#include "quota.h"
30#include "rgrp.h"
31#include "super.h"
32#include "trans.h"
5c676f6d 33#include "util.h"
172e045a 34#include "log.h"
c8cdf479 35#include "inode.h"
63997775 36#include "trace_gfs2.h"
850d2d91 37#include "dir.h"
b3b94faa 38
2c1e52aa 39#define BFITNOENT ((u32)~0)
6760bdcd 40#define NO_BLOCK ((u64)~0)
88c8ab1f 41
1f466a47
BP
42#if BITS_PER_LONG == 32
43#define LBITMASK (0x55555555UL)
44#define LBITSKIP55 (0x55555555UL)
45#define LBITSKIP00 (0x00000000UL)
46#else
47#define LBITMASK (0x5555555555555555UL)
48#define LBITSKIP55 (0x5555555555555555UL)
49#define LBITSKIP00 (0x0000000000000000UL)
50#endif
51
88c8ab1f
SW
52/*
53 * These routines are used by the resource group routines (rgrp.c)
54 * to keep track of block allocation. Each block is represented by two
feaa7bba
SW
55 * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks.
56 *
57 * 0 = Free
58 * 1 = Used (not metadata)
59 * 2 = Unlinked (still in use) inode
60 * 3 = Used (metadata)
88c8ab1f
SW
61 */
62
5ce13431
BP
63struct gfs2_extent {
64 struct gfs2_rbm rbm;
65 u32 len;
66};
67
88c8ab1f
SW
68static const char valid_change[16] = {
69 /* current */
feaa7bba 70 /* n */ 0, 1, 1, 1,
88c8ab1f 71 /* e */ 1, 0, 0, 0,
feaa7bba 72 /* w */ 0, 0, 0, 1,
88c8ab1f
SW
73 1, 0, 0, 0
74};
75
5ce13431 76static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
8381e602 77 const struct gfs2_inode *ip, bool nowrap);
ff7f4cb4
SW
78
79
88c8ab1f
SW
/**
 * gfs2_setbit - Set a bit in the bitmaps
 * @rbm: The position of the bit to set
 * @do_clone: Also set the clone bitmap, if it exists
 * @new_state: the new state of the block
 *
 * Updates the two-bit allocation state of one block in the bitmap buffer,
 * validating the transition against the valid_change table first.
 */

static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
			       unsigned char new_state)
{
	unsigned char *byte1, *byte2, *end, cur_state;
	struct gfs2_bitmap *bi = rbm_bi(rbm);
	unsigned int buflen = bi->bi_len;
	/* Two bits per block: bit position of this block within its byte */
	const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

	byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
	end = bi->bi_bh->b_data + bi->bi_offset + buflen;

	BUG_ON(byte1 >= end);

	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;

	/* Reject impossible state transitions (see the valid_change table) */
	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
		pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n",
			rbm->offset, cur_state, new_state);
		pr_warn("rgrp=0x%llx bi_start=0x%x\n",
			(unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
		pr_warn("bi_offset=0x%x bi_len=0x%x\n",
			bi->bi_offset, bi->bi_len);
		dump_stack();
		gfs2_consist_rgrpd(rbm->rgd);
		return;
	}
	/* XOR with (old ^ new) flips exactly the bits that differ */
	*byte1 ^= (cur_state ^ new_state) << bit;

	/* Mirror the change into the clone bitmap when requested */
	if (do_clone && bi->bi_clone) {
		byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
		*byte2 ^= (cur_state ^ new_state) << bit;
	}
}
122
123/**
124 * gfs2_testbit - test a bit in the bitmaps
c04a2ef3 125 * @rbm: The bit to test
dffe12a8
BP
126 * @use_clone: If true, test the clone bitmap, not the official bitmap.
127 *
128 * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps,
129 * not the "real" bitmaps, to avoid allocating recently freed blocks.
88c8ab1f 130 *
c04a2ef3 131 * Returns: The two bit block state of the requested bit
88c8ab1f
SW
132 */
133
dffe12a8 134static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
88c8ab1f 135{
e579ed4f 136 struct gfs2_bitmap *bi = rbm_bi(rbm);
dffe12a8 137 const u8 *buffer;
c04a2ef3 138 const u8 *byte;
88c8ab1f
SW
139 unsigned int bit;
140
dffe12a8
BP
141 if (use_clone && bi->bi_clone)
142 buffer = bi->bi_clone;
143 else
144 buffer = bi->bi_bh->b_data;
145 buffer += bi->bi_offset;
c04a2ef3
SW
146 byte = buffer + (rbm->offset / GFS2_NBBY);
147 bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
88c8ab1f 148
c04a2ef3 149 return (*byte >> bit) & GFS2_BIT_MASK;
88c8ab1f
SW
150}
151
223b2b88
SW
/**
 * gfs2_bit_search
 * @ptr: Pointer to bitmap data
 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 * @state: The state we are searching for
 *
 * We xor the bitmap data with a pattern which is the bitwise opposite
 * of what we are looking for, this gives rise to a pattern of ones
 * wherever there is a match. Since we have two bits per entry, we
 * take this pattern, shift it down by one place and then and it with
 * the original. All the even bit positions (0,2,4, etc) then represent
 * successful matches, so we mask with 0x55555..... to remove the unwanted
 * odd bit positions.
 *
 * This allows searching of a whole u64 at once (32 blocks) with a
 * single test (on 64 bit arches).
 */

static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
{
	u64 tmp;
	/* search[state] is the bitwise complement of the state pattern,
	   repeated across the whole word */
	static const u64 search[] = {
		[0] = 0xffffffffffffffffULL,
		[1] = 0xaaaaaaaaaaaaaaaaULL,
		[2] = 0x5555555555555555ULL,
		[3] = 0x0000000000000000ULL,
	};
	tmp = le64_to_cpu(*ptr) ^ search[state];
	tmp &= (tmp >> 1);	/* both bits of an entry must match */
	tmp &= mask;		/* keep only even bit positions / search window */
	return tmp;
}
184
8e2e0047
BP
185/**
186 * rs_cmp - multi-block reservation range compare
187 * @blk: absolute file system block number of the new reservation
188 * @len: number of blocks in the new reservation
189 * @rs: existing reservation to compare against
190 *
191 * returns: 1 if the block range is beyond the reach of the reservation
192 * -1 if the block range is before the start of the reservation
193 * 0 if the block range overlaps with the reservation
194 */
195static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
196{
4a993fb1 197 u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
8e2e0047
BP
198
199 if (blk >= startblk + rs->rs_free)
200 return 1;
201 if (blk + len - 1 < startblk)
202 return -1;
203 return 0;
204}
205
88c8ab1f
SW
/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 *               a block in a given allocation state.
 * @buf: the buffer that holds the bitmaps
 * @len: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buffer)
 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem. @buffer will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures, but
 * headers are always a multiple of 64 bits long so that the buffer is
 * always aligned to a 64 bit boundary.
 *
 * The size of the buffer is in bytes, but it is assumed that it is
 * always ok to read a complete multiple of 64 bits at the end
 * of the block in case the end is not aligned to a natural boundary.
 *
 * Return: the block number (bitmap buffer scope) that was found
 */

static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
		       u32 goal, u8 state)
{
	/* Starting bit within the first u64 word (2 bits per block) */
	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
	u64 tmp;
	u64 mask = 0x5555555555555555ULL;
	u32 bit;

	/* Mask off bits we don't care about at the start of the search */
	mask <<= spoint;
	tmp = gfs2_bit_search(ptr, mask, state);
	ptr++;
	/* Scan one whole u64 (32 blocks) per iteration */
	while(tmp == 0 && ptr < end) {
		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
		ptr++;
	}
	/* Mask off any bits which are more than len bytes from the start */
	if (ptr == end && (len & (sizeof(u64) - 1)))
		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
	/* Didn't find anything, so return */
	if (tmp == 0)
		return BFITNOENT;
	ptr--;		/* undo the post-increment; tmp matched this word */
	bit = __ffs64(tmp);
	bit /= 2;	/* two bits per entry in the bitmap */
	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}
256
ff7f4cb4
SW
/**
 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 * @rbm: The rbm with rgd already set correctly
 * @block: The block number (filesystem relative)
 *
 * This sets the bi and offset members of an rbm based on a
 * resource group and a filesystem relative block number. The
 * resource group must be set in the rbm on entry, the bi and
 * offset members will be set by this function.
 *
 * Returns: 0 on success, or an error code
 */

static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
{
	u64 rblock = block - rbm->rgd->rd_data0;

	if (WARN_ON_ONCE(rblock > UINT_MAX))
		return -EINVAL;
	if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
		return -E2BIG;

	rbm->bii = 0;
	rbm->offset = (u32)(rblock);
	/* Check if the block is within the first block */
	if (rbm->offset < rbm_bi(rbm)->bi_blocks)
		return 0;

	/* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
	rbm->offset += (sizeof(struct gfs2_rgrp) -
			sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
	/* Convert the adjusted offset into (bitmap index, offset in bitmap) */
	rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
	rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
	return 0;
}
292
149ed7f5
BP
293/**
294 * gfs2_rbm_incr - increment an rbm structure
295 * @rbm: The rbm with rgd already set correctly
296 *
297 * This function takes an existing rbm structure and increments it to the next
298 * viable block offset.
299 *
300 * Returns: If incrementing the offset would cause the rbm to go past the
301 * end of the rgrp, true is returned, otherwise false.
302 *
303 */
304
305static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
306{
307 if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
308 rbm->offset++;
309 return false;
310 }
311 if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
312 return true;
313
314 rbm->offset = 0;
315 rbm->bii++;
316 return false;
317}
318
ff7f4cb4
SW
/**
 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
 * @rbm: Position to search (value/result)
 * @n_unaligned: Number of unaligned blocks to check
 * @len: Decremented for each block found (terminate on zero)
 *
 * Walks up to @n_unaligned blocks one at a time, consuming from @len for
 * each free block found. Tests the clone bitmap (when present) so that
 * recently freed, not-yet-reusable blocks are not counted as free.
 *
 * Returns: true if a non-free block is encountered (or @len/rgrp exhausted)
 */

static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
{
	u32 n;
	u8 res;

	for (n = 0; n < n_unaligned; n++) {
		res = gfs2_testbit(rbm, true);
		if (res != GFS2_BLKST_FREE)
			return true;
		(*len)--;
		if (*len == 0)
			return true;
		/* Stop at the resource group boundary */
		if (gfs2_rbm_incr(rbm))
			return true;
	}

	return false;
}
346
/**
 * gfs2_free_extlen - Return extent length of free blocks
 * @rrbm: Starting position
 * @len: Max length to check
 *
 * Starting at the block specified by the rbm, see how many free blocks
 * there are, not reading more than len blocks ahead. This can be done
 * using memchr_inv when the blocks are byte aligned, but has to be done
 * on a block by block basis in case of unaligned blocks. Also this
 * function can cope with bitmap boundaries (although it must stop on
 * a resource group boundary)
 *
 * Returns: Number of free blocks in the extent
 */

static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
{
	struct gfs2_rbm rbm = *rrbm;	/* local copy; caller's rbm untouched */
	u32 n_unaligned = rbm.offset & 3;
	u32 size = len;
	u32 bytes;
	u32 chunk_size;
	u8 *ptr, *start, *end;
	u64 block;
	struct gfs2_bitmap *bi;

	/* Handle leading blocks up to the next byte boundary */
	if (n_unaligned &&
	    gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
		goto out;

	n_unaligned = len & 3;
	/* Start is now byte aligned */
	while (len > 3) {
		bi = rbm_bi(&rbm);
		start = bi->bi_bh->b_data;
		/* Use the clone bitmap if it exists, to skip blocks freed
		   in the current transaction */
		if (bi->bi_clone)
			start = bi->bi_clone;
		start += bi->bi_offset;
		end = start + bi->bi_len;
		BUG_ON(rbm.offset & 3);
		start += (rbm.offset / GFS2_NBBY);
		bytes = min_t(u32, len / GFS2_NBBY, (end - start));
		/* A zero byte == 4 free blocks; find the first non-zero byte */
		ptr = memchr_inv(start, 0, bytes);
		chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
		chunk_size *= GFS2_NBBY;
		BUG_ON(len < chunk_size);
		len -= chunk_size;
		block = gfs2_rbm_to_block(&rbm);
		if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
			/* Hit the end of the rgrp; no trailing blocks left */
			n_unaligned = 0;
			break;
		}
		if (ptr) {
			/* Non-zero byte found: at most 3 more free blocks
			   before the extent ends */
			n_unaligned = 3;
			break;
		}
		n_unaligned = len & 3;
	}

	/* Deal with any bits left over at the end */
	if (n_unaligned)
		gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
out:
	return size - len;
}
412
88c8ab1f
SW
413/**
414 * gfs2_bitcount - count the number of bits in a certain state
886b1416 415 * @rgd: the resource group descriptor
88c8ab1f
SW
416 * @buffer: the buffer that holds the bitmaps
417 * @buflen: the length (in bytes) of the buffer
418 * @state: the state of the block we're looking for
419 *
420 * Returns: The number of bits
421 */
422
110acf38
SW
423static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
424 unsigned int buflen, u8 state)
88c8ab1f 425{
110acf38
SW
426 const u8 *byte = buffer;
427 const u8 *end = buffer + buflen;
428 const u8 state1 = state << 2;
429 const u8 state2 = state << 4;
430 const u8 state3 = state << 6;
cd915493 431 u32 count = 0;
88c8ab1f
SW
432
433 for (; byte < end; byte++) {
434 if (((*byte) & 0x03) == state)
435 count++;
436 if (((*byte) & 0x0C) == state1)
437 count++;
438 if (((*byte) & 0x30) == state2)
439 count++;
440 if (((*byte) & 0xC0) == state3)
441 count++;
442 }
443
444 return count;
445}
446
b3b94faa
DT
/**
 * gfs2_rgrp_verify - Verify that a resource group is consistent
 * @rgd: the rgrp
 *
 * Counts the blocks in each of the four allocation states across all of
 * the rgrp's bitmaps and cross-checks the totals against the counters
 * held in the rgrp descriptor (rd_free, rd_dinodes, rd_data).
 */

void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi = NULL;
	u32 length = rgd->rd_length;
	u32 count[4], tmp;
	int buf, x;

	memset(count, 0, 4 * sizeof(u32));

	/* Count # blocks in each of 4 possible allocation states */
	for (buf = 0; buf < length; buf++) {
		bi = rgd->rd_bits + buf;
		for (x = 0; x < 4; x++)
			count[x] += gfs2_bitcount(rgd,
						  bi->bi_bh->b_data +
						  bi->bi_offset,
						  bi->bi_len, x);
	}

	/* GFS2_BLKST_FREE must agree with the free-block counter */
	if (count[0] != rgd->rd_free) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "free data mismatch: %u != %u\n",
			       count[0], rgd->rd_free);
		return;
	}

	/* Used (non-inode) blocks = total - free - dinodes */
	tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
	if (count[1] != tmp) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "used data mismatch: %u != %u\n",
			       count[1], tmp);
		return;
	}

	/* Unlinked + used metadata states together account for dinodes */
	if (count[2] + count[3] != rgd->rd_dinodes) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "used metadata mismatch: %u != %u\n",
			       count[2] + count[3], rgd->rd_dinodes);
		return;
	}
}
495
b3b94faa
DT
/**
 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 * @sdp: The GFS2 superblock
 * @blk: The data block number
 * @exact: True if this needs to be an exact match
 *
 * The @exact argument should be set to true by most callers. The exception
 * is when we need to match blocks which are not represented by the rgrp
 * bitmap, but which are part of the rgrp (i.e. padding blocks) which are
 * there for alignment purposes. Another way of looking at it is that @exact
 * matches only valid data/metadata blocks, but with @exact false, it will
 * match any block within the extent of the rgrp.
 *
 * Returns: The resource group, or NULL if not found
 */

struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
{
	struct rb_node *n, *next;
	struct gfs2_rgrpd *cur;

	spin_lock(&sdp->sd_rindex_spin);
	n = sdp->sd_rindex_tree.rb_node;
	while (n) {
		cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
		next = NULL;
		if (blk < cur->rd_addr)
			next = n->rb_left;
		else if (blk >= cur->rd_data0 + cur->rd_data)
			next = n->rb_right;
		if (next == NULL) {
			/* Found the closest rgrp; drop the lock before the
			   final exactness check */
			spin_unlock(&sdp->sd_rindex_spin);
			if (exact) {
				/* Reject blocks in the gap between rd_addr
				   and rd_data0 (header/padding blocks) */
				if (blk < cur->rd_addr)
					return NULL;
				if (blk >= cur->rd_data0 + cur->rd_data)
					return NULL;
			}
			return cur;
		}
		n = next;
	}
	spin_unlock(&sdp->sd_rindex_spin);

	return NULL;
}
542
543/**
544 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
545 * @sdp: The GFS2 superblock
546 *
547 * Returns: The first rgrp in the filesystem
548 */
549
550struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
551{
7c9ca621
BP
552 const struct rb_node *n;
553 struct gfs2_rgrpd *rgd;
554
8339ee54 555 spin_lock(&sdp->sd_rindex_spin);
7c9ca621
BP
556 n = rb_first(&sdp->sd_rindex_tree);
557 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
8339ee54 558 spin_unlock(&sdp->sd_rindex_spin);
7c9ca621
BP
559
560 return rgd;
b3b94faa
DT
561}
562
/**
 * gfs2_rgrpd_get_next - get the next RG
 * @rgd: the resource group descriptor
 *
 * Returns: The next rgrp, wrapping to the first rgrp after the last one,
 *          or NULL if @rgd is the only rgrp in the tree.
 */

struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	const struct rb_node *n;

	spin_lock(&sdp->sd_rindex_spin);
	n = rb_next(&rgd->rd_node);
	if (n == NULL)
		n = rb_first(&sdp->sd_rindex_tree);	/* wrap around */

	/* Wrapped back to ourselves: only one rgrp exists */
	if (unlikely(&rgd->rd_node == n)) {
		spin_unlock(&sdp->sd_rindex_spin);
		return NULL;
	}
	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
	spin_unlock(&sdp->sd_rindex_spin);
	return rgd;
}
588
00a158be
AD
589void check_and_update_goal(struct gfs2_inode *ip)
590{
591 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
592 if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
593 ip->i_goal = ip->i_no_addr;
594}
595
8339ee54
SW
596void gfs2_free_clones(struct gfs2_rgrpd *rgd)
597{
598 int x;
599
600 for (x = 0; x < rgd->rd_length; x++) {
601 struct gfs2_bitmap *bi = rgd->rd_bits + x;
602 kfree(bi->bi_clone);
603 bi->bi_clone = NULL;
604 }
605}
606
/**
 * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode
 *                   plus a quota allocations data structure, if necessary
 * @ip: the inode for this reservation
 *
 * Returns: 0 on success, or the error returned by gfs2_qa_alloc().
 */
int gfs2_rsqa_alloc(struct gfs2_inode *ip)
{
	/* The block reservation is embedded in the inode; only the quota
	   allocation structure may need to be set up. */
	return gfs2_qa_alloc(ip);
}
616
/* Print a one-line summary of a block reservation (inode address, start
 * block, offset and free count) to the given seq_file for debugging. */
static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
{
	/* The reservation is embedded in the inode, so recover the inode */
	struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);

	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
		       (unsigned long long)ip->i_no_addr,
		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
		       rs->rs_rbm.offset, rs->rs_free);
}
626
/**
 * __rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 * Caller must hold the rgrp's rd_rsspin lock.
 */
static void __rs_deltree(struct gfs2_blkreserv *rs)
{
	struct gfs2_rgrpd *rgd;

	if (!gfs2_rs_active(rs))
		return;

	rgd = rs->rs_rbm.rgd;
	trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
	rb_erase(&rs->rs_node, &rgd->rd_rstree);
	RB_CLEAR_NODE(&rs->rs_node);

	if (rs->rs_free) {
		struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);

		/* return reserved blocks to the rgrp */
		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
		/* The rgrp extent failure point is likely not to increase;
		   it will only do so if the freed blocks are somehow
		   contiguous with a span of free blocks that follows. Still,
		   it will force the number to be recalculated later. */
		rgd->rd_extfail_pt += rs->rs_free;
		rs->rs_free = 0;
		/* The bitmap may have free space again */
		clear_bit(GBF_FULL, &bi->bi_flags);
	}
}
659
660/**
661 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
662 * @rs: The reservation to remove
663 *
664 */
20095218 665void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
8e2e0047
BP
666{
667 struct gfs2_rgrpd *rgd;
668
4a993fb1
SW
669 rgd = rs->rs_rbm.rgd;
670 if (rgd) {
671 spin_lock(&rgd->rd_rsspin);
20095218 672 __rs_deltree(rs);
44f52122 673 BUG_ON(rs->rs_free);
4a993fb1
SW
674 spin_unlock(&rgd->rd_rsspin);
675 }
8e2e0047
BP
676}
677
/**
 * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation
 * @ip: The inode for this reservation
 * @wcount: The inode's write count, or NULL
 *
 * The reservation is only torn down when no other writers remain
 * (write count <= 1, or no count supplied).
 */
void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount)
{
	down_write(&ip->i_rw_mutex);
	if ((wcount == NULL) || (atomic_read(wcount) <= 1))
		gfs2_rs_deltree(&ip->i_res);
	up_write(&ip->i_rw_mutex);
	/* The quota allocation is dropped regardless of the write count */
	gfs2_qa_delete(ip, wcount);
}
692
8e2e0047
BP
693/**
694 * return_all_reservations - return all reserved blocks back to the rgrp.
695 * @rgd: the rgrp that needs its space back
696 *
697 * We previously reserved a bunch of blocks for allocation. Now we need to
698 * give them back. This leave the reservation structures in tact, but removes
699 * all of their corresponding "no-fly zones".
700 */
701static void return_all_reservations(struct gfs2_rgrpd *rgd)
702{
703 struct rb_node *n;
704 struct gfs2_blkreserv *rs;
705
706 spin_lock(&rgd->rd_rsspin);
707 while ((n = rb_first(&rgd->rd_rstree))) {
708 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
20095218 709 __rs_deltree(rs);
8e2e0047
BP
710 }
711 spin_unlock(&rgd->rd_rsspin);
712}
713
/* Tear down every resource group descriptor in the filesystem: unlink it
 * from the rindex tree, detach and release its glock, and free its clone
 * bitmaps, bitmap descriptors, reservations and the descriptor itself. */
void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
	struct rb_node *n;
	struct gfs2_rgrpd *rgd;
	struct gfs2_glock *gl;

	while ((n = rb_first(&sdp->sd_rindex_tree))) {
		rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
		gl = rgd->rd_gl;

		rb_erase(n, &sdp->sd_rindex_tree);

		if (gl) {
			/* Break the glock -> rgd back-pointer before
			   dropping our reference */
			glock_clear_object(gl, rgd);
			gfs2_glock_put(gl);
		}

		gfs2_free_clones(rgd);
		kfree(rgd->rd_bits);
		rgd->rd_bits = NULL;
		return_all_reservations(rgd);
		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	}
}
738
bb8d8a6f
SW
/* Dump the on-disk rindex fields of @rgd to the kernel log (used from the
 * consistency-check error path in compute_bitstructs()). */
static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
	pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
	pr_info("ri_length = %u\n", rgd->rd_length);
	pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
	pr_info("ri_data = %u\n", rgd->rd_data);
	pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes);
}
747
b3b94faa
DT
/**
 * gfs2_compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi;
	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
	u32 bytes_left, bytes;
	int x;

	if (!length)
		return -EINVAL;

	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
	if (!rgd->rd_bits)
		return -ENOMEM;

	bytes_left = rgd->rd_bitbytes;

	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;

		bi->bi_flags = 0;
		/* small rgrp; bitmap stored completely in header block */
		if (length == 1) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* header block */
		} else if (x == 0) {
			/* First block carries the larger gfs2_rgrp header */
			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* last block */
		} else if (x + 1 == length) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* other blocks */
		} else {
			/* Middle blocks carry only a gfs2_meta_header */
			bytes = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header);
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		}

		bytes_left -= bytes;
	}

	/* All of rd_bitbytes must be accounted for by the descriptors */
	if (bytes_left) {
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}
	bi = rgd->rd_bits + (length - 1);
	/* The last descriptor must end exactly at rd_data blocks */
	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
		if (gfs2_consist_rgrpd(rgd)) {
			gfs2_rindex_print(rgd);
			fs_err(sdp, "start=%u len=%u offset=%u\n",
			       bi->bi_start, bi->bi_len, bi->bi_offset);
		}
		return -EIO;
	}

	return 0;
}
828
7ae8fa84
RP
/**
 * gfs2_ri_total - Total up the file system space, according to the rindex.
 * @sdp: the filesystem
 *
 * Reads every gfs2_rindex entry from the rindex inode and sums the
 * ri_data (data block count) fields.
 *
 * Returns: the total number of data blocks described by the rindex
 */
u64 gfs2_ri_total(struct gfs2_sbd *sdp)
{
	u64 total_data = 0;
	struct inode *inode = sdp->sd_rindex;
	struct gfs2_inode *ip = GFS2_I(inode);
	char buf[sizeof(struct gfs2_rindex)];
	int error, rgrps;

	for (rgrps = 0;; rgrps++) {
		loff_t pos = rgrps * sizeof(struct gfs2_rindex);

		/* Stop at the last complete entry */
		if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
			break;
		error = gfs2_internal_read(ip, buf, &pos,
					   sizeof(struct gfs2_rindex));
		if (error != sizeof(struct gfs2_rindex))
			break;
		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
	}
	return total_data;
}
855
6aad1c3d 856static int rgd_insert(struct gfs2_rgrpd *rgd)
7c9ca621
BP
857{
858 struct gfs2_sbd *sdp = rgd->rd_sbd;
859 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
860
861 /* Figure out where to put new node */
862 while (*newn) {
863 struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
864 rd_node);
865
866 parent = *newn;
867 if (rgd->rd_addr < cur->rd_addr)
868 newn = &((*newn)->rb_left);
869 else if (rgd->rd_addr > cur->rd_addr)
870 newn = &((*newn)->rb_right);
871 else
6aad1c3d 872 return -EEXIST;
7c9ca621
BP
873 }
874
875 rb_link_node(&rgd->rd_node, parent, newn);
876 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
6aad1c3d
BP
877 sdp->sd_rgrps++;
878 return 0;
7c9ca621
BP
879}
880
/**
 * read_rindex_entry - Pull in a new resource index entry from the disk
 * @ip: Pointer to the rindex inode
 *
 * Returns: 0 on success, > 0 on EOF, error code otherwise
 */

static int read_rindex_entry(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	const unsigned bsize = sdp->sd_sb.sb_bsize;
	/* Next unread entry, based on how many rgrps we already have */
	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
	struct gfs2_rindex buf;
	int error;
	struct gfs2_rgrpd *rgd;

	if (pos >= i_size_read(&ip->i_inode))
		return 1;	/* EOF: all entries consumed */

	error = gfs2_internal_read(ip, (char *)&buf, &pos,
				   sizeof(struct gfs2_rindex));

	if (error != sizeof(struct gfs2_rindex))
		return (error == 0) ? 1 : error;

	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
	error = -ENOMEM;
	if (!rgd)
		return error;

	rgd->rd_sbd = sdp;
	rgd->rd_addr = be64_to_cpu(buf.ri_addr);
	rgd->rd_length = be32_to_cpu(buf.ri_length);
	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
	rgd->rd_data = be32_to_cpu(buf.ri_data);
	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
	spin_lock_init(&rgd->rd_rsspin);

	error = compute_bitstructs(rgd);
	if (error)
		goto fail;

	error = gfs2_glock_get(sdp, rgd->rd_addr,
			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
	if (error)
		goto fail;

	/* The rgrp's lock value block lives in the glock's LVB */
	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
	rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
	if (rgd->rd_data > sdp->sd_max_rg_data)
		sdp->sd_max_rg_data = rgd->rd_data;
	spin_lock(&sdp->sd_rindex_spin);
	error = rgd_insert(rgd);
	spin_unlock(&sdp->sd_rindex_spin);
	if (!error) {
		glock_set_object(rgd->rd_gl, rgd);
		/* Record the address range covered, for cache invalidation */
		rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
		rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr +
						    rgd->rd_length) * bsize) - 1;
		return 0;
	}

	error = 0; /* someone else read in the rgrp; free it and ignore it */
	gfs2_glock_put(rgd->rd_gl);

fail:
	kfree(rgd->rd_bits);
	rgd->rd_bits = NULL;
	kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	return error;
}
952
0e27c18c
BP
/**
 * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
 * @sdp: the GFS2 superblock
 *
 * The purpose of this function is to select a subset of the resource groups
 * and mark them as PREFERRED. We do it in such a way that each node prefers
 * to use a unique set of rgrps to minimize glock contention.
 */
static void set_rgrp_preferences(struct gfs2_sbd *sdp)
{
	struct gfs2_rgrpd *rgd, *first;
	int i;

	/* Skip an initial number of rgrps, based on this node's journal ID.
	   That should start each node out on its own set. */
	rgd = gfs2_rgrpd_get_first(sdp);
	for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
		rgd = gfs2_rgrpd_get_next(rgd);
	first = rgd;

	/* Mark every sd_journals-th rgrp as preferred, stopping once we
	   wrap back to where we started (or run out of rgrps) */
	do {
		rgd->rd_flags |= GFS2_RDF_PREFERRED;
		for (i = 0; i < sdp->sd_journals; i++) {
			rgd = gfs2_rgrpd_get_next(rgd);
			if (!rgd || rgd == first)
				break;
		}
	} while (rgd && rgd != first);
}
982
6c53267f
RP
/**
 * gfs2_ri_update - Pull in a new resource index from the disk
 * @ip: pointer to the rindex inode
 *
 * Returns: 0 on successful update, error code otherwise
 */

static int gfs2_ri_update(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	int error;

	/* read_rindex_entry() returns 0 for each entry read; loop until it
	 * reports end-of-index (positive) or a real error (negative). */
	do {
		error = read_rindex_entry(ip);
	} while (error == 0);

	if (error < 0)
		return error;

	/* All rgrps are now in memory; pick this node's preferred subset. */
	set_rgrp_preferences(sdp);

	sdp->sd_rindex_uptodate = 1;
	return 0;
}
b3b94faa 1007
/**
 * gfs2_rindex_update - Update the rindex if required
 * @sdp: The GFS2 superblock
 *
 * We grab a lock on the rindex inode to make sure that it doesn't
 * change whilst we are performing an operation. We keep this lock
 * for quite long periods of time compared to other locks. This
 * doesn't matter, since it is shared and it is very, very rarely
 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 *
 * This makes sure that we're using the latest copy of the resource index
 * special file, which might have been updated if someone expanded the
 * filesystem (via gfs2_grow utility), which adds new resource groups.
 *
 * Returns: 0 on success, error code otherwise
 */

int gfs2_rindex_update(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
	struct gfs2_glock *gl = ip->i_gl;
	struct gfs2_holder ri_gh;
	int error = 0;
	int unlock_required = 0;

	/* Read new copy from disk if we don't have the latest */
	if (!sdp->sd_rindex_uptodate) {
		/* Only take the glock if the caller doesn't already hold it,
		 * and remember whether we need to drop it ourselves. */
		if (!gfs2_glock_is_locked_by_me(gl)) {
			error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
			if (error)
				return error;
			unlock_required = 1;
		}
		/* Re-check under the glock: another task may have refreshed
		 * the rindex while we were waiting for the lock. */
		if (!sdp->sd_rindex_uptodate)
			error = gfs2_ri_update(ip);
		if (unlock_required)
			gfs2_glock_dq_uninit(&ri_gh);
	}

	return error;
}
1049
/* Populate the in-core rgrp descriptor from an on-disk struct gfs2_rgrp.
 * Dynamic on-disk flag bits are merged in; the in-core-only GFS2_RDF_MASK
 * bits of rd_flags are preserved. */
static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
{
	const struct gfs2_rgrp *str = buf;
	u32 rg_flags;

	rg_flags = be32_to_cpu(str->rg_flags);
	rg_flags &= ~GFS2_RDF_MASK;	/* drop in-core-only bits from disk value */
	rgd->rd_flags &= GFS2_RDF_MASK;	/* keep only in-core-only bits */
	rgd->rd_flags |= rg_flags;
	rgd->rd_free = be32_to_cpu(str->rg_free);
	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
	/* rd_data0, rd_data and rd_bitbytes already set from rindex */
}
1064
3f30f929
BP
1065static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1066{
1067 const struct gfs2_rgrp *str = buf;
1068
1069 rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1070 rgl->rl_flags = str->rg_flags;
1071 rgl->rl_free = str->rg_free;
1072 rgl->rl_dinodes = str->rg_dinodes;
1073 rgl->rl_igeneration = str->rg_igeneration;
1074 rgl->__pad = 0UL;
1075}
1076
/* Serialize the in-core rgrp descriptor back into on-disk form in @buf,
 * compute the header CRC, and refresh the glock LVB copy. */
static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
{
	struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd);
	struct gfs2_rgrp *str = buf;
	u32 crc;

	str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
	str->rg_free = cpu_to_be32(rgd->rd_free);
	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
	/* rg_skip: distance (in blocks) to the next rgrp header, 0 if last.
	 * NOTE(review): when next exists but next->rd_addr <= rd_addr the
	 * field is deliberately left untouched — confirm against the rg_skip
	 * on-disk format commit before changing. */
	if (next == NULL)
		str->rg_skip = 0;
	else if (next->rd_addr > rgd->rd_addr)
		str->rg_skip = cpu_to_be32(next->rd_addr - rgd->rd_addr);
	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
	str->rg_data0 = cpu_to_be64(rgd->rd_data0);
	str->rg_data = cpu_to_be32(rgd->rd_data);
	str->rg_bitbytes = cpu_to_be32(rgd->rd_bitbytes);
	/* CRC is computed over the header with rg_crc zeroed first. */
	str->rg_crc = 0;
	crc = gfs2_disk_hash(buf, sizeof(struct gfs2_rgrp));
	str->rg_crc = cpu_to_be32(crc);

	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf);
}
1101
90306c41
BM
1102static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
1103{
1104 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1105 struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
1106
1107 if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
1108 rgl->rl_dinodes != str->rg_dinodes ||
1109 rgl->rl_igeneration != str->rg_igeneration)
1110 return 0;
1111 return 1;
1112}
1113
90306c41
BM
1114static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
1115{
1116 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1117 u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
1118 rgl->rl_unlinked = cpu_to_be32(unlinked);
1119}
1120
/* Count the blocks in GFS2_BLKST_UNLINKED state across all of the rgrp's
 * bitmaps. Requires the bitmap buffers to be read in and up to date. */
static u32 count_unlinked(struct gfs2_rgrpd *rgd)
{
	struct gfs2_bitmap *bi;
	const u32 length = rgd->rd_length;
	const u8 *buffer = NULL;
	u32 i, goal, count = 0;

	for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
		goal = 0;
		buffer = bi->bi_bh->b_data + bi->bi_offset;
		WARN_ON(!buffer_uptodate(bi->bi_bh));
		/* bi_len is in bytes; GFS2_NBBY blocks per byte. Walk every
		 * UNLINKED hit, resuming the search one block past it. */
		while (goal < bi->bi_len * GFS2_NBBY) {
			goal = gfs2_bitfit(buffer, bi->bi_len, goal,
					   GFS2_BLKST_UNLINKED);
			if (goal == BFITNOENT)
				break;
			count++;
			goal++;
		}
	}

	return count;
}
1144
1145
/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
 *
 * Returns: errno
 */

static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	/* Already read in — nothing to do. */
	if (rgd->rd_bits[0].bi_bh != NULL)
		return 0;

	/* Phase 1: submit reads for all bitmap blocks (async). */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh);
		if (error)
			goto fail;
	}

	/* Phase 2: wait for completion and verify metadata types.
	 * Block 0 is the RG header; the rest are RB (bitmap) blocks.
	 * Note: x is left at length here, so the fail path below releases
	 * every buffer regardless of which y failed. */
	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_wait(sdp, bi->bi_bh);
		if (error)
			goto fail;
		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
					      GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
		/* Fresh data from disk: forget stale "bitmap full" hints. */
		for (x = 0; x < length; x++)
			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
		rgd->rd_free_clone = rgd->rd_free;
		/* max out the rgrp allocation failure point */
		rgd->rd_extfail_pt = rgd->rd_free;
	}
	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
		/* LVB never initialized (no magic) — build it from disk. */
		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
				     rgd->rd_bits[0].bi_bh->b_data);
	}
	else if (sdp->sd_args.ar_rgrplvb) {
		/* LVB exists and we rely on it: it must match the disk. */
		if (!gfs2_rgrp_lvb_valid(rgd)){
			gfs2_consist_rgrpd(rgd);
			error = -EIO;
			goto fail;
		}
		if (rgd->rd_rgl->rl_unlinked == 0)
			rgd->rd_flags &= ~GFS2_RDF_CHECK;
	}
	return 0;

fail:
	/* Release every buffer obtained so far (x == length after phase 1). */
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}

	return error;
}
1222
/* Refresh the in-core rgrp descriptor from the glock LVB, avoiding the
 * bitmap I/O of gfs2_rgrp_bh_get() when the LVB is valid.
 *
 * Returns: 0 on success, or the error from gfs2_rgrp_bh_get() when the LVB
 * is uninitialized and we must fall back to reading from disk.
 */
static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
{
	u32 rl_flags;

	if (rgd->rd_flags & GFS2_RDF_UPTODATE)
		return 0;

	/* No magic in the LVB means it was never populated — read from disk. */
	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
		return gfs2_rgrp_bh_get(rgd);

	/* Merge on-disk flag bits from the LVB, preserving in-core-only bits
	 * (same scheme as gfs2_rgrp_in()). */
	rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
	rl_flags &= ~GFS2_RDF_MASK;
	rgd->rd_flags &= GFS2_RDF_MASK;
	rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
	if (rgd->rd_rgl->rl_unlinked == 0)
		rgd->rd_flags &= ~GFS2_RDF_CHECK;
	rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
	rgd->rd_free_clone = rgd->rd_free;
	rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
	rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
	return 0;
}
1245
1246int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1247{
1248 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1249 struct gfs2_sbd *sdp = rgd->rd_sbd;
1250
1251 if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1252 return 0;
8b127d04 1253 return gfs2_rgrp_bh_get(rgd);
90306c41
BM
1254}
1255
b3b94faa 1256/**
39b0f1e9
BP
1257 * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get()
1258 * @rgd: The resource group
b3b94faa
DT
1259 *
1260 */
1261
39b0f1e9 1262void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
b3b94faa 1263{
bb8d8a6f 1264 int x, length = rgd->rd_length;
b3b94faa 1265
b3b94faa
DT
1266 for (x = 0; x < length; x++) {
1267 struct gfs2_bitmap *bi = rgd->rd_bits + x;
90306c41
BM
1268 if (bi->bi_bh) {
1269 brelse(bi->bi_bh);
1270 bi->bi_bh = NULL;
1271 }
b3b94faa
DT
1272 }
1273
b3b94faa
DT
1274}
1275
39b0f1e9
BP
1276/**
1277 * gfs2_rgrp_go_unlock - Unlock a rgrp glock
1278 * @gh: The glock holder for the resource group
1279 *
1280 */
1281
1282void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
1283{
1284 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1285 int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) |
1286 test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags);
1287
1288 if (rgd && demote_requested)
1289 gfs2_rgrp_brelse(rgd);
1290}
1291
/**
 * gfs2_rgrp_send_discards - issue discard requests for newly-freed blocks
 * @sdp: the filesystem
 * @offset: block address corresponding to the start of this bitmap's range
 * @bh: if non-NULL, the committed copy of the bitmap to diff against;
 *      if NULL, discard every free block in the bitmap
 * @bi: the bitmap to scan
 * @minlen: smallest extent (in blocks) worth discarding
 * @ptrimmed: if non-NULL, receives the number of blocks discarded
 *
 * Coalesces runs of discardable blocks into extents and issues one discard
 * per extent of at least @minlen blocks. On a discard error, discards are
 * disabled for the rest of this mount.
 *
 * Returns: 0 on success, -EIO if a discard request failed
 */
int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
			    struct buffer_head *bh,
			    const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
{
	struct super_block *sb = sdp->sd_vfs;
	u64 blk;
	sector_t start = 0;
	sector_t nr_blks = 0;
	int rv;
	unsigned int x;
	u32 trimmed = 0;
	u8 diff;

	/* Each byte of bitmap holds GFS2_NBBY blocks at 2 bits per block. */
	for (x = 0; x < bi->bi_len; x++) {
		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
		clone += bi->bi_offset;
		clone += x;
		if (bh) {
			/* diff: low bit of each 2-bit pair set where the block
			 * is free in the clone but was not free in @bh. */
			const u8 *orig = bh->b_data + bi->bi_offset + x;
			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
		} else {
			/* No baseline: select every free (00) block. */
			diff = ~(*clone | (*clone >> 1));
		}
		diff &= 0x55;	/* keep only the low bit of each pair */
		if (diff == 0)
			continue;
		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
		while(diff) {
			if (diff & 1) {
				if (nr_blks == 0)
					goto start_new_extent;
				if ((start + nr_blks) != blk) {
					/* Current run ended; flush it if it is
					 * big enough, then start a new one. */
					if (nr_blks >= minlen) {
						rv = sb_issue_discard(sb,
							start, nr_blks,
							GFP_NOFS, 0);
						if (rv)
							goto fail;
						trimmed += nr_blks;
					}
					nr_blks = 0;
start_new_extent:
					start = blk;
				}
				nr_blks++;
			}
			diff >>= 2;	/* next 2-bit pair */
			blk++;
		}
	}
	/* Flush the final pending extent, if any. */
	if (nr_blks >= minlen) {
		rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
		if (rv)
			goto fail;
		trimmed += nr_blks;
	}
	if (ptrimmed)
		*ptrimmed = trimmed;
	return 0;

fail:
	if (sdp->sd_args.ar_discard)
		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv);
	sdp->sd_args.ar_discard = 0;
	return -EIO;
}
1358
1359/**
1360 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1361 * @filp: Any file on the filesystem
1362 * @argp: Pointer to the arguments (also used to pass result)
1363 *
1364 * Returns: 0 on success, otherwise error code
1365 */
1366
1367int gfs2_fitrim(struct file *filp, void __user *argp)
1368{
496ad9aa 1369 struct inode *inode = file_inode(filp);
66fc061b
SW
1370 struct gfs2_sbd *sdp = GFS2_SB(inode);
1371 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1372 struct buffer_head *bh;
1373 struct gfs2_rgrpd *rgd;
1374 struct gfs2_rgrpd *rgd_end;
1375 struct gfs2_holder gh;
1376 struct fstrim_range r;
1377 int ret = 0;
1378 u64 amt;
1379 u64 trimmed = 0;
076f0faa 1380 u64 start, end, minlen;
66fc061b 1381 unsigned int x;
076f0faa 1382 unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
66fc061b
SW
1383
1384 if (!capable(CAP_SYS_ADMIN))
1385 return -EPERM;
1386
1387 if (!blk_queue_discard(q))
1388 return -EOPNOTSUPP;
1389
3a238ade 1390 if (copy_from_user(&r, argp, sizeof(r)))
66fc061b
SW
1391 return -EFAULT;
1392
5e2f7d61
BP
1393 ret = gfs2_rindex_update(sdp);
1394 if (ret)
1395 return ret;
1396
076f0faa
LC
1397 start = r.start >> bs_shift;
1398 end = start + (r.len >> bs_shift);
1399 minlen = max_t(u64, r.minlen,
1400 q->limits.discard_granularity) >> bs_shift;
1401
6a98c333
AD
1402 if (end <= start || minlen > sdp->sd_max_rg_data)
1403 return -EINVAL;
1404
076f0faa 1405 rgd = gfs2_blk2rgrpd(sdp, start, 0);
6a98c333 1406 rgd_end = gfs2_blk2rgrpd(sdp, end, 0);
076f0faa 1407
6a98c333
AD
1408 if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
1409 && (start > rgd_end->rd_data0 + rgd_end->rd_data))
1410 return -EINVAL; /* start is beyond the end of the fs */
66fc061b
SW
1411
1412 while (1) {
1413
1414 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1415 if (ret)
1416 goto out;
1417
1418 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1419 /* Trim each bitmap in the rgrp */
1420 for (x = 0; x < rgd->rd_length; x++) {
1421 struct gfs2_bitmap *bi = rgd->rd_bits + x;
076f0faa
LC
1422 ret = gfs2_rgrp_send_discards(sdp,
1423 rgd->rd_data0, NULL, bi, minlen,
1424 &amt);
66fc061b
SW
1425 if (ret) {
1426 gfs2_glock_dq_uninit(&gh);
1427 goto out;
1428 }
1429 trimmed += amt;
1430 }
1431
1432 /* Mark rgrp as having been trimmed */
1433 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1434 if (ret == 0) {
1435 bh = rgd->rd_bits[0].bi_bh;
1436 rgd->rd_flags |= GFS2_RGF_TRIMMED;
350a9b0a 1437 gfs2_trans_add_meta(rgd->rd_gl, bh);
66fc061b
SW
1438 gfs2_rgrp_out(rgd, bh->b_data);
1439 gfs2_trans_end(sdp);
1440 }
1441 }
1442 gfs2_glock_dq_uninit(&gh);
1443
1444 if (rgd == rgd_end)
1445 break;
1446
1447 rgd = gfs2_rgrpd_get_next(rgd);
1448 }
1449
1450out:
6a98c333 1451 r.len = trimmed << bs_shift;
3a238ade 1452 if (copy_to_user(argp, &r, sizeof(r)))
66fc061b
SW
1453 return -EFAULT;
1454
1455 return ret;
f15ab561
SW
1456}
1457
/**
 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
 * @ip: the inode structure
 *
 * The reservation must not already be active (BUG otherwise). Overlapping
 * an existing reservation is a caller bug: it triggers a WARN and the
 * reservation is silently dropped.
 */
static void rs_insert(struct gfs2_inode *ip)
{
	struct rb_node **newn, *parent = NULL;
	int rc;
	struct gfs2_blkreserv *rs = &ip->i_res;
	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
	u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);

	BUG_ON(gfs2_rs_active(rs));

	spin_lock(&rgd->rd_rsspin);
	newn = &rgd->rd_rstree.rb_node;
	/* Standard rbtree descent keyed by block range (rs_cmp). */
	while (*newn) {
		struct gfs2_blkreserv *cur =
			rb_entry(*newn, struct gfs2_blkreserv, rs_node);

		parent = *newn;
		rc = rs_cmp(fsblock, rs->rs_free, cur);
		if (rc > 0)
			newn = &((*newn)->rb_right);
		else if (rc < 0)
			newn = &((*newn)->rb_left);
		else {
			/* Overlap with an existing reservation — bail out. */
			spin_unlock(&rgd->rd_rsspin);
			WARN_ON(1);
			return;
		}
	}

	rb_link_node(&rs->rs_node, parent, newn);
	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);

	/* Do our rgrp accounting for the reservation */
	rgd->rd_reserved += rs->rs_free; /* blocks reserved */
	spin_unlock(&rgd->rd_rsspin);
	trace_gfs2_rs(rs, TRACE_RS_INSERT);
}
1500
f6753df3
BP
1501/**
1502 * rgd_free - return the number of free blocks we can allocate.
1503 * @rgd: the resource group
1504 *
1505 * This function returns the number of free blocks for an rgrp.
1506 * That's the clone-free blocks (blocks that are free, not including those
1507 * still being used for unlinked files that haven't been deleted.)
1508 *
1509 * It also subtracts any blocks reserved by someone else, but does not
1510 * include free blocks that are still part of our current reservation,
1511 * because obviously we can (and will) allocate them.
1512 */
1513static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
1514{
1515 u32 tot_reserved, tot_free;
1516
1517 if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free))
1518 return 0;
1519 tot_reserved = rgd->rd_reserved - rs->rs_free;
1520
1521 if (rgd->rd_free_clone < tot_reserved)
1522 tot_reserved = 0;
1523
1524 tot_free = rgd->rd_free_clone - tot_reserved;
1525
1526 return tot_free;
1527}
1528
/**
 * rg_mblk_search - find a group of multiple free blocks to form a reservation
 * @rgd: the resource group descriptor
 * @ip: pointer to the inode for which we're reserving blocks
 * @ap: the allocation parameters
 *
 * On success the reservation is inserted into the rgrp's tree via
 * rs_insert(); on failure no reservation is made.
 */

static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
			   const struct gfs2_alloc_parms *ap)
{
	struct gfs2_rbm rbm = { .rgd = rgd, };
	u64 goal;
	struct gfs2_blkreserv *rs = &ip->i_res;
	u32 extlen;
	u32 free_blocks = rgd_free(rgd, rs);
	int ret;
	struct inode *inode = &ip->i_inode;

	/* Directories grow a block at a time; for files, size the reservation
	 * from the inode's write-size hint and the allocation target, bounded
	 * below by RGRP_RSRV_MINBLKS and above by what is actually free. */
	if (S_ISDIR(inode->i_mode))
		extlen = 1;
	else {
		extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
		extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
	}
	if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
		return;

	/* Find bitmap block that contains bits for goal block */
	if (rgrp_contains_block(rgd, ip->i_goal))
		goal = ip->i_goal;
	else
		goal = rgd->rd_last_alloc + rgd->rd_data0;

	if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
		return;

	ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true);
	if (ret == 0) {
		rs->rs_rbm = rbm;
		rs->rs_free = extlen;
		rs_insert(ip);
	} else {
		/* Nothing found from the last-alloc goal: reset it so the
		 * next attempt starts from the beginning of the rgrp. */
		if (goal == rgd->rd_last_alloc + rgd->rd_data0)
			rgd->rd_last_alloc = 0;
	}
}
1576
/**
 * gfs2_next_unreserved_block - Return next block that is not reserved
 * @rgd: The resource group
 * @block: The starting block
 * @length: The required length
 * @ip: Ignore any reservations for this inode
 *
 * If the block does not appear in any reservation, then return the
 * block number unchanged. If it does appear in the reservation, then
 * keep looking through the tree of reservations in order to find the
 * first block number which is not reserved.
 */

static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
				      u32 length,
				      const struct gfs2_inode *ip)
{
	struct gfs2_blkreserv *rs;
	struct rb_node *n;
	int rc;

	spin_lock(&rgd->rd_rsspin);
	n = rgd->rd_rstree.rb_node;
	/* Descend the tree looking for a reservation overlapping
	 * [block, block+length). */
	while (n) {
		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		rc = rs_cmp(block, length, rs);
		if (rc < 0)
			n = n->rb_left;
		else if (rc > 0)
			n = n->rb_right;
		else
			break;
	}

	if (n) {
		/* Skip past every overlapping reservation (except our own) by
		 * walking rightwards in key order. */
		while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) {
			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
			n = n->rb_right;
			if (n == NULL)
				break;
			rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		}
	}

	spin_unlock(&rgd->rd_rsspin);
	return block;
}
1624
/**
 * gfs2_reservation_check_and_update - Check for reservations during block alloc
 * @rbm: The current position in the resource group
 * @ip: The inode for which we are searching for blocks
 * @minext: The minimum extent length
 * @maxext: A pointer to the maximum extent structure
 *
 * This checks the current position in the rgrp to see whether there is
 * a reservation covering this block. If not then this function is a
 * no-op. If there is, then the position is moved to the end of the
 * contiguous reservation(s) so that we are pointing at the first
 * non-reserved block.
 *
 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
 */

static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
					     const struct gfs2_inode *ip,
					     u32 minext,
					     struct gfs2_extent *maxext)
{
	u64 block = gfs2_rbm_to_block(rbm);
	u32 extlen = 1;
	u64 nblock;
	int ret;

	/*
	 * If we have a minimum extent length, then skip over any extent
	 * which is less than the min extent length in size.
	 */
	if (minext) {
		extlen = gfs2_free_extlen(rbm, minext);
		if (extlen <= maxext->len)
			goto fail;
	}

	/*
	 * Check the extent which has been found against the reservations
	 * and skip if parts of it are already reserved
	 */
	nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
	if (nblock == block) {
		/* Extent is unreserved and long enough: use it as-is. */
		if (!minext || extlen >= minext)
			return 0;

		/* Too short, but remember the best candidate seen so far so
		 * the caller can fall back to it. */
		if (extlen > maxext->len) {
			maxext->len = extlen;
			maxext->rbm = *rbm;
		}
fail:
		nblock = block + extlen;
	}
	ret = gfs2_rbm_from_block(rbm, nblock);
	if (ret < 0)
		return ret;
	return 1;
}
1682
/**
 * gfs2_rbm_find - Look for blocks of a particular state
 * @rbm: Value/result starting position and final position
 * @state: The state which we want to find
 * @minext: Pointer to the requested extent length (NULL for a single block)
 *          This is updated to be the actual reservation size.
 * @ip: If set, check for reservations
 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
 *          around until we've reached the starting point.
 *
 * Side effects:
 * - If looking for free blocks, we set GBF_FULL on each bitmap which
 *   has no free blocks in it.
 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
 *   has come up short on a free block search.
 *
 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
 */

static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
			 const struct gfs2_inode *ip, bool nowrap)
{
	struct buffer_head *bh;
	int initial_bii;
	u32 initial_offset;
	int first_bii = rbm->bii;
	u32 first_offset = rbm->offset;
	u32 offset;
	u8 *buffer;
	int n = 0;
	int iters = rbm->rgd->rd_length;
	int ret;
	struct gfs2_bitmap *bi;
	struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };

	/* If we are not starting at the beginning of a bitmap, then we
	 * need to add one to the bitmap count to ensure that we search
	 * the starting bitmap twice.
	 */
	if (rbm->offset != 0)
		iters++;

	while(1) {
		bi = rbm_bi(rbm);
		/* Skip bitmaps already known to have no free blocks — unless
		 * this inode has an active reservation (which may sit in a
		 * "full" bitmap). */
		if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) &&
		    test_bit(GBF_FULL, &bi->bi_flags) &&
		    (state == GFS2_BLKST_FREE))
			goto next_bitmap;

		bh = bi->bi_bh;
		buffer = bh->b_data + bi->bi_offset;
		WARN_ON(!buffer_uptodate(bh));
		/* Use the clone bitmap (uncommitted state) for everything
		 * except unlinked-inode scans. */
		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
			buffer = bi->bi_clone + bi->bi_offset;
		initial_offset = rbm->offset;
		offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
		if (offset == BFITNOENT)
			goto bitmap_full;
		rbm->offset = offset;
		if (ip == NULL)
			return 0;

		initial_bii = rbm->bii;
		ret = gfs2_reservation_check_and_update(rbm, ip,
							minext ? *minext : 0,
							&maxext);
		if (ret == 0)
			return 0;
		if (ret > 0) {
			/* Position advanced past a reservation; count any
			 * bitmaps we crossed toward the iteration budget. */
			n += (rbm->bii - initial_bii);
			goto next_iter;
		}
		if (ret == -E2BIG) {
			/* Reservation runs past the end of the rgrp: restart
			 * the scan from the first bitmap. */
			rbm->bii = 0;
			rbm->offset = 0;
			n += (rbm->bii - initial_bii);
			goto res_covered_end_of_rgrp;
		}
		return ret;

bitmap_full:	/* Mark bitmap as full and fall through */
		if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
			set_bit(GBF_FULL, &bi->bi_flags);

next_bitmap:	/* Find next bitmap in the rgrp */
		rbm->offset = 0;
		rbm->bii++;
		if (rbm->bii == rbm->rgd->rd_length)
			rbm->bii = 0;
res_covered_end_of_rgrp:
		if ((rbm->bii == 0) && nowrap)
			break;
		n++;
next_iter:
		if (n >= iters)
			break;
	}

	if (minext == NULL || state != GFS2_BLKST_FREE)
		return -ENOSPC;

	/* If the extent was too small, and it's smaller than the smallest
	   to have failed before, remember for future reference that it's
	   useless to search this rgrp again for this amount or more. */
	if ((first_offset == 0) && (first_bii == 0) &&
	    (*minext < rbm->rgd->rd_extfail_pt))
		rbm->rgd->rd_extfail_pt = *minext;

	/* If the maximum extent we found is big enough to fulfill the
	   minimum requirements, use it anyway. */
	if (maxext.len) {
		*rbm = maxext.rbm;
		*minext = maxext.len;
		return 0;
	}

	return -ENOSPC;
}
1801
/**
 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
 * @rgd: The rgrp
 * @last_unlinked: block address of the last dinode we unlinked
 * @skip: block address we should explicitly not unlink
 *
 * Scans the rgrp's bitmaps for GFS2_BLKST_UNLINKED dinodes and queues
 * delete work for ones not already in cache. Clears GFS2_RDF_CHECK when
 * the whole rgrp has been scanned.
 */

static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
{
	u64 block;
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl;
	struct gfs2_inode *ip;
	int error;
	int found = 0;
	struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };

	while (1) {
		/* Hold off log flushes while scanning the (uncloned) bitmap. */
		down_write(&sdp->sd_log_flush_lock);
		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
				      true);
		up_write(&sdp->sd_log_flush_lock);
		if (error == -ENOSPC)
			break;
		if (WARN_ON_ONCE(error))
			break;

		block = gfs2_rbm_to_block(&rbm);
		/* Advance the search position; stop at the end of the rgrp. */
		if (gfs2_rbm_from_block(&rbm, block + 1))
			break;
		/* Don't revisit blocks we already processed, or @skip. */
		if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
			continue;
		if (block == skip)
			continue;
		*last_unlinked = block;

		error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl);
		if (error)
			continue;

		/* If the inode is already in cache, we can ignore it here
		 * because the existing inode disposal code will deal with
		 * it when all refs have gone away. Accessing gl_object like
		 * this is not safe in general. Here it is ok because we do
		 * not dereference the pointer, and we only need an approx
		 * answer to whether it is NULL or not.
		 */
		ip = gl->gl_object;

		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
			gfs2_glock_put(gl);
		else
			found++;

		/* Limit reclaim to sensible number of tasks */
		if (found > NR_CPUS)
			return;
	}

	rgd->rd_flags &= ~GFS2_RDF_CHECK;
	return;
}
1867
/**
 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
 * @rgd: The rgrp in question
 * @loops: An indication of how picky we can be (0=very, 1=less so)
 *
 * This function uses the recently added glock statistics in order to
 * figure out whether a particular resource group is suffering from
 * contention from multiple nodes. This is done purely on the basis
 * of timings, since this is the only data we have to work with and
 * our aim here is to reject a resource group which is highly contended
 * but (very important) not to do this too often in order to ensure that
 * we do not land up introducing fragmentation by changing resource
 * groups when not actually required.
 *
 * The calculation is fairly simple, we want to know whether the SRTTB
 * (i.e. smoothed round trip time for blocking operations) to acquire
 * the lock for this rgrp's glock is significantly greater than the
 * time taken for resource groups on average. We introduce a margin in
 * the form of the variable @var which is computed as the sum of the two
 * respective variences, and multiplied by a factor depending on @loops
 * and whether we have a lot of data to base the decision on. This is
 * then tested against the square difference of the means in order to
 * decide whether the result is statistically significant or not.
 *
 * Returns: A boolean verdict on the congestion status
 */

static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
{
	const struct gfs2_glock *gl = rgd->rd_gl;
	const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_lkstats *st;
	u64 r_dcount, l_dcount;
	u64 l_srttb, a_srttb = 0;
	s64 srttb_diff;
	u64 sqr_diff;
	u64 var;
	int cpu, nonzero = 0;

	preempt_disable();
	/* Average the per-CPU rgrp SRTTB over CPUs with any samples. */
	for_each_present_cpu(cpu) {
		st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
		if (st->stats[GFS2_LKS_SRTTB]) {
			a_srttb += st->stats[GFS2_LKS_SRTTB];
			nonzero++;
		}
	}
	st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
	if (nonzero)
		do_div(a_srttb, nonzero);
	r_dcount = st->stats[GFS2_LKS_DCOUNT];
	var = st->stats[GFS2_LKS_SRTTVARB] +
	      gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
	preempt_enable();

	l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
	l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];

	/* Not enough samples to make a statistically meaningful call. */
	if ((l_dcount < 1) || (r_dcount < 1) || (a_srttb == 0))
		return false;

	srttb_diff = a_srttb - l_srttb;
	sqr_diff = srttb_diff * srttb_diff;

	/* Widen the margin when data is sparse or we're on a later loop. */
	var *= 2;
	if (l_dcount < 8 || r_dcount < 8)
		var *= 2;
	if (loops == 1)
		var *= 2;

	return ((srttb_diff < 0) && (sqr_diff > var));
}
1940
1941/**
1942 * gfs2_rgrp_used_recently
1943 * @rs: The block reservation with the rgrp to test
1944 * @msecs: The time limit in milliseconds
1945 *
1946 * Returns: True if the rgrp glock has been used within the time limit
1947 */
1948static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1949 u64 msecs)
1950{
1951 u64 tdiff;
1952
1953 tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1954 rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1955
1956 return tdiff > (msecs * 1000 * 1000);
1957}
1958
9dbe9610
SW
1959static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1960{
1961 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1962 u32 skip;
1963
1964 get_random_bytes(&skip, sizeof(skip));
1965 return skip % sdp->sd_rgrps;
1966}
1967
c743ffd0
SW
1968static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1969{
1970 struct gfs2_rgrpd *rgd = *pos;
aa8920c9 1971 struct gfs2_sbd *sdp = rgd->rd_sbd;
c743ffd0
SW
1972
1973 rgd = gfs2_rgrpd_get_next(rgd);
1974 if (rgd == NULL)
aa8920c9 1975 rgd = gfs2_rgrpd_get_first(sdp);
c743ffd0
SW
1976 *pos = rgd;
1977 if (rgd != begin) /* If we didn't wrap */
1978 return true;
1979 return false;
1980}
1981
0e27c18c
BP
1982/**
1983 * fast_to_acquire - determine if a resource group will be fast to acquire
1984 *
1985 * If this is one of our preferred rgrps, it should be quicker to acquire,
1986 * because we tried to set ourselves up as dlm lock master.
1987 */
1988static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
1989{
1990 struct gfs2_glock *gl = rgd->rd_gl;
1991
1992 if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
1993 !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
1994 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1995 return 1;
1996 if (rgd->rd_flags & GFS2_RDF_PREFERRED)
1997 return 1;
1998 return 0;
1999}
2000
/**
 * gfs2_inplace_reserve - Reserve space in the filesystem
 * @ip: the inode to reserve space for
 * @ap: the allocation parameters
 *
 * We try our best to find an rgrp that has at least ap->target blocks
 * available. After a couple of passes (loops == 2), the prospects of finding
 * such an rgrp diminish. At this stage, we return the first rgrp that has
 * at least ap->min_target blocks available. Either way, we set ap->allowed to
 * the number of blocks available in the chosen rgrp.
 *
 * Returns: 0 on success,
 *          -ENOSPC if a suitable rgrp can't be found
 *          errno otherwise
 */

int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *begin = NULL;
	struct gfs2_blkreserv *rs = &ip->i_res;
	int error = 0, rg_locked, flags = 0;
	u64 last_unlinked = NO_BLOCK;
	int loops = 0;
	u32 free_blocks, skip = 0;

	if (sdp->sd_args.ar_rgrplvb)
		flags |= GL_SKIP;
	if (gfs2_assert_warn(sdp, ap->target))
		return -EINVAL;
	/* Choose the rgrp to start the search at: an active reservation,
	   the rgrp holding the inode's goal block, or a fresh lookup */
	if (gfs2_rs_active(rs)) {
		begin = rs->rs_rbm.rgd;
	} else if (rs->rs_rbm.rgd &&
		   rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) {
		begin = rs->rs_rbm.rgd;
	} else {
		check_and_update_goal(ip);
		rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
	}
	if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
		skip = gfs2_orlov_skip(ip);
	if (rs->rs_rbm.rgd == NULL)
		return -EBADSLT;

	while (loops < 3) {
		rg_locked = 1;

		if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
			rg_locked = 0;
			if (skip && skip--)
				goto next_rgrp;
			if (!gfs2_rs_active(rs)) {
				/* On the first pass, only consider rgrps whose
				   glock looks cheap to acquire */
				if (loops == 0 &&
				    !fast_to_acquire(rs->rs_rbm.rgd))
					goto next_rgrp;
				if ((loops < 2) &&
				    gfs2_rgrp_used_recently(rs, 1000) &&
				    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
					goto next_rgrp;
			}
			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
						   LM_ST_EXCLUSIVE, flags,
						   &rs->rs_rgd_gh);
			if (unlikely(error))
				return error;
			/* Re-check congestion now that we hold the glock */
			if (!gfs2_rs_active(rs) && (loops < 2) &&
			    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
				goto skip_rgrp;
			if (sdp->sd_args.ar_rgrplvb) {
				error = update_rgrp_lvb(rs->rs_rbm.rgd);
				if (unlikely(error)) {
					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
					return error;
				}
			}
		}

		/* Skip unusable resource groups */
		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
						 GFS2_RDF_ERROR)) ||
		    (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
			goto skip_rgrp;

		if (sdp->sd_args.ar_rgrplvb)
			gfs2_rgrp_bh_get(rs->rs_rbm.rgd);

		/* Get a reservation if we don't already have one */
		if (!gfs2_rs_active(rs))
			rg_mblk_search(rs->rs_rbm.rgd, ip, ap);

		/* Skip rgrps when we can't get a reservation on first pass */
		if (!gfs2_rs_active(rs) && (loops < 1))
			goto check_rgrp;

		/* If rgrp has enough free space, use it */
		free_blocks = rgd_free(rs->rs_rbm.rgd, rs);
		if (free_blocks >= ap->target ||
		    (loops == 2 && ap->min_target &&
		     free_blocks >= ap->min_target)) {
			ap->allowed = free_blocks;
			return 0;
		}
check_rgrp:
		/* Check for unlinked inodes which can be reclaimed */
		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
			try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
					ip->i_no_addr);
skip_rgrp:
		/* Drop reservation, if we couldn't use reserved rgrp */
		if (gfs2_rs_active(rs))
			gfs2_rs_deltree(rs);

		/* Unlock rgrp if required */
		if (!rg_locked)
			gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
next_rgrp:
		/* Find the next rgrp, and continue looking */
		if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
			continue;
		if (skip)
			continue;

		/* If we've scanned all the rgrps, but found no free blocks
		 * then this checks for some less likely conditions before
		 * trying again.
		 */
		loops++;
		/* Check that fs hasn't grown if writing to rindex */
		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
			error = gfs2_ri_update(ip);
			if (error)
				return error;
		}
		/* Flushing the log may release space */
		if (loops == 2)
			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
				       GFS2_LFC_INPLACE_RESERVE);
	}

	return -ENOSPC;
}
2142
2143/**
2144 * gfs2_inplace_release - release an inplace reservation
2145 * @ip: the inode the reservation was taken out on
2146 *
2147 * Release a reservation made by gfs2_inplace_reserve().
2148 */
2149
2150void gfs2_inplace_release(struct gfs2_inode *ip)
2151{
a097dc7e 2152 struct gfs2_blkreserv *rs = &ip->i_res;
b3b94faa 2153
6df9f9a2 2154 if (gfs2_holder_initialized(&rs->rs_rgd_gh))
564e12b1 2155 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
b3b94faa
DT
2156}
2157
/**
 * gfs2_alloc_extent - allocate an extent from a given bitmap
 * @rbm: the resource group information
 * @dinode: TRUE if the first block we allocate is for a dinode
 * @n: The extent length (value/result)
 *
 * Add the bitmap buffer to the transaction.
 * Set the found bits to @new_state to change block's allocation state.
 */
static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
			      unsigned int *n)
{
	struct gfs2_rbm pos = { .rgd = rbm->rgd, };
	const unsigned int elen = *n;	/* requested extent length */
	u64 block;
	int ret;

	*n = 1;
	block = gfs2_rbm_to_block(rbm);
	gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
	/* First block of the extent: dinode or data, per the caller */
	gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
	block++;
	/* Extend over following free blocks (per the clone bitmap) until
	   elen blocks are allocated or a non-free block is hit */
	while (*n < elen) {
		ret = gfs2_rbm_from_block(&pos, block);
		if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE)
			break;
		gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
		(*n)++;
		block++;
	}
}
2190
/**
 * rgblk_free - Change alloc state of given block(s)
 * @sdp: the filesystem
 * @bstart: the start of a run of blocks to free
 * @blen: the length of the block run (all must lie within ONE RG!)
 * @new_state: GFS2_BLKST_XXX the after-allocation block state
 *
 * Returns: Resource group containing the block(s)
 */

static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
				     u32 blen, unsigned char new_state)
{
	struct gfs2_rbm rbm;
	struct gfs2_bitmap *bi, *bi_prev = NULL;

	rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
	if (!rbm.rgd) {
		if (gfs2_consist(sdp))
			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
		return NULL;
	}

	gfs2_rbm_from_block(&rbm, bstart);
	while (blen--) {
		bi = rbm_bi(&rbm);
		/* Only set up each bitmap buffer once as the run crosses it */
		if (bi != bi_prev) {
			/* Lazily create the clone bitmap before modifying the
			   real one; NOTE(review): presumably so allocation via
			   the clone still sees the pre-free state until the
			   clones are synced — confirm against gfs2_testbit()'s
			   use_clone path */
			if (!bi->bi_clone) {
				bi->bi_clone = kmalloc(bi->bi_bh->b_size,
						       GFP_NOFS | __GFP_NOFAIL);
				memcpy(bi->bi_clone + bi->bi_offset,
				       bi->bi_bh->b_data + bi->bi_offset,
				       bi->bi_len);
			}
			gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
			bi_prev = bi;
		}
		gfs2_setbit(&rbm, false, new_state);
		gfs2_rbm_incr(&rbm);
	}

	return rbm.rgd;
}
/**
 * gfs2_rgrp_dump - print out an rgrp
 * @seq: The iterator (gfs2_rgrp_error() passes NULL)
 * @gl: The glock in question
 *
 */

void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
{
	struct gfs2_rgrpd *rgd = gl->gl_object;
	struct gfs2_blkreserv *trs;
	const struct rb_node *n;

	if (rgd == NULL)
		return;
	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
		       rgd->rd_reserved, rgd->rd_extfail_pt);
	/* Dump every outstanding block reservation held in this rgrp */
	spin_lock(&rgd->rd_rsspin);
	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		dump_rs(seq, trs);
	}
	spin_unlock(&rgd->rd_rsspin);
}
2261
/*
 * gfs2_rgrp_error - flag a resource group after detecting an inconsistency
 * @rgd: the resource group with the error
 *
 * Warns, dumps the rgrp state, and sets GFS2_RDF_ERROR, which makes
 * gfs2_inplace_reserve() skip this rgrp from now on.
 */
static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
		(unsigned long long)rgd->rd_addr);
	fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
	gfs2_rgrp_dump(NULL, rgd->rd_gl);
	rgd->rd_flags |= GFS2_RDF_ERROR;
}
2271
/**
 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
 * @ip: The inode we have just allocated blocks for
 * @rbm: The start of the allocated blocks
 * @len: The extent length
 *
 * Adjusts a reservation after an allocation has taken place. If the
 * reservation does not match the allocation, or if it is now empty
 * then it is removed.
 */

static void gfs2_adjust_reservation(struct gfs2_inode *ip,
				    const struct gfs2_rbm *rbm, unsigned len)
{
	struct gfs2_blkreserv *rs = &ip->i_res;
	struct gfs2_rgrpd *rgd = rbm->rgd;
	unsigned rlen;
	u64 block;
	int ret;

	spin_lock(&rgd->rd_rsspin);
	if (gfs2_rs_active(rs)) {
		if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
			/* Allocation starts at the reservation: consume up to
			   len blocks from its front and move its start past
			   the allocated extent */
			block = gfs2_rbm_to_block(rbm);
			ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
			rlen = min(rs->rs_free, len);
			rs->rs_free -= rlen;
			rgd->rd_reserved -= rlen;
			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
			/* Keep the reservation if blocks remain and the new
			   start position is valid */
			if (rs->rs_free && !ret)
				goto out;
			/* We used up our block reservation, so we should
			   reserve more blocks next time. */
			atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint);
		}
		/* Reservation exhausted or didn't match the allocation */
		__rs_deltree(rs);
	}
out:
	spin_unlock(&rgd->rd_rsspin);
}
2312
9e07f2cb
SW
2313/**
2314 * gfs2_set_alloc_start - Set starting point for block allocation
2315 * @rbm: The rbm which will be set to the required location
2316 * @ip: The gfs2 inode
2317 * @dinode: Flag to say if allocation includes a new inode
2318 *
2319 * This sets the starting point from the reservation if one is active
2320 * otherwise it falls back to guessing a start point based on the
2321 * inode's goal block or the last allocation point in the rgrp.
2322 */
2323
2324static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2325 const struct gfs2_inode *ip, bool dinode)
2326{
2327 u64 goal;
2328
a097dc7e
BP
2329 if (gfs2_rs_active(&ip->i_res)) {
2330 *rbm = ip->i_res.rs_rbm;
9e07f2cb
SW
2331 return;
2332 }
2333
2334 if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2335 goal = ip->i_goal;
2336 else
2337 goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2338
2339 gfs2_rbm_from_block(rbm, goal);
2340}
2341
09010978 2342/**
6e87ed0f 2343 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
1639431a 2344 * @ip: the inode to allocate the block for
09010978 2345 * @bn: Used to return the starting block number
8e2e0047 2346 * @nblocks: requested number of blocks/extent length (value/result)
6e87ed0f 2347 * @dinode: 1 if we're allocating a dinode block, else 0
3c5d785a 2348 * @generation: the generation number of the inode
b3b94faa 2349 *
09010978 2350 * Returns: 0 or error
b3b94faa
DT
2351 */
2352
6a8099ed 2353int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
6e87ed0f 2354 bool dinode, u64 *generation)
b3b94faa 2355{
feaa7bba 2356 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
d9ba7615 2357 struct buffer_head *dibh;
b7eba890 2358 struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, };
6a8099ed 2359 unsigned int ndata;
3c5d785a 2360 u64 block; /* block, within the file system scope */
d9ba7615 2361 int error;
b3b94faa 2362
9e07f2cb 2363 gfs2_set_alloc_start(&rbm, ip, dinode);
8381e602 2364 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false);
62e252ee 2365
137834a6 2366 if (error == -ENOSPC) {
9e07f2cb 2367 gfs2_set_alloc_start(&rbm, ip, dinode);
8381e602 2368 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false);
137834a6
SW
2369 }
2370
62e252ee 2371 /* Since all blocks are reserved in advance, this shouldn't happen */
5b924ae2 2372 if (error) {
5ea5050c 2373 fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
9e733d39 2374 (unsigned long long)ip->i_no_addr, error, *nblocks,
5ea5050c
BP
2375 test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2376 rbm.rgd->rd_extfail_pt);
62e252ee 2377 goto rgrp_error;
8e2e0047 2378 }
62e252ee 2379
c04a2ef3
SW
2380 gfs2_alloc_extent(&rbm, dinode, nblocks);
2381 block = gfs2_rbm_to_block(&rbm);
c743ffd0 2382 rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
a097dc7e 2383 if (gfs2_rs_active(&ip->i_res))
5b924ae2 2384 gfs2_adjust_reservation(ip, &rbm, *nblocks);
6a8099ed
SW
2385 ndata = *nblocks;
2386 if (dinode)
2387 ndata--;
b3e47ca0 2388
3c5d785a 2389 if (!dinode) {
6a8099ed 2390 ip->i_goal = block + ndata - 1;
3c5d785a
BP
2391 error = gfs2_meta_inode_buffer(ip, &dibh);
2392 if (error == 0) {
2393 struct gfs2_dinode *di =
2394 (struct gfs2_dinode *)dibh->b_data;
350a9b0a 2395 gfs2_trans_add_meta(ip->i_gl, dibh);
3c5d785a
BP
2396 di->di_goal_meta = di->di_goal_data =
2397 cpu_to_be64(ip->i_goal);
2398 brelse(dibh);
2399 }
d9ba7615 2400 }
4a993fb1 2401 if (rbm.rgd->rd_free < *nblocks) {
fc554ed3 2402 pr_warn("nblocks=%u\n", *nblocks);
09010978 2403 goto rgrp_error;
8e2e0047 2404 }
09010978 2405
4a993fb1 2406 rbm.rgd->rd_free -= *nblocks;
3c5d785a 2407 if (dinode) {
4a993fb1
SW
2408 rbm.rgd->rd_dinodes++;
2409 *generation = rbm.rgd->rd_igeneration++;
3c5d785a 2410 if (*generation == 0)
4a993fb1 2411 *generation = rbm.rgd->rd_igeneration++;
3c5d785a 2412 }
b3b94faa 2413
350a9b0a 2414 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
4a993fb1 2415 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
b3b94faa 2416
6a8099ed 2417 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
3c5d785a 2418 if (dinode)
b2c8b3ea 2419 gfs2_trans_add_unrevoke(sdp, block, *nblocks);
6a8099ed 2420
fd4b4e04 2421 gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
b3b94faa 2422
4a993fb1
SW
2423 rbm.rgd->rd_free_clone -= *nblocks;
2424 trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
6e87ed0f 2425 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
6050b9c7
SW
2426 *bn = block;
2427 return 0;
2428
2429rgrp_error:
4a993fb1 2430 gfs2_rgrp_error(rbm.rgd);
6050b9c7 2431 return -EIO;
b3b94faa
DT
2432}
2433
/**
 * __gfs2_free_blocks - free a contiguous run of block(s)
 * @ip: the inode these blocks are being freed from
 * @bstart: first block of a run of contiguous blocks
 * @blen: the length of the block run
 * @meta: 1 if the blocks represent metadata
 *
 */

void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;

	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
	if (!rgd)
		return;
	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
	rgd->rd_free += blen;
	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);

	/* Directories keep their data in the metadata address space */
	if (meta || ip->i_depth)
		gfs2_meta_wipe(ip, bstart, blen);
}
b3b94faa 2461
/**
 * gfs2_free_meta - free a contiguous run of metadata block(s)
 * @ip: the inode these blocks are being freed from
 * @bstart: first block of a run of contiguous blocks
 * @blen: the length of the block run
 *
 */

void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);

	__gfs2_free_blocks(ip, bstart, blen, 1);
	gfs2_statfs_change(sdp, 0, +blen, 0);
	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
}
2478
/*
 * gfs2_unlink_di - mark an inode's dinode block as unlinked
 * @inode: The inode whose dinode block is to be marked
 *
 * Sets the block's bitmap state to GFS2_BLKST_UNLINKED so it can be
 * reclaimed later (cf. try_rgrp_unlink()), and writes out the updated
 * rgrp header and LVB unlinked count.
 */
void gfs2_unlink_di(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_rgrpd *rgd;
	u64 blkno = ip->i_no_addr;

	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
	if (!rgd)
		return;
	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	update_rgrp_lvb_unlinked(rgd, 1);
}
2494
/*
 * gfs2_free_di - free a dinode block
 * @rgd: the resource group that contains the dinode
 * @ip: the inode
 *
 * Marks the dinode block free, updates the rgrp's dinode/free counts,
 * statfs and quota accounting, and wipes the block from the metadata cache.
 */
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_rgrpd *tmp_rgd;

	tmp_rgd = rgblk_free(sdp, ip->i_no_addr, 1, GFS2_BLKST_FREE);
	if (!tmp_rgd)
		return;
	/* The caller's rgd must really contain the dinode block */
	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);

	if (!rgd->rd_dinodes)
		gfs2_consist_rgrpd(rgd);
	rgd->rd_dinodes--;
	rgd->rd_free++;

	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	update_rgrp_lvb_unlinked(rgd, -1);

	gfs2_statfs_change(sdp, 0, +1, -1);
	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}
2519
/**
 * gfs2_check_blk_type - Check the type of a block
 * @sdp: The superblock
 * @no_addr: The block number to check
 * @type: The block type we are looking for
 *
 * Returns: 0 if the block type matches the expected type
 *          -ESTALE if it doesn't match
 *          or -ve errno if something went wrong while checking
 */

int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
{
	struct gfs2_rgrpd *rgd;
	struct gfs2_holder rgd_gh;
	struct gfs2_rbm rbm;
	int error = -EINVAL;	/* returned when no rgrp contains no_addr */

	rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
	if (!rgd)
		goto fail;

	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
	if (error)
		goto fail;

	rbm.rgd = rgd;
	error = gfs2_rbm_from_block(&rbm, no_addr);
	/* no_addr lies within rgd, so the conversion should not fail */
	WARN_ON_ONCE(error != 0);

	/* Read the real (not clone) bitmap state */
	if (gfs2_testbit(&rbm, false) != type)
		error = -ESTALE;

	gfs2_glock_dq_uninit(&rgd_gh);
fail:
	return error;
}
2557
/**
 * gfs2_rlist_add - add a RG to a list of RGs
 * @ip: the inode
 * @rlist: the list of resource groups
 * @block: the block
 *
 * Figure out what RG a block belongs to and add that RG to the list
 *
 * FIXME: Don't use NOFAIL
 *
 */

void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
		    u64 block)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_rgrpd **tmp;
	unsigned int new_space;
	unsigned int x;

	/* Must not be called after gfs2_rlist_alloc() created the holders */
	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
		return;

	/*
	 * The resource group last accessed is kept in the last position.
	 */

	if (rlist->rl_rgrps) {
		rgd = rlist->rl_rgd[rlist->rl_rgrps - 1];
		if (rgrp_contains_block(rgd, block))
			return;		/* already listed and still last */
		rgd = gfs2_blk2rgrpd(sdp, block, 1);
	} else {
		/* First call: try the inode's current rgrp as a cheap guess
		   before falling back to a full lookup */
		rgd = ip->i_res.rs_rbm.rgd;
		if (!rgd || !rgrp_contains_block(rgd, block))
			rgd = gfs2_blk2rgrpd(sdp, block, 1);
	}

	if (!rgd) {
		fs_err(sdp, "rlist_add: no rgrp for block %llu\n",
		       (unsigned long long)block);
		return;
	}

	/* If already listed, just move it into the last position */
	for (x = 0; x < rlist->rl_rgrps; x++) {
		if (rlist->rl_rgd[x] == rgd) {
			swap(rlist->rl_rgd[x],
			     rlist->rl_rgd[rlist->rl_rgrps - 1]);
			return;
		}
	}

	/* Grow the pointer array in chunks of 10 entries as needed */
	if (rlist->rl_rgrps == rlist->rl_space) {
		new_space = rlist->rl_space + 10;

		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
			      GFP_NOFS | __GFP_NOFAIL);

		if (rlist->rl_rgd) {
			memcpy(tmp, rlist->rl_rgd,
			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
			kfree(rlist->rl_rgd);
		}

		rlist->rl_space = new_space;
		rlist->rl_rgd = tmp;
	}

	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
}
2629
/**
 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
 *      and initialize an array of glock holders for them
 * @rlist: the list of resource groups
 * @state: the lock state to acquire the RG lock in
 *
 * FIXME: Don't use NOFAIL
 *
 */

void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
{
	unsigned int x;

	rlist->rl_ghs = kmalloc_array(rlist->rl_rgrps,
				      sizeof(struct gfs2_holder),
				      GFP_NOFS | __GFP_NOFAIL);
	/* One holder per listed rgrp, all requesting the same lock state */
	for (x = 0; x < rlist->rl_rgrps; x++)
		gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
				 state, 0,
				 &rlist->rl_ghs[x]);
}
2652
2653/**
2654 * gfs2_rlist_free - free a resource group list
27ff6a0f 2655 * @rlist: the list of resource groups
b3b94faa
DT
2656 *
2657 */
2658
2659void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2660{
2661 unsigned int x;
2662
2663 kfree(rlist->rl_rgd);
2664
2665 if (rlist->rl_ghs) {
2666 for (x = 0; x < rlist->rl_rgrps; x++)
2667 gfs2_holder_uninit(&rlist->rl_ghs[x]);
2668 kfree(rlist->rl_ghs);
8e2e0047 2669 rlist->rl_ghs = NULL;
b3b94faa
DT
2670 }
2671}
2672