]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - fs/xfs/xfs_alloc.c
[XFS] implement generic xfs_btree_updkey
[mirror_ubuntu-bionic-kernel.git] / fs / xfs / xfs_alloc.c
CommitLineData
1da177e4 1/*
7b718769
NS
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
1da177e4 4 *
7b718769
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
1da177e4
LT
7 * published by the Free Software Foundation.
8 *
7b718769
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
1da177e4 13 *
7b718769
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1da177e4 17 */
1da177e4 18#include "xfs.h"
a844f451 19#include "xfs_fs.h"
1da177e4 20#include "xfs_types.h"
a844f451 21#include "xfs_bit.h"
1da177e4 22#include "xfs_log.h"
a844f451 23#include "xfs_inum.h"
1da177e4
LT
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
a844f451 27#include "xfs_dir2.h"
1da177e4
LT
28#include "xfs_dmapi.h"
29#include "xfs_mount.h"
1da177e4 30#include "xfs_bmap_btree.h"
a844f451 31#include "xfs_alloc_btree.h"
1da177e4 32#include "xfs_ialloc_btree.h"
a844f451
NS
33#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h"
36#include "xfs_inode.h"
1da177e4
LT
37#include "xfs_btree.h"
38#include "xfs_ialloc.h"
39#include "xfs_alloc.h"
1da177e4
LT
40#include "xfs_error.h"
41
42
43#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
44
45#define XFSA_FIXUP_BNO_OK 1
46#define XFSA_FIXUP_CNT_OK 2
47
f4586e40 48STATIC void
1da177e4
LT
49xfs_alloc_search_busy(xfs_trans_t *tp,
50 xfs_agnumber_t agno,
51 xfs_agblock_t bno,
52 xfs_extlen_t len);
53
#if defined(XFS_ALLOC_TRACE)
ktrace_t *xfs_alloc_trace_buf;

/*
 * Allocation trace hooks.  Each macro records the calling function name,
 * an event string and the relevant arguments in the global ktrace buffer.
 *
 * NOTE: the busy-tracking macros previously named their first parameter
 * "__func__", which is a reserved predefined identifier (C99 6.4.2.2) and
 * must not be used as a macro parameter; renamed to "fname" to match the
 * stub definitions in the #else branch below.
 */
#define	TRACE_ALLOC(s,a)	\
	xfs_alloc_trace_alloc(__func__, s, a, __LINE__)
#define	TRACE_FREE(s,a,b,x,f)	\
	xfs_alloc_trace_free(__func__, s, mp, a, b, x, f, __LINE__)
#define	TRACE_MODAGF(s,a,f)	\
	xfs_alloc_trace_modagf(__func__, s, mp, a, f, __LINE__)
#define	TRACE_BUSY(fname,s,ag,agb,l,sl,tp)	\
	xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
#define	TRACE_UNBUSY(fname,s,ag,sl,tp)	\
	xfs_alloc_trace_busy(fname, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
#define	TRACE_BUSYSEARCH(fname,s,ag,agb,l,tp)	\
	xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, 0, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
#else
#define	TRACE_ALLOC(s,a)
#define	TRACE_FREE(s,a,b,x,f)
#define	TRACE_MODAGF(s,a,f)
#define	TRACE_BUSY(fname,s,ag,agb,l,sl,tp)
#define	TRACE_UNBUSY(fname,s,ag,sl,tp)
#define	TRACE_BUSYSEARCH(fname,s,ag,agb,l,tp)
#endif	/* XFS_ALLOC_TRACE */
77
78/*
79 * Prototypes for per-ag allocation routines
80 */
81
82STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
83STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
84STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
85STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
86 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
87
88/*
89 * Internal functions.
90 */
91
fe033cc8
CH
92/*
93 * Lookup the record equal to [bno, len] in the btree given by cur.
94 */
95STATIC int /* error */
96xfs_alloc_lookup_eq(
97 struct xfs_btree_cur *cur, /* btree cursor */
98 xfs_agblock_t bno, /* starting block of extent */
99 xfs_extlen_t len, /* length of extent */
100 int *stat) /* success/failure */
101{
102 cur->bc_rec.a.ar_startblock = bno;
103 cur->bc_rec.a.ar_blockcount = len;
104 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
105}
106
107/*
108 * Lookup the first record greater than or equal to [bno, len]
109 * in the btree given by cur.
110 */
111STATIC int /* error */
112xfs_alloc_lookup_ge(
113 struct xfs_btree_cur *cur, /* btree cursor */
114 xfs_agblock_t bno, /* starting block of extent */
115 xfs_extlen_t len, /* length of extent */
116 int *stat) /* success/failure */
117{
118 cur->bc_rec.a.ar_startblock = bno;
119 cur->bc_rec.a.ar_blockcount = len;
120 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
121}
122
123/*
124 * Lookup the first record less than or equal to [bno, len]
125 * in the btree given by cur.
126 */
127STATIC int /* error */
128xfs_alloc_lookup_le(
129 struct xfs_btree_cur *cur, /* btree cursor */
130 xfs_agblock_t bno, /* starting block of extent */
131 xfs_extlen_t len, /* length of extent */
132 int *stat) /* success/failure */
133{
134 cur->bc_rec.a.ar_startblock = bno;
135 cur->bc_rec.a.ar_blockcount = len;
136 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
137}
138
139
1da177e4
LT
140/*
141 * Compute aligned version of the found extent.
142 * Takes alignment and min length into account.
143 */
12375c82 144STATIC void
1da177e4
LT
145xfs_alloc_compute_aligned(
146 xfs_agblock_t foundbno, /* starting block in found extent */
147 xfs_extlen_t foundlen, /* length in found extent */
148 xfs_extlen_t alignment, /* alignment for allocation */
149 xfs_extlen_t minlen, /* minimum length for allocation */
150 xfs_agblock_t *resbno, /* result block number */
151 xfs_extlen_t *reslen) /* result length */
152{
153 xfs_agblock_t bno;
154 xfs_extlen_t diff;
155 xfs_extlen_t len;
156
157 if (alignment > 1 && foundlen >= minlen) {
158 bno = roundup(foundbno, alignment);
159 diff = bno - foundbno;
160 len = diff >= foundlen ? 0 : foundlen - diff;
161 } else {
162 bno = foundbno;
163 len = foundlen;
164 }
165 *resbno = bno;
166 *reslen = len;
1da177e4
LT
167}
168
169/*
170 * Compute best start block and diff for "near" allocations.
171 * freelen >= wantlen already checked by caller.
172 */
173STATIC xfs_extlen_t /* difference value (absolute) */
174xfs_alloc_compute_diff(
175 xfs_agblock_t wantbno, /* target starting block */
176 xfs_extlen_t wantlen, /* target length */
177 xfs_extlen_t alignment, /* target alignment */
178 xfs_agblock_t freebno, /* freespace's starting block */
179 xfs_extlen_t freelen, /* freespace's length */
180 xfs_agblock_t *newbnop) /* result: best start block from free */
181{
182 xfs_agblock_t freeend; /* end of freespace extent */
183 xfs_agblock_t newbno1; /* return block number */
184 xfs_agblock_t newbno2; /* other new block number */
185 xfs_extlen_t newlen1=0; /* length with newbno1 */
186 xfs_extlen_t newlen2=0; /* length with newbno2 */
187 xfs_agblock_t wantend; /* end of target extent */
188
189 ASSERT(freelen >= wantlen);
190 freeend = freebno + freelen;
191 wantend = wantbno + wantlen;
192 if (freebno >= wantbno) {
193 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
194 newbno1 = NULLAGBLOCK;
195 } else if (freeend >= wantend && alignment > 1) {
196 newbno1 = roundup(wantbno, alignment);
197 newbno2 = newbno1 - alignment;
198 if (newbno1 >= freeend)
199 newbno1 = NULLAGBLOCK;
200 else
201 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
202 if (newbno2 < freebno)
203 newbno2 = NULLAGBLOCK;
204 else
205 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
206 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
207 if (newlen1 < newlen2 ||
208 (newlen1 == newlen2 &&
209 XFS_ABSDIFF(newbno1, wantbno) >
210 XFS_ABSDIFF(newbno2, wantbno)))
211 newbno1 = newbno2;
212 } else if (newbno2 != NULLAGBLOCK)
213 newbno1 = newbno2;
214 } else if (freeend >= wantend) {
215 newbno1 = wantbno;
216 } else if (alignment > 1) {
217 newbno1 = roundup(freeend - wantlen, alignment);
218 if (newbno1 > freeend - wantlen &&
219 newbno1 - alignment >= freebno)
220 newbno1 -= alignment;
221 else if (newbno1 >= freeend)
222 newbno1 = NULLAGBLOCK;
223 } else
224 newbno1 = freeend - wantlen;
225 *newbnop = newbno1;
226 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
227}
228
229/*
230 * Fix up the length, based on mod and prod.
231 * len should be k * prod + mod for some k.
232 * If len is too small it is returned unchanged.
233 * If len hits maxlen it is left alone.
234 */
235STATIC void
236xfs_alloc_fix_len(
237 xfs_alloc_arg_t *args) /* allocation argument structure */
238{
239 xfs_extlen_t k;
240 xfs_extlen_t rlen;
241
242 ASSERT(args->mod < args->prod);
243 rlen = args->len;
244 ASSERT(rlen >= args->minlen);
245 ASSERT(rlen <= args->maxlen);
246 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
247 (args->mod == 0 && rlen < args->prod))
248 return;
249 k = rlen % args->prod;
250 if (k == args->mod)
251 return;
252 if (k > args->mod) {
253 if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
254 return;
255 } else {
256 if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
257 (int)args->minlen)
258 return;
259 }
260 ASSERT(rlen >= args->minlen);
261 ASSERT(rlen <= args->maxlen);
262 args->len = rlen;
263}
264
265/*
266 * Fix up length if there is too little space left in the a.g.
267 * Return 1 if ok, 0 if too little, should give up.
268 */
269STATIC int
270xfs_alloc_fix_minleft(
271 xfs_alloc_arg_t *args) /* allocation argument structure */
272{
273 xfs_agf_t *agf; /* a.g. freelist header */
274 int diff; /* free space difference */
275
276 if (args->minleft == 0)
277 return 1;
278 agf = XFS_BUF_TO_AGF(args->agbp);
16259e7d
CH
279 diff = be32_to_cpu(agf->agf_freeblks)
280 + be32_to_cpu(agf->agf_flcount)
1da177e4
LT
281 - args->len - args->minleft;
282 if (diff >= 0)
283 return 1;
284 args->len += diff; /* shrink the allocated space */
285 if (args->len >= args->minlen)
286 return 1;
287 args->agbno = NULLAGBLOCK;
288 return 0;
289}
290
291/*
292 * Update the two btrees, logically removing from freespace the extent
293 * starting at rbno, rlen blocks. The extent is contained within the
294 * actual (current) free extent fbno for flen blocks.
295 * Flags are passed in indicating whether the cursors are set to the
296 * relevant records.
297 */
298STATIC int /* error code */
299xfs_alloc_fixup_trees(
300 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
301 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
302 xfs_agblock_t fbno, /* starting block of free extent */
303 xfs_extlen_t flen, /* length of free extent */
304 xfs_agblock_t rbno, /* starting block of returned extent */
305 xfs_extlen_t rlen, /* length of returned extent */
306 int flags) /* flags, XFSA_FIXUP_... */
307{
308 int error; /* error code */
309 int i; /* operation results */
310 xfs_agblock_t nfbno1; /* first new free startblock */
311 xfs_agblock_t nfbno2; /* second new free startblock */
312 xfs_extlen_t nflen1=0; /* first new free length */
313 xfs_extlen_t nflen2=0; /* second new free length */
314
315 /*
316 * Look up the record in the by-size tree if necessary.
317 */
318 if (flags & XFSA_FIXUP_CNT_OK) {
319#ifdef DEBUG
320 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
321 return error;
322 XFS_WANT_CORRUPTED_RETURN(
323 i == 1 && nfbno1 == fbno && nflen1 == flen);
324#endif
325 } else {
326 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
327 return error;
328 XFS_WANT_CORRUPTED_RETURN(i == 1);
329 }
330 /*
331 * Look up the record in the by-block tree if necessary.
332 */
333 if (flags & XFSA_FIXUP_BNO_OK) {
334#ifdef DEBUG
335 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
336 return error;
337 XFS_WANT_CORRUPTED_RETURN(
338 i == 1 && nfbno1 == fbno && nflen1 == flen);
339#endif
340 } else {
341 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
342 return error;
343 XFS_WANT_CORRUPTED_RETURN(i == 1);
344 }
345#ifdef DEBUG
346 {
347 xfs_alloc_block_t *bnoblock;
348 xfs_alloc_block_t *cntblock;
349
350 if (bno_cur->bc_nlevels == 1 &&
351 cnt_cur->bc_nlevels == 1) {
352 bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]);
353 cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]);
354 XFS_WANT_CORRUPTED_RETURN(
16259e7d
CH
355 be16_to_cpu(bnoblock->bb_numrecs) ==
356 be16_to_cpu(cntblock->bb_numrecs));
1da177e4
LT
357 }
358 }
359#endif
360 /*
361 * Deal with all four cases: the allocated record is contained
362 * within the freespace record, so we can have new freespace
363 * at either (or both) end, or no freespace remaining.
364 */
365 if (rbno == fbno && rlen == flen)
366 nfbno1 = nfbno2 = NULLAGBLOCK;
367 else if (rbno == fbno) {
368 nfbno1 = rbno + rlen;
369 nflen1 = flen - rlen;
370 nfbno2 = NULLAGBLOCK;
371 } else if (rbno + rlen == fbno + flen) {
372 nfbno1 = fbno;
373 nflen1 = flen - rlen;
374 nfbno2 = NULLAGBLOCK;
375 } else {
376 nfbno1 = fbno;
377 nflen1 = rbno - fbno;
378 nfbno2 = rbno + rlen;
379 nflen2 = (fbno + flen) - nfbno2;
380 }
381 /*
382 * Delete the entry from the by-size btree.
383 */
384 if ((error = xfs_alloc_delete(cnt_cur, &i)))
385 return error;
386 XFS_WANT_CORRUPTED_RETURN(i == 1);
387 /*
388 * Add new by-size btree entry(s).
389 */
390 if (nfbno1 != NULLAGBLOCK) {
391 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
392 return error;
393 XFS_WANT_CORRUPTED_RETURN(i == 0);
394 if ((error = xfs_alloc_insert(cnt_cur, &i)))
395 return error;
396 XFS_WANT_CORRUPTED_RETURN(i == 1);
397 }
398 if (nfbno2 != NULLAGBLOCK) {
399 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
400 return error;
401 XFS_WANT_CORRUPTED_RETURN(i == 0);
402 if ((error = xfs_alloc_insert(cnt_cur, &i)))
403 return error;
404 XFS_WANT_CORRUPTED_RETURN(i == 1);
405 }
406 /*
407 * Fix up the by-block btree entry(s).
408 */
409 if (nfbno1 == NULLAGBLOCK) {
410 /*
411 * No remaining freespace, just delete the by-block tree entry.
412 */
413 if ((error = xfs_alloc_delete(bno_cur, &i)))
414 return error;
415 XFS_WANT_CORRUPTED_RETURN(i == 1);
416 } else {
417 /*
418 * Update the by-block entry to start later|be shorter.
419 */
420 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
421 return error;
422 }
423 if (nfbno2 != NULLAGBLOCK) {
424 /*
425 * 2 resulting free entries, need to add one.
426 */
427 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
428 return error;
429 XFS_WANT_CORRUPTED_RETURN(i == 0);
430 if ((error = xfs_alloc_insert(bno_cur, &i)))
431 return error;
432 XFS_WANT_CORRUPTED_RETURN(i == 1);
433 }
434 return 0;
435}
436
437/*
438 * Read in the allocation group free block array.
439 */
440STATIC int /* error */
441xfs_alloc_read_agfl(
442 xfs_mount_t *mp, /* mount point structure */
443 xfs_trans_t *tp, /* transaction pointer */
444 xfs_agnumber_t agno, /* allocation group number */
445 xfs_buf_t **bpp) /* buffer for the ag free block array */
446{
447 xfs_buf_t *bp; /* return value */
448 int error;
449
450 ASSERT(agno != NULLAGNUMBER);
451 error = xfs_trans_read_buf(
452 mp, tp, mp->m_ddev_targp,
453 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
454 XFS_FSS_TO_BB(mp, 1), 0, &bp);
455 if (error)
456 return error;
457 ASSERT(bp);
458 ASSERT(!XFS_BUF_GETERROR(bp));
459 XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
460 *bpp = bp;
461 return 0;
462}
463
464#if defined(XFS_ALLOC_TRACE)
465/*
466 * Add an allocation trace entry for an alloc call.
467 */
468STATIC void
469xfs_alloc_trace_alloc(
3a59c94c 470 const char *name, /* function tag string */
1da177e4
LT
471 char *str, /* additional string */
472 xfs_alloc_arg_t *args, /* allocation argument structure */
473 int line) /* source line number */
474{
475 ktrace_enter(xfs_alloc_trace_buf,
476 (void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)),
477 (void *)name,
478 (void *)str,
479 (void *)args->mp,
480 (void *)(__psunsigned_t)args->agno,
481 (void *)(__psunsigned_t)args->agbno,
482 (void *)(__psunsigned_t)args->minlen,
483 (void *)(__psunsigned_t)args->maxlen,
484 (void *)(__psunsigned_t)args->mod,
485 (void *)(__psunsigned_t)args->prod,
486 (void *)(__psunsigned_t)args->minleft,
487 (void *)(__psunsigned_t)args->total,
488 (void *)(__psunsigned_t)args->alignment,
489 (void *)(__psunsigned_t)args->len,
490 (void *)((((__psint_t)args->type) << 16) |
491 (__psint_t)args->otype),
492 (void *)(__psint_t)((args->wasdel << 3) |
493 (args->wasfromfl << 2) |
494 (args->isfl << 1) |
495 (args->userdata << 0)));
496}
497
498/*
499 * Add an allocation trace entry for a free call.
500 */
501STATIC void
502xfs_alloc_trace_free(
3a59c94c 503 const char *name, /* function tag string */
1da177e4
LT
504 char *str, /* additional string */
505 xfs_mount_t *mp, /* file system mount point */
506 xfs_agnumber_t agno, /* allocation group number */
507 xfs_agblock_t agbno, /* a.g. relative block number */
508 xfs_extlen_t len, /* length of extent */
509 int isfl, /* set if is freelist allocation/free */
510 int line) /* source line number */
511{
512 ktrace_enter(xfs_alloc_trace_buf,
513 (void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)),
514 (void *)name,
515 (void *)str,
516 (void *)mp,
517 (void *)(__psunsigned_t)agno,
518 (void *)(__psunsigned_t)agbno,
519 (void *)(__psunsigned_t)len,
520 (void *)(__psint_t)isfl,
521 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
522}
523
524/*
525 * Add an allocation trace entry for modifying an agf.
526 */
527STATIC void
528xfs_alloc_trace_modagf(
3a59c94c 529 const char *name, /* function tag string */
1da177e4
LT
530 char *str, /* additional string */
531 xfs_mount_t *mp, /* file system mount point */
532 xfs_agf_t *agf, /* new agf value */
533 int flags, /* logging flags for agf */
534 int line) /* source line number */
535{
536 ktrace_enter(xfs_alloc_trace_buf,
537 (void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)),
538 (void *)name,
539 (void *)str,
540 (void *)mp,
541 (void *)(__psint_t)flags,
16259e7d
CH
542 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_seqno),
543 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_length),
544 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
545 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
546 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
547 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
548 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flfirst),
549 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_fllast),
550 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flcount),
551 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_freeblks),
552 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_longest));
1da177e4
LT
553}
554
555STATIC void
556xfs_alloc_trace_busy(
3a59c94c 557 const char *name, /* function tag string */
1da177e4 558 char *str, /* additional string */
c41564b5 559 xfs_mount_t *mp, /* file system mount point */
1da177e4
LT
560 xfs_agnumber_t agno, /* allocation group number */
561 xfs_agblock_t agbno, /* a.g. relative block number */
562 xfs_extlen_t len, /* length of extent */
563 int slot, /* perag Busy slot */
564 xfs_trans_t *tp,
565 int trtype, /* type: add, delete, search */
566 int line) /* source line number */
567{
568 ktrace_enter(xfs_alloc_trace_buf,
569 (void *)(__psint_t)(trtype | (line << 16)),
570 (void *)name,
571 (void *)str,
572 (void *)mp,
573 (void *)(__psunsigned_t)agno,
574 (void *)(__psunsigned_t)agbno,
575 (void *)(__psunsigned_t)len,
576 (void *)(__psint_t)slot,
577 (void *)tp,
578 NULL, NULL, NULL, NULL, NULL, NULL, NULL);
579}
580#endif /* XFS_ALLOC_TRACE */
581
582/*
583 * Allocation group level functions.
584 */
585
586/*
587 * Allocate a variable extent in the allocation group agno.
588 * Type and bno are used to determine where in the allocation group the
589 * extent will start.
590 * Extent's length (returned in *len) will be between minlen and maxlen,
591 * and of the form k * prod + mod unless there's nothing that large.
592 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
593 */
594STATIC int /* error */
595xfs_alloc_ag_vextent(
596 xfs_alloc_arg_t *args) /* argument structure for allocation */
597{
598 int error=0;
1da177e4
LT
599
600 ASSERT(args->minlen > 0);
601 ASSERT(args->maxlen > 0);
602 ASSERT(args->minlen <= args->maxlen);
603 ASSERT(args->mod < args->prod);
604 ASSERT(args->alignment > 0);
605 /*
606 * Branch to correct routine based on the type.
607 */
608 args->wasfromfl = 0;
609 switch (args->type) {
610 case XFS_ALLOCTYPE_THIS_AG:
611 error = xfs_alloc_ag_vextent_size(args);
612 break;
613 case XFS_ALLOCTYPE_NEAR_BNO:
614 error = xfs_alloc_ag_vextent_near(args);
615 break;
616 case XFS_ALLOCTYPE_THIS_BNO:
617 error = xfs_alloc_ag_vextent_exact(args);
618 break;
619 default:
620 ASSERT(0);
621 /* NOTREACHED */
622 }
623 if (error)
624 return error;
625 /*
626 * If the allocation worked, need to change the agf structure
627 * (and log it), and the superblock.
628 */
629 if (args->agbno != NULLAGBLOCK) {
630 xfs_agf_t *agf; /* allocation group freelist header */
631#ifdef XFS_ALLOC_TRACE
632 xfs_mount_t *mp = args->mp;
633#endif
634 long slen = (long)args->len;
635
636 ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
637 ASSERT(!(args->wasfromfl) || !args->isfl);
638 ASSERT(args->agbno % args->alignment == 0);
639 if (!(args->wasfromfl)) {
640
641 agf = XFS_BUF_TO_AGF(args->agbp);
413d57c9 642 be32_add_cpu(&agf->agf_freeblks, -(args->len));
1da177e4
LT
643 xfs_trans_agblocks_delta(args->tp,
644 -((long)(args->len)));
645 args->pag->pagf_freeblks -= args->len;
16259e7d
CH
646 ASSERT(be32_to_cpu(agf->agf_freeblks) <=
647 be32_to_cpu(agf->agf_length));
1da177e4
LT
648 TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
649 xfs_alloc_log_agf(args->tp, args->agbp,
650 XFS_AGF_FREEBLKS);
651 /* search the busylist for these blocks */
652 xfs_alloc_search_busy(args->tp, args->agno,
653 args->agbno, args->len);
654 }
655 if (!args->isfl)
656 xfs_trans_mod_sb(args->tp,
657 args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
658 XFS_TRANS_SB_FDBLOCKS, -slen);
659 XFS_STATS_INC(xs_allocx);
660 XFS_STATS_ADD(xs_allocb, args->len);
661 }
662 return 0;
663}
664
665/*
666 * Allocate a variable extent at exactly agno/bno.
667 * Extent's length (returned in *len) will be between minlen and maxlen,
668 * and of the form k * prod + mod unless there's nothing that large.
669 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
670 */
671STATIC int /* error */
672xfs_alloc_ag_vextent_exact(
673 xfs_alloc_arg_t *args) /* allocation argument structure */
674{
675 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
676 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
677 xfs_agblock_t end; /* end of allocated extent */
678 int error;
679 xfs_agblock_t fbno; /* start block of found extent */
680 xfs_agblock_t fend; /* end block of found extent */
681 xfs_extlen_t flen; /* length of found extent */
1da177e4
LT
682 int i; /* success/failure of operation */
683 xfs_agblock_t maxend; /* end of maximal extent */
684 xfs_agblock_t minend; /* end of minimal extent */
685 xfs_extlen_t rlen; /* length of returned extent */
686
687 ASSERT(args->alignment == 1);
688 /*
689 * Allocate/initialize a cursor for the by-number freespace btree.
690 */
561f7d17
CH
691 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
692 args->agno, XFS_BTNUM_BNO);
1da177e4
LT
693 /*
694 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
695 * Look for the closest free block <= bno, it must contain bno
696 * if any free block does.
697 */
698 if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i)))
699 goto error0;
700 if (!i) {
701 /*
702 * Didn't find it, return null.
703 */
704 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
705 args->agbno = NULLAGBLOCK;
706 return 0;
707 }
708 /*
709 * Grab the freespace record.
710 */
711 if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i)))
712 goto error0;
713 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
714 ASSERT(fbno <= args->agbno);
715 minend = args->agbno + args->minlen;
716 maxend = args->agbno + args->maxlen;
717 fend = fbno + flen;
718 /*
719 * Give up if the freespace isn't long enough for the minimum request.
720 */
721 if (fend < minend) {
722 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
723 args->agbno = NULLAGBLOCK;
724 return 0;
725 }
726 /*
727 * End of extent will be smaller of the freespace end and the
728 * maximal requested end.
729 */
730 end = XFS_AGBLOCK_MIN(fend, maxend);
731 /*
732 * Fix the length according to mod and prod if given.
733 */
734 args->len = end - args->agbno;
735 xfs_alloc_fix_len(args);
736 if (!xfs_alloc_fix_minleft(args)) {
737 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
738 return 0;
739 }
740 rlen = args->len;
741 ASSERT(args->agbno + rlen <= fend);
742 end = args->agbno + rlen;
743 /*
744 * We are allocating agbno for rlen [agbno .. end]
745 * Allocate/initialize a cursor for the by-size btree.
746 */
561f7d17
CH
747 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
748 args->agno, XFS_BTNUM_CNT);
1da177e4 749 ASSERT(args->agbno + args->len <=
16259e7d 750 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1da177e4
LT
751 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
752 args->agbno, args->len, XFSA_FIXUP_BNO_OK))) {
753 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
754 goto error0;
755 }
756 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
757 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
758 TRACE_ALLOC("normal", args);
759 args->wasfromfl = 0;
760 return 0;
761
762error0:
763 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
764 TRACE_ALLOC("error", args);
765 return error;
766}
767
768/*
769 * Allocate a variable extent near bno in the allocation group agno.
770 * Extent's length (returned in len) will be between minlen and maxlen,
771 * and of the form k * prod + mod unless there's nothing that large.
772 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
773 */
774STATIC int /* error */
775xfs_alloc_ag_vextent_near(
776 xfs_alloc_arg_t *args) /* allocation argument structure */
777{
778 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
779 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
780 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
1da177e4
LT
781 xfs_agblock_t gtbno; /* start bno of right side entry */
782 xfs_agblock_t gtbnoa; /* aligned ... */
783 xfs_extlen_t gtdiff; /* difference to right side entry */
784 xfs_extlen_t gtlen; /* length of right side entry */
785 xfs_extlen_t gtlena; /* aligned ... */
786 xfs_agblock_t gtnew; /* useful start bno of right side */
787 int error; /* error code */
788 int i; /* result code, temporary */
789 int j; /* result code, temporary */
790 xfs_agblock_t ltbno; /* start bno of left side entry */
791 xfs_agblock_t ltbnoa; /* aligned ... */
792 xfs_extlen_t ltdiff; /* difference to left side entry */
793 /*REFERENCED*/
794 xfs_agblock_t ltend; /* end bno of left side entry */
795 xfs_extlen_t ltlen; /* length of left side entry */
796 xfs_extlen_t ltlena; /* aligned ... */
797 xfs_agblock_t ltnew; /* useful start bno of left side */
798 xfs_extlen_t rlen; /* length of returned extent */
799#if defined(DEBUG) && defined(__KERNEL__)
800 /*
801 * Randomly don't execute the first algorithm.
802 */
803 int dofirst; /* set to do first algorithm */
804
e7a23a9b 805 dofirst = random32() & 1;
1da177e4
LT
806#endif
807 /*
808 * Get a cursor for the by-size btree.
809 */
561f7d17
CH
810 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
811 args->agno, XFS_BTNUM_CNT);
1da177e4
LT
812 ltlen = 0;
813 bno_cur_lt = bno_cur_gt = NULL;
814 /*
815 * See if there are any free extents as big as maxlen.
816 */
817 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
818 goto error0;
819 /*
820 * If none, then pick up the last entry in the tree unless the
821 * tree is empty.
822 */
823 if (!i) {
824 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
825 &ltlen, &i)))
826 goto error0;
827 if (i == 0 || ltlen == 0) {
828 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
829 return 0;
830 }
831 ASSERT(i == 1);
832 }
833 args->wasfromfl = 0;
834 /*
835 * First algorithm.
836 * If the requested extent is large wrt the freespaces available
837 * in this a.g., then the cursor will be pointing to a btree entry
838 * near the right edge of the tree. If it's in the last btree leaf
839 * block, then we just examine all the entries in that block
840 * that are big enough, and pick the best one.
841 * This is written as a while loop so we can break out of it,
842 * but we never loop back to the top.
843 */
844 while (xfs_btree_islastblock(cnt_cur, 0)) {
845 xfs_extlen_t bdiff;
846 int besti=0;
847 xfs_extlen_t blen=0;
848 xfs_agblock_t bnew=0;
849
850#if defined(DEBUG) && defined(__KERNEL__)
851 if (!dofirst)
852 break;
853#endif
854 /*
855 * Start from the entry that lookup found, sequence through
856 * all larger free blocks. If we're actually pointing at a
857 * record smaller than maxlen, go to the start of this block,
858 * and skip all those smaller than minlen.
859 */
860 if (ltlen || args->alignment > 1) {
861 cnt_cur->bc_ptrs[0] = 1;
862 do {
863 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
864 &ltlen, &i)))
865 goto error0;
866 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
867 if (ltlen >= args->minlen)
868 break;
637aa50f 869 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
1da177e4
LT
870 goto error0;
871 } while (i);
872 ASSERT(ltlen >= args->minlen);
873 if (!i)
874 break;
875 }
876 i = cnt_cur->bc_ptrs[0];
877 for (j = 1, blen = 0, bdiff = 0;
878 !error && j && (blen < args->maxlen || bdiff > 0);
637aa50f 879 error = xfs_btree_increment(cnt_cur, 0, &j)) {
1da177e4
LT
880 /*
881 * For each entry, decide if it's better than
882 * the previous best entry.
883 */
884 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
885 goto error0;
886 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
12375c82
DC
887 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment,
888 args->minlen, &ltbnoa, &ltlena);
e6430037 889 if (ltlena < args->minlen)
1da177e4
LT
890 continue;
891 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
892 xfs_alloc_fix_len(args);
893 ASSERT(args->len >= args->minlen);
894 if (args->len < blen)
895 continue;
896 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
897 args->alignment, ltbno, ltlen, &ltnew);
898 if (ltnew != NULLAGBLOCK &&
899 (args->len > blen || ltdiff < bdiff)) {
900 bdiff = ltdiff;
901 bnew = ltnew;
902 blen = args->len;
903 besti = cnt_cur->bc_ptrs[0];
904 }
905 }
906 /*
907 * It didn't work. We COULD be in a case where
908 * there's a good record somewhere, so try again.
909 */
910 if (blen == 0)
911 break;
912 /*
913 * Point at the best entry, and retrieve it again.
914 */
915 cnt_cur->bc_ptrs[0] = besti;
916 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
917 goto error0;
918 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
919 ltend = ltbno + ltlen;
16259e7d 920 ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1da177e4
LT
921 args->len = blen;
922 if (!xfs_alloc_fix_minleft(args)) {
923 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
924 TRACE_ALLOC("nominleft", args);
925 return 0;
926 }
927 blen = args->len;
928 /*
929 * We are allocating starting at bnew for blen blocks.
930 */
931 args->agbno = bnew;
932 ASSERT(bnew >= ltbno);
933 ASSERT(bnew + blen <= ltend);
934 /*
935 * Set up a cursor for the by-bno tree.
936 */
561f7d17
CH
937 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
938 args->agbp, args->agno, XFS_BTNUM_BNO);
1da177e4
LT
939 /*
940 * Fix up the btree entries.
941 */
942 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
943 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
944 goto error0;
945 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
946 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
947 TRACE_ALLOC("first", args);
948 return 0;
949 }
950 /*
951 * Second algorithm.
952 * Search in the by-bno tree to the left and to the right
953 * simultaneously, until in each case we find a space big enough,
954 * or run into the edge of the tree. When we run into the edge,
955 * we deallocate that cursor.
956 * If both searches succeed, we compare the two spaces and pick
957 * the better one.
958 * With alignment, it's possible for both to fail; the upper
959 * level algorithm that picks allocation groups for allocations
960 * is not supposed to do this.
961 */
962 /*
963 * Allocate and initialize the cursor for the leftward search.
964 */
561f7d17
CH
965 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
966 args->agno, XFS_BTNUM_BNO);
1da177e4
LT
967 /*
968 * Lookup <= bno to find the leftward search's starting point.
969 */
970 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
971 goto error0;
972 if (!i) {
973 /*
974 * Didn't find anything; use this cursor for the rightward
975 * search.
976 */
977 bno_cur_gt = bno_cur_lt;
978 bno_cur_lt = NULL;
979 }
980 /*
981 * Found something. Duplicate the cursor for the rightward search.
982 */
983 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
984 goto error0;
985 /*
986 * Increment the cursor, so we will point at the entry just right
987 * of the leftward entry if any, or to the leftmost entry.
988 */
637aa50f 989 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1da177e4
LT
990 goto error0;
991 if (!i) {
992 /*
993 * It failed, there are no rightward entries.
994 */
995 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
996 bno_cur_gt = NULL;
997 }
998 /*
999 * Loop going left with the leftward cursor, right with the
1000 * rightward cursor, until either both directions give up or
1001 * we find an entry at least as big as minlen.
1002 */
1003 do {
1004 if (bno_cur_lt) {
1005 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1006 goto error0;
1007 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
12375c82
DC
1008 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment,
1009 args->minlen, &ltbnoa, &ltlena);
1010 if (ltlena >= args->minlen)
1da177e4 1011 break;
8df4da4a 1012 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1da177e4
LT
1013 goto error0;
1014 if (!i) {
1015 xfs_btree_del_cursor(bno_cur_lt,
1016 XFS_BTREE_NOERROR);
1017 bno_cur_lt = NULL;
1018 }
1019 }
1020 if (bno_cur_gt) {
1021 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1022 goto error0;
1023 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
12375c82
DC
1024 xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment,
1025 args->minlen, &gtbnoa, &gtlena);
1026 if (gtlena >= args->minlen)
1da177e4 1027 break;
637aa50f 1028 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1da177e4
LT
1029 goto error0;
1030 if (!i) {
1031 xfs_btree_del_cursor(bno_cur_gt,
1032 XFS_BTREE_NOERROR);
1033 bno_cur_gt = NULL;
1034 }
1035 }
1036 } while (bno_cur_lt || bno_cur_gt);
1037 /*
1038 * Got both cursors still active, need to find better entry.
1039 */
1040 if (bno_cur_lt && bno_cur_gt) {
1041 /*
1042 * Left side is long enough, look for a right side entry.
1043 */
1044 if (ltlena >= args->minlen) {
1045 /*
1046 * Fix up the length.
1047 */
1048 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1049 xfs_alloc_fix_len(args);
1050 rlen = args->len;
1051 ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
1052 args->alignment, ltbno, ltlen, &ltnew);
1053 /*
1054 * Not perfect.
1055 */
1056 if (ltdiff) {
1057 /*
1058 * Look until we find a better one, run out of
1059 * space, or run off the end.
1060 */
1061 while (bno_cur_lt && bno_cur_gt) {
1062 if ((error = xfs_alloc_get_rec(
1063 bno_cur_gt, &gtbno,
1064 &gtlen, &i)))
1065 goto error0;
1066 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1067 xfs_alloc_compute_aligned(gtbno, gtlen,
1068 args->alignment, args->minlen,
1069 &gtbnoa, &gtlena);
1070 /*
1071 * The left one is clearly better.
1072 */
1073 if (gtbnoa >= args->agbno + ltdiff) {
1074 xfs_btree_del_cursor(
1075 bno_cur_gt,
1076 XFS_BTREE_NOERROR);
1077 bno_cur_gt = NULL;
1078 break;
1079 }
1080 /*
1081 * If we reach a big enough entry,
1082 * compare the two and pick the best.
1083 */
1084 if (gtlena >= args->minlen) {
1085 args->len =
1086 XFS_EXTLEN_MIN(gtlena,
1087 args->maxlen);
1088 xfs_alloc_fix_len(args);
1089 rlen = args->len;
1090 gtdiff = xfs_alloc_compute_diff(
1091 args->agbno, rlen,
1092 args->alignment,
1093 gtbno, gtlen, &gtnew);
1094 /*
1095 * Right side is better.
1096 */
1097 if (gtdiff < ltdiff) {
1098 xfs_btree_del_cursor(
1099 bno_cur_lt,
1100 XFS_BTREE_NOERROR);
1101 bno_cur_lt = NULL;
1102 }
1103 /*
1104 * Left side is better.
1105 */
1106 else {
1107 xfs_btree_del_cursor(
1108 bno_cur_gt,
1109 XFS_BTREE_NOERROR);
1110 bno_cur_gt = NULL;
1111 }
1112 break;
1113 }
1114 /*
1115 * Fell off the right end.
1116 */
637aa50f 1117 if ((error = xfs_btree_increment(
1da177e4
LT
1118 bno_cur_gt, 0, &i)))
1119 goto error0;
1120 if (!i) {
1121 xfs_btree_del_cursor(
1122 bno_cur_gt,
1123 XFS_BTREE_NOERROR);
1124 bno_cur_gt = NULL;
1125 break;
1126 }
1127 }
1128 }
1129 /*
1130 * The left side is perfect, trash the right side.
1131 */
1132 else {
1133 xfs_btree_del_cursor(bno_cur_gt,
1134 XFS_BTREE_NOERROR);
1135 bno_cur_gt = NULL;
1136 }
1137 }
1138 /*
1139 * It's the right side that was found first, look left.
1140 */
1141 else {
1142 /*
1143 * Fix up the length.
1144 */
1145 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1146 xfs_alloc_fix_len(args);
1147 rlen = args->len;
1148 gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
1149 args->alignment, gtbno, gtlen, &gtnew);
1150 /*
1151 * Right side entry isn't perfect.
1152 */
1153 if (gtdiff) {
1154 /*
1155 * Look until we find a better one, run out of
1156 * space, or run off the end.
1157 */
1158 while (bno_cur_lt && bno_cur_gt) {
1159 if ((error = xfs_alloc_get_rec(
1160 bno_cur_lt, &ltbno,
1161 &ltlen, &i)))
1162 goto error0;
1163 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1164 xfs_alloc_compute_aligned(ltbno, ltlen,
1165 args->alignment, args->minlen,
1166 &ltbnoa, &ltlena);
1167 /*
1168 * The right one is clearly better.
1169 */
1170 if (ltbnoa <= args->agbno - gtdiff) {
1171 xfs_btree_del_cursor(
1172 bno_cur_lt,
1173 XFS_BTREE_NOERROR);
1174 bno_cur_lt = NULL;
1175 break;
1176 }
1177 /*
1178 * If we reach a big enough entry,
1179 * compare the two and pick the best.
1180 */
1181 if (ltlena >= args->minlen) {
1182 args->len = XFS_EXTLEN_MIN(
1183 ltlena, args->maxlen);
1184 xfs_alloc_fix_len(args);
1185 rlen = args->len;
1186 ltdiff = xfs_alloc_compute_diff(
1187 args->agbno, rlen,
1188 args->alignment,
1189 ltbno, ltlen, &ltnew);
1190 /*
1191 * Left side is better.
1192 */
1193 if (ltdiff < gtdiff) {
1194 xfs_btree_del_cursor(
1195 bno_cur_gt,
1196 XFS_BTREE_NOERROR);
1197 bno_cur_gt = NULL;
1198 }
1199 /*
1200 * Right side is better.
1201 */
1202 else {
1203 xfs_btree_del_cursor(
1204 bno_cur_lt,
1205 XFS_BTREE_NOERROR);
1206 bno_cur_lt = NULL;
1207 }
1208 break;
1209 }
1210 /*
1211 * Fell off the left end.
1212 */
8df4da4a 1213 if ((error = xfs_btree_decrement(
1da177e4
LT
1214 bno_cur_lt, 0, &i)))
1215 goto error0;
1216 if (!i) {
1217 xfs_btree_del_cursor(bno_cur_lt,
1218 XFS_BTREE_NOERROR);
1219 bno_cur_lt = NULL;
1220 break;
1221 }
1222 }
1223 }
1224 /*
1225 * The right side is perfect, trash the left side.
1226 */
1227 else {
1228 xfs_btree_del_cursor(bno_cur_lt,
1229 XFS_BTREE_NOERROR);
1230 bno_cur_lt = NULL;
1231 }
1232 }
1233 }
1234 /*
1235 * If we couldn't get anything, give up.
1236 */
1237 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1238 TRACE_ALLOC("neither", args);
1239 args->agbno = NULLAGBLOCK;
1240 return 0;
1241 }
1242 /*
1243 * At this point we have selected a freespace entry, either to the
1244 * left or to the right. If it's on the right, copy all the
1245 * useful variables to the "left" set so we only have one
1246 * copy of this code.
1247 */
1248 if (bno_cur_gt) {
1249 bno_cur_lt = bno_cur_gt;
1250 bno_cur_gt = NULL;
1251 ltbno = gtbno;
1252 ltbnoa = gtbnoa;
1253 ltlen = gtlen;
1254 ltlena = gtlena;
1255 j = 1;
1256 } else
1257 j = 0;
1258 /*
1259 * Fix up the length and compute the useful address.
1260 */
1261 ltend = ltbno + ltlen;
1262 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1263 xfs_alloc_fix_len(args);
1264 if (!xfs_alloc_fix_minleft(args)) {
1265 TRACE_ALLOC("nominleft", args);
1266 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1267 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1268 return 0;
1269 }
1270 rlen = args->len;
1271 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
1272 ltlen, &ltnew);
1273 ASSERT(ltnew >= ltbno);
1274 ASSERT(ltnew + rlen <= ltend);
16259e7d 1275 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1da177e4
LT
1276 args->agbno = ltnew;
1277 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1278 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1279 goto error0;
1280 TRACE_ALLOC(j ? "gt" : "lt", args);
1281 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1282 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1283 return 0;
1284
1285 error0:
1286 TRACE_ALLOC("error", args);
1287 if (cnt_cur != NULL)
1288 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1289 if (bno_cur_lt != NULL)
1290 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1291 if (bno_cur_gt != NULL)
1292 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1293 return error;
1294}
1295
1296/*
1297 * Allocate a variable extent anywhere in the allocation group agno.
1298 * Extent's length (returned in len) will be between minlen and maxlen,
1299 * and of the form k * prod + mod unless there's nothing that large.
1300 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1301 */
1302STATIC int /* error */
1303xfs_alloc_ag_vextent_size(
1304 xfs_alloc_arg_t *args) /* allocation argument structure */
1305{
1306 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1307 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
1308 int error; /* error result */
1309 xfs_agblock_t fbno; /* start of found freespace */
1310 xfs_extlen_t flen; /* length of found freespace */
1da177e4
LT
1311 int i; /* temp status variable */
1312 xfs_agblock_t rbno; /* returned block number */
1313 xfs_extlen_t rlen; /* length of returned extent */
1314
1315 /*
1316 * Allocate and initialize a cursor for the by-size btree.
1317 */
561f7d17
CH
1318 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1319 args->agno, XFS_BTNUM_CNT);
1da177e4
LT
1320 bno_cur = NULL;
1321 /*
1322 * Look for an entry >= maxlen+alignment-1 blocks.
1323 */
1324 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1325 args->maxlen + args->alignment - 1, &i)))
1326 goto error0;
1327 /*
1328 * If none, then pick up the last entry in the tree unless the
1329 * tree is empty.
1330 */
1331 if (!i) {
1332 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
1333 &flen, &i)))
1334 goto error0;
1335 if (i == 0 || flen == 0) {
1336 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1337 TRACE_ALLOC("noentry", args);
1338 return 0;
1339 }
1340 ASSERT(i == 1);
1341 }
1342 /*
1343 * There's a freespace as big as maxlen+alignment-1, get it.
1344 */
1345 else {
1346 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
1347 goto error0;
1348 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1349 }
1350 /*
1351 * In the first case above, we got the last entry in the
1352 * by-size btree. Now we check to see if the space hits maxlen
1353 * once aligned; if not, we search left for something better.
1354 * This can't happen in the second case above.
1355 */
1356 xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen,
1357 &rbno, &rlen);
1358 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1359 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1360 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1361 if (rlen < args->maxlen) {
1362 xfs_agblock_t bestfbno;
1363 xfs_extlen_t bestflen;
1364 xfs_agblock_t bestrbno;
1365 xfs_extlen_t bestrlen;
1366
1367 bestrlen = rlen;
1368 bestrbno = rbno;
1369 bestflen = flen;
1370 bestfbno = fbno;
1371 for (;;) {
8df4da4a 1372 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1da177e4
LT
1373 goto error0;
1374 if (i == 0)
1375 break;
1376 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1377 &i)))
1378 goto error0;
1379 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1380 if (flen < bestrlen)
1381 break;
1382 xfs_alloc_compute_aligned(fbno, flen, args->alignment,
1383 args->minlen, &rbno, &rlen);
1384 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1385 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1386 (rlen <= flen && rbno + rlen <= fbno + flen),
1387 error0);
1388 if (rlen > bestrlen) {
1389 bestrlen = rlen;
1390 bestrbno = rbno;
1391 bestflen = flen;
1392 bestfbno = fbno;
1393 if (rlen == args->maxlen)
1394 break;
1395 }
1396 }
1397 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1398 &i)))
1399 goto error0;
1400 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1401 rlen = bestrlen;
1402 rbno = bestrbno;
1403 flen = bestflen;
1404 fbno = bestfbno;
1405 }
1406 args->wasfromfl = 0;
1407 /*
1408 * Fix up the length.
1409 */
1410 args->len = rlen;
1411 xfs_alloc_fix_len(args);
1412 if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
1413 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1414 TRACE_ALLOC("nominleft", args);
1415 args->agbno = NULLAGBLOCK;
1416 return 0;
1417 }
1418 rlen = args->len;
1419 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1420 /*
1421 * Allocate and initialize a cursor for the by-block tree.
1422 */
561f7d17
CH
1423 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1424 args->agno, XFS_BTNUM_BNO);
1da177e4
LT
1425 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1426 rbno, rlen, XFSA_FIXUP_CNT_OK)))
1427 goto error0;
1428 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1429 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1430 cnt_cur = bno_cur = NULL;
1431 args->len = rlen;
1432 args->agbno = rbno;
1433 XFS_WANT_CORRUPTED_GOTO(
1434 args->agbno + args->len <=
16259e7d 1435 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1da177e4
LT
1436 error0);
1437 TRACE_ALLOC("normal", args);
1438 return 0;
1439
1440error0:
1441 TRACE_ALLOC("error", args);
1442 if (cnt_cur)
1443 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1444 if (bno_cur)
1445 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1446 return error;
1447}
1448
1449/*
1450 * Deal with the case where only small freespaces remain.
1451 * Either return the contents of the last freespace record,
1452 * or allocate space from the freelist if there is nothing in the tree.
1453 */
1454STATIC int /* error */
1455xfs_alloc_ag_vextent_small(
1456 xfs_alloc_arg_t *args, /* allocation argument structure */
1457 xfs_btree_cur_t *ccur, /* by-size cursor */
1458 xfs_agblock_t *fbnop, /* result block number */
1459 xfs_extlen_t *flenp, /* result length */
1460 int *stat) /* status: 0-freelist, 1-normal/none */
1461{
1462 int error;
1463 xfs_agblock_t fbno;
1464 xfs_extlen_t flen;
1da177e4
LT
1465 int i;
1466
8df4da4a 1467 if ((error = xfs_btree_decrement(ccur, 0, &i)))
1da177e4
LT
1468 goto error0;
1469 if (i) {
1470 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1471 goto error0;
1472 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1473 }
1474 /*
1475 * Nothing in the btree, try the freelist. Make sure
1476 * to respect minleft even when pulling from the
1477 * freelist.
1478 */
1479 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
16259e7d
CH
1480 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1481 > args->minleft)) {
92821e2b
DC
1482 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1483 if (error)
1da177e4
LT
1484 goto error0;
1485 if (fbno != NULLAGBLOCK) {
1486 if (args->userdata) {
1487 xfs_buf_t *bp;
1488
1489 bp = xfs_btree_get_bufs(args->mp, args->tp,
1490 args->agno, fbno, 0);
1491 xfs_trans_binval(args->tp, bp);
1492 }
1493 args->len = 1;
1494 args->agbno = fbno;
1495 XFS_WANT_CORRUPTED_GOTO(
1496 args->agbno + args->len <=
16259e7d 1497 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1da177e4
LT
1498 error0);
1499 args->wasfromfl = 1;
1500 TRACE_ALLOC("freelist", args);
1501 *stat = 0;
1502 return 0;
1503 }
1504 /*
1505 * Nothing in the freelist.
1506 */
1507 else
1508 flen = 0;
1509 }
1510 /*
1511 * Can't allocate from the freelist for some reason.
1512 */
d432c80e
NS
1513 else {
1514 fbno = NULLAGBLOCK;
1da177e4 1515 flen = 0;
d432c80e 1516 }
1da177e4
LT
1517 /*
1518 * Can't do the allocation, give up.
1519 */
1520 if (flen < args->minlen) {
1521 args->agbno = NULLAGBLOCK;
1522 TRACE_ALLOC("notenough", args);
1523 flen = 0;
1524 }
1525 *fbnop = fbno;
1526 *flenp = flen;
1527 *stat = 1;
1528 TRACE_ALLOC("normal", args);
1529 return 0;
1530
1531error0:
1532 TRACE_ALLOC("error", args);
1533 return error;
1534}
1535
1536/*
1537 * Free the extent starting at agno/bno for length.
1538 */
1539STATIC int /* error */
1540xfs_free_ag_extent(
1541 xfs_trans_t *tp, /* transaction pointer */
1542 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
1543 xfs_agnumber_t agno, /* allocation group number */
1544 xfs_agblock_t bno, /* starting block number */
1545 xfs_extlen_t len, /* length of extent */
1546 int isfl) /* set if is freelist blocks - no sb acctg */
1547{
1548 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1549 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
1550 int error; /* error return value */
1da177e4
LT
1551 xfs_agblock_t gtbno; /* start of right neighbor block */
1552 xfs_extlen_t gtlen; /* length of right neighbor block */
1553 int haveleft; /* have a left neighbor block */
1554 int haveright; /* have a right neighbor block */
1555 int i; /* temp, result code */
1556 xfs_agblock_t ltbno; /* start of left neighbor block */
1557 xfs_extlen_t ltlen; /* length of left neighbor block */
1558 xfs_mount_t *mp; /* mount point struct for filesystem */
1559 xfs_agblock_t nbno; /* new starting block of freespace */
1560 xfs_extlen_t nlen; /* new length of freespace */
1561
1562 mp = tp->t_mountp;
1563 /*
1564 * Allocate and initialize a cursor for the by-block btree.
1565 */
561f7d17 1566 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1da177e4
LT
1567 cnt_cur = NULL;
1568 /*
1569 * Look for a neighboring block on the left (lower block numbers)
1570 * that is contiguous with this space.
1571 */
1572 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1573 goto error0;
1574 if (haveleft) {
1575 /*
1576 * There is a block to our left.
1577 */
1578 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1579 goto error0;
1580 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1581 /*
1582 * It's not contiguous, though.
1583 */
1584 if (ltbno + ltlen < bno)
1585 haveleft = 0;
1586 else {
1587 /*
1588 * If this failure happens the request to free this
1589 * space was invalid, it's (partly) already free.
1590 * Very bad.
1591 */
1592 XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
1593 }
1594 }
1595 /*
1596 * Look for a neighboring block on the right (higher block numbers)
1597 * that is contiguous with this space.
1598 */
637aa50f 1599 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1da177e4
LT
1600 goto error0;
1601 if (haveright) {
1602 /*
1603 * There is a block to our right.
1604 */
1605 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1606 goto error0;
1607 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1608 /*
1609 * It's not contiguous, though.
1610 */
1611 if (bno + len < gtbno)
1612 haveright = 0;
1613 else {
1614 /*
1615 * If this failure happens the request to free this
1616 * space was invalid, it's (partly) already free.
1617 * Very bad.
1618 */
1619 XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
1620 }
1621 }
1622 /*
1623 * Now allocate and initialize a cursor for the by-size tree.
1624 */
561f7d17 1625 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1da177e4
LT
1626 /*
1627 * Have both left and right contiguous neighbors.
1628 * Merge all three into a single free block.
1629 */
1630 if (haveleft && haveright) {
1631 /*
1632 * Delete the old by-size entry on the left.
1633 */
1634 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1635 goto error0;
1636 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1637 if ((error = xfs_alloc_delete(cnt_cur, &i)))
1638 goto error0;
1639 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1640 /*
1641 * Delete the old by-size entry on the right.
1642 */
1643 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1644 goto error0;
1645 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1646 if ((error = xfs_alloc_delete(cnt_cur, &i)))
1647 goto error0;
1648 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1649 /*
1650 * Delete the old by-block entry for the right block.
1651 */
1652 if ((error = xfs_alloc_delete(bno_cur, &i)))
1653 goto error0;
1654 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1655 /*
1656 * Move the by-block cursor back to the left neighbor.
1657 */
8df4da4a 1658 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1da177e4
LT
1659 goto error0;
1660 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1661#ifdef DEBUG
1662 /*
1663 * Check that this is the right record: delete didn't
1664 * mangle the cursor.
1665 */
1666 {
1667 xfs_agblock_t xxbno;
1668 xfs_extlen_t xxlen;
1669
1670 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1671 &i)))
1672 goto error0;
1673 XFS_WANT_CORRUPTED_GOTO(
1674 i == 1 && xxbno == ltbno && xxlen == ltlen,
1675 error0);
1676 }
1677#endif
1678 /*
1679 * Update remaining by-block entry to the new, joined block.
1680 */
1681 nbno = ltbno;
1682 nlen = len + ltlen + gtlen;
1683 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1684 goto error0;
1685 }
1686 /*
1687 * Have only a left contiguous neighbor.
1688 * Merge it together with the new freespace.
1689 */
1690 else if (haveleft) {
1691 /*
1692 * Delete the old by-size entry on the left.
1693 */
1694 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1695 goto error0;
1696 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1697 if ((error = xfs_alloc_delete(cnt_cur, &i)))
1698 goto error0;
1699 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1700 /*
1701 * Back up the by-block cursor to the left neighbor, and
1702 * update its length.
1703 */
8df4da4a 1704 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1da177e4
LT
1705 goto error0;
1706 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1707 nbno = ltbno;
1708 nlen = len + ltlen;
1709 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1710 goto error0;
1711 }
1712 /*
1713 * Have only a right contiguous neighbor.
1714 * Merge it together with the new freespace.
1715 */
1716 else if (haveright) {
1717 /*
1718 * Delete the old by-size entry on the right.
1719 */
1720 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1721 goto error0;
1722 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1723 if ((error = xfs_alloc_delete(cnt_cur, &i)))
1724 goto error0;
1725 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1726 /*
1727 * Update the starting block and length of the right
1728 * neighbor in the by-block tree.
1729 */
1730 nbno = bno;
1731 nlen = len + gtlen;
1732 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1733 goto error0;
1734 }
1735 /*
1736 * No contiguous neighbors.
1737 * Insert the new freespace into the by-block tree.
1738 */
1739 else {
1740 nbno = bno;
1741 nlen = len;
1742 if ((error = xfs_alloc_insert(bno_cur, &i)))
1743 goto error0;
1744 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1745 }
1746 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1747 bno_cur = NULL;
1748 /*
1749 * In all cases we need to insert the new freespace in the by-size tree.
1750 */
1751 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1752 goto error0;
1753 XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
1754 if ((error = xfs_alloc_insert(cnt_cur, &i)))
1755 goto error0;
1756 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1757 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1758 cnt_cur = NULL;
1759 /*
1760 * Update the freespace totals in the ag and superblock.
1761 */
1762 {
1763 xfs_agf_t *agf;
1764 xfs_perag_t *pag; /* per allocation group data */
1765
1766 agf = XFS_BUF_TO_AGF(agbp);
1767 pag = &mp->m_perag[agno];
413d57c9 1768 be32_add_cpu(&agf->agf_freeblks, len);
1da177e4
LT
1769 xfs_trans_agblocks_delta(tp, len);
1770 pag->pagf_freeblks += len;
1771 XFS_WANT_CORRUPTED_GOTO(
16259e7d
CH
1772 be32_to_cpu(agf->agf_freeblks) <=
1773 be32_to_cpu(agf->agf_length),
1da177e4
LT
1774 error0);
1775 TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
1776 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
1777 if (!isfl)
1778 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1779 XFS_STATS_INC(xs_freex);
1780 XFS_STATS_ADD(xs_freeb, len);
1781 }
1782 TRACE_FREE(haveleft ?
1783 (haveright ? "both" : "left") :
1784 (haveright ? "right" : "none"),
1785 agno, bno, len, isfl);
1786
1787 /*
1788 * Since blocks move to the free list without the coordination
1789 * used in xfs_bmap_finish, we can't allow block to be available
1790 * for reallocation and non-transaction writing (user data)
1791 * until we know that the transaction that moved it to the free
1792 * list is permanently on disk. We track the blocks by declaring
1793 * these blocks as "busy"; the busy list is maintained on a per-ag
1794 * basis and each transaction records which entries should be removed
1795 * when the iclog commits to disk. If a busy block is allocated,
1796 * the iclog is pushed up to the LSN that freed the block.
1797 */
1798 xfs_alloc_mark_busy(tp, agno, bno, len);
1799 return 0;
1800
1801 error0:
1802 TRACE_FREE("error", agno, bno, len, isfl);
1803 if (bno_cur)
1804 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1805 if (cnt_cur)
1806 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1807 return error;
1808}
1809
1810/*
1811 * Visible (exported) allocation/free functions.
1812 * Some of these are used just by xfs_alloc_btree.c and this file.
1813 */
1814
1815/*
1816 * Compute and fill in value of m_ag_maxlevels.
1817 */
1818void
1819xfs_alloc_compute_maxlevels(
1820 xfs_mount_t *mp) /* file system mount structure */
1821{
1822 int level;
1823 uint maxblocks;
1824 uint maxleafents;
1825 int minleafrecs;
1826 int minnoderecs;
1827
1828 maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1829 minleafrecs = mp->m_alloc_mnr[0];
1830 minnoderecs = mp->m_alloc_mnr[1];
1831 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1832 for (level = 1; maxblocks > 1; level++)
1833 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1834 mp->m_ag_maxlevels = level;
1835}
1836
1837/*
1838 * Decide whether to use this allocation group for this allocation.
1839 * If so, fix up the btree freelist's size.
1840 */
1841STATIC int /* error */
1842xfs_alloc_fix_freelist(
1843 xfs_alloc_arg_t *args, /* allocation argument structure */
1844 int flags) /* XFS_ALLOC_FLAG_... */
1845{
1846 xfs_buf_t *agbp; /* agf buffer pointer */
1847 xfs_agf_t *agf; /* a.g. freespace structure pointer */
1848 xfs_buf_t *agflbp;/* agfl buffer pointer */
1849 xfs_agblock_t bno; /* freelist block */
1850 xfs_extlen_t delta; /* new blocks needed in freelist */
1851 int error; /* error result code */
1852 xfs_extlen_t longest;/* longest extent in allocation group */
1853 xfs_mount_t *mp; /* file system mount point structure */
1854 xfs_extlen_t need; /* total blocks needed in freelist */
1855 xfs_perag_t *pag; /* per-ag information structure */
1856 xfs_alloc_arg_t targs; /* local allocation arguments */
1857 xfs_trans_t *tp; /* transaction pointer */
1858
1859 mp = args->mp;
1860
1861 pag = args->pag;
1862 tp = args->tp;
1863 if (!pag->pagf_init) {
1864 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1865 &agbp)))
1866 return error;
1867 if (!pag->pagf_init) {
0e1edbd9
NS
1868 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1869 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1da177e4
LT
1870 args->agbp = NULL;
1871 return 0;
1872 }
1873 } else
1874 agbp = NULL;
1875
0e1edbd9
NS
1876 /*
1877 * If this is a metadata preferred pag and we are user data
1da177e4
LT
1878 * then try somewhere else if we are not being asked to
1879 * try harder at this point
1880 */
0e1edbd9
NS
1881 if (pag->pagf_metadata && args->userdata &&
1882 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1883 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1da177e4
LT
1884 args->agbp = NULL;
1885 return 0;
1886 }
1887
0e1edbd9
NS
1888 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1889 need = XFS_MIN_FREELIST_PAG(pag, mp);
1890 delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
1891 /*
1892 * If it looks like there isn't a long enough extent, or enough
1893 * total blocks, reject it.
1894 */
1895 longest = (pag->pagf_longest > delta) ?
1896 (pag->pagf_longest - delta) :
1897 (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
1898 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1899 longest ||
1900 ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1901 need - args->total) < (int)args->minleft)) {
1902 if (agbp)
1903 xfs_trans_brelse(tp, agbp);
1904 args->agbp = NULL;
1905 return 0;
1906 }
1da177e4 1907 }
0e1edbd9 1908
1da177e4
LT
1909 /*
1910 * Get the a.g. freespace buffer.
1911 * Can fail if we're not blocking on locks, and it's held.
1912 */
1913 if (agbp == NULL) {
1914 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1915 &agbp)))
1916 return error;
1917 if (agbp == NULL) {
0e1edbd9
NS
1918 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1919 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1da177e4
LT
1920 args->agbp = NULL;
1921 return 0;
1922 }
1923 }
1924 /*
1925 * Figure out how many blocks we should have in the freelist.
1926 */
1927 agf = XFS_BUF_TO_AGF(agbp);
1928 need = XFS_MIN_FREELIST(agf, mp);
1da177e4
LT
1929 /*
1930 * If there isn't enough total or single-extent, reject it.
1931 */
0e1edbd9
NS
1932 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1933 delta = need > be32_to_cpu(agf->agf_flcount) ?
1934 (need - be32_to_cpu(agf->agf_flcount)) : 0;
1935 longest = be32_to_cpu(agf->agf_longest);
1936 longest = (longest > delta) ? (longest - delta) :
1937 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1938 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1939 longest ||
1940 ((int)(be32_to_cpu(agf->agf_freeblks) +
1941 be32_to_cpu(agf->agf_flcount) - need - args->total) <
1942 (int)args->minleft)) {
1943 xfs_trans_brelse(tp, agbp);
1944 args->agbp = NULL;
1945 return 0;
1946 }
1da177e4
LT
1947 }
1948 /*
1949 * Make the freelist shorter if it's too long.
1950 */
16259e7d 1951 while (be32_to_cpu(agf->agf_flcount) > need) {
1da177e4
LT
1952 xfs_buf_t *bp;
1953
92821e2b
DC
1954 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1955 if (error)
1da177e4
LT
1956 return error;
1957 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
1958 return error;
1959 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1960 xfs_trans_binval(tp, bp);
1961 }
1962 /*
1963 * Initialize the args structure.
1964 */
1965 targs.tp = tp;
1966 targs.mp = mp;
1967 targs.agbp = agbp;
1968 targs.agno = args->agno;
1969 targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1970 targs.minalignslop = 0;
1971 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1972 targs.type = XFS_ALLOCTYPE_THIS_AG;
1973 targs.pag = pag;
1974 if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
1975 return error;
1976 /*
1977 * Make the freelist longer if it's too short.
1978 */
16259e7d 1979 while (be32_to_cpu(agf->agf_flcount) < need) {
1da177e4 1980 targs.agbno = 0;
16259e7d 1981 targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
1da177e4
LT
1982 /*
1983 * Allocate as many blocks as possible at once.
1984 */
e63a3690
NS
1985 if ((error = xfs_alloc_ag_vextent(&targs))) {
1986 xfs_trans_brelse(tp, agflbp);
1da177e4 1987 return error;
e63a3690 1988 }
1da177e4
LT
1989 /*
1990 * Stop if we run out. Won't happen if callers are obeying
1991 * the restrictions correctly. Can happen for free calls
1992 * on a completely full ag.
1993 */
d210a28c 1994 if (targs.agbno == NULLAGBLOCK) {
0e1edbd9
NS
1995 if (flags & XFS_ALLOC_FLAG_FREEING)
1996 break;
1997 xfs_trans_brelse(tp, agflbp);
1998 args->agbp = NULL;
1999 return 0;
d210a28c 2000 }
1da177e4
LT
2001 /*
2002 * Put each allocated block on the list.
2003 */
2004 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
92821e2b
DC
2005 error = xfs_alloc_put_freelist(tp, agbp,
2006 agflbp, bno, 0);
2007 if (error)
1da177e4
LT
2008 return error;
2009 }
2010 }
e63a3690 2011 xfs_trans_brelse(tp, agflbp);
1da177e4
LT
2012 args->agbp = agbp;
2013 return 0;
2014}
2015
2016/*
2017 * Get a block from the freelist.
2018 * Returns with the buffer for the block gotten.
2019 */
int					/* error */
xfs_alloc_get_freelist(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
	int		btreeblk) /* destination is a AGF btree */
{
	xfs_agf_t	*agf;	/* a.g. freespace structure */
	xfs_agfl_t	*agfl;	/* a.g. freelist structure */
	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
	xfs_agblock_t	bno;	/* block number returned */
	int		error;
	int		logflags;
	xfs_mount_t	*mp;	/* mount structure */
	xfs_perag_t	*pag;	/* per allocation group data */

	agf = XFS_BUF_TO_AGF(agbp);
	/*
	 * Freelist is empty, give up.
	 */
	if (!agf->agf_flcount) {
		*bnop = NULLAGBLOCK;
		return 0;
	}
	/*
	 * Read the array of free blocks.
	 */
	mp = tp->t_mountp;
	if ((error = xfs_alloc_read_agfl(mp, tp,
			be32_to_cpu(agf->agf_seqno), &agflbp)))
		return error;
	agfl = XFS_BUF_TO_AGFL(agflbp);
	/*
	 * Get the block number and update the data structures.
	 * The AGFL is a circular array: take the block at the head
	 * index (agf_flfirst) and advance the head, wrapping it back
	 * to 0 when it reaches the end of the array.
	 */
	bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
	be32_add_cpu(&agf->agf_flfirst, 1);
	xfs_trans_brelse(tp, agflbp);
	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
		agf->agf_flfirst = 0;
	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
	be32_add_cpu(&agf->agf_flcount, -1);
	xfs_trans_agflist_delta(tp, -1);
	pag->pagf_flcount--;

	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/*
		 * The block is going into a free-space btree, so account
		 * it in the btree-block counters as well.
		 */
		be32_add_cpu(&agf->agf_btreeblks, 1);
		pag->pagf_btreeblks++;
		logflags |= XFS_AGF_BTREEBLKS;
	}

	TRACE_MODAGF(NULL, agf, logflags);
	xfs_alloc_log_agf(tp, agbp, logflags);
	*bnop = bno;

	/*
	 * As blocks are freed, they are added to the per-ag busy list
	 * and remain there until the freeing transaction is committed to
	 * disk.  Now that we have allocated blocks, this list must be
	 * searched to see if a block is being reused.  If one is, then
	 * the freeing transaction must be pushed to disk NOW by forcing
	 * to disk all iclogs up to that transaction's LSN.
	 */
	xfs_alloc_search_busy(tp, be32_to_cpu(agf->agf_seqno), bno, 1);
	return 0;
}
2087
2088/*
2089 * Log the given fields from the agf structure.
2090 */
2091void
2092xfs_alloc_log_agf(
2093 xfs_trans_t *tp, /* transaction pointer */
2094 xfs_buf_t *bp, /* buffer for a.g. freelist header */
2095 int fields) /* mask of fields to be logged (XFS_AGF_...) */
2096{
2097 int first; /* first byte offset */
2098 int last; /* last byte offset */
2099 static const short offsets[] = {
2100 offsetof(xfs_agf_t, agf_magicnum),
2101 offsetof(xfs_agf_t, agf_versionnum),
2102 offsetof(xfs_agf_t, agf_seqno),
2103 offsetof(xfs_agf_t, agf_length),
2104 offsetof(xfs_agf_t, agf_roots[0]),
2105 offsetof(xfs_agf_t, agf_levels[0]),
2106 offsetof(xfs_agf_t, agf_flfirst),
2107 offsetof(xfs_agf_t, agf_fllast),
2108 offsetof(xfs_agf_t, agf_flcount),
2109 offsetof(xfs_agf_t, agf_freeblks),
2110 offsetof(xfs_agf_t, agf_longest),
92821e2b 2111 offsetof(xfs_agf_t, agf_btreeblks),
1da177e4
LT
2112 sizeof(xfs_agf_t)
2113 };
2114
2115 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2116 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2117}
2118
2119/*
2120 * Interface for inode allocation to force the pag data to be initialized.
2121 */
2122int /* error */
2123xfs_alloc_pagf_init(
2124 xfs_mount_t *mp, /* file system mount structure */
2125 xfs_trans_t *tp, /* transaction pointer */
2126 xfs_agnumber_t agno, /* allocation group number */
2127 int flags) /* XFS_ALLOC_FLAGS_... */
2128{
2129 xfs_buf_t *bp;
2130 int error;
2131
2132 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2133 return error;
2134 if (bp)
2135 xfs_trans_brelse(tp, bp);
2136 return 0;
2137}
2138
2139/*
2140 * Put the block on the freelist for the allocation group.
2141 */
int					/* error */
xfs_alloc_put_freelist(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
	xfs_agblock_t		bno,	/* block being freed */
	int			btreeblk) /* block came from a AGF btree */
{
	xfs_agf_t		*agf;	/* a.g. freespace structure */
	xfs_agfl_t		*agfl;	/* a.g. free block array */
	__be32			*blockp;/* pointer to array entry */
	int			error;
	int			logflags;
	xfs_mount_t		*mp;	/* mount structure */
	xfs_perag_t		*pag;	/* per allocation group data */

	agf = XFS_BUF_TO_AGF(agbp);
	mp = tp->t_mountp;

	/* Read the AGFL buffer ourselves if the caller did not pass one in. */
	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
			be32_to_cpu(agf->agf_seqno), &agflbp)))
		return error;
	agfl = XFS_BUF_TO_AGFL(agflbp);
	/*
	 * The AGFL is a circular array: advance the tail index
	 * (agf_fllast), wrapping back to 0 at the end of the array.
	 */
	be32_add_cpu(&agf->agf_fllast, 1);
	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
		agf->agf_fllast = 0;
	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
	be32_add_cpu(&agf->agf_flcount, 1);
	xfs_trans_agflist_delta(tp, 1);
	pag->pagf_flcount++;

	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/*
		 * Block came back from a free-space btree, so it no longer
		 * counts against the btree block accounting.
		 */
		be32_add_cpu(&agf->agf_btreeblks, -1);
		pag->pagf_btreeblks--;
		logflags |= XFS_AGF_BTREEBLKS;
	}

	TRACE_MODAGF(NULL, agf, logflags);
	xfs_alloc_log_agf(tp, agbp, logflags);

	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
	/* Store the freed block in the new tail slot of the AGFL. */
	blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
	*blockp = cpu_to_be32(bno);
	/*
	 * NOTE(review): the AGF was already traced/logged above with the
	 * same logflags and only the AGFL has changed since; this second
	 * trace/log pair looks redundant -- confirm before removing.
	 */
	TRACE_MODAGF(NULL, agf, logflags);
	xfs_alloc_log_agf(tp, agbp, logflags);
	/* Log just the one AGFL slot that was written. */
	xfs_trans_log_buf(tp, agflbp,
		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
			sizeof(xfs_agblock_t) - 1));
	return 0;
}
2194
2195/*
2196 * Read in the allocation group header (free/alloc section).
2197 */
int					/* error */
xfs_alloc_read_agf(
	xfs_mount_t	*mp,	/* mount point structure */
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_agnumber_t	agno,	/* allocation group number */
	int		flags,	/* XFS_ALLOC_FLAG_... */
	xfs_buf_t	**bpp)	/* buffer for the ag freelist header */
{
	xfs_agf_t	*agf;	/* ag freelist header */
	int		agf_ok;	/* set if agf is consistent */
	xfs_buf_t	*bp;	/* return value */
	xfs_perag_t	*pag;	/* per allocation group data */
	int		error;

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1),
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U,
			&bp);
	if (error)
		return error;
	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
	/* A trylock miss returns success with no buffer. */
	if (!bp) {
		*bpp = NULL;
		return 0;
	}
	/*
	 * Validate the magic number of the agf block, plus basic sanity
	 * of the version, free-block count, and freelist indices.
	 */
	agf = XFS_BUF_TO_AGF(bp);
	agf_ok =
		be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
		XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
		be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
		be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
		be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
		be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
	if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
			XFS_RANDOM_ALLOC_READ_AGF))) {
		XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
				     XFS_ERRLEVEL_LOW, mp, agf);
		xfs_trans_brelse(tp, bp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	pag = &mp->m_perag[agno];
	if (!pag->pagf_init) {
		/*
		 * First read of this AG: seed the in-core per-ag copies
		 * of the AGF counters and btree levels, and set up the
		 * busy-extent list and its lock.
		 */
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		spin_lock_init(&pag->pagb_lock);
		pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS *
					sizeof(xfs_perag_busy_t), KM_SLEEP);
		pag->pagf_init = 1;
	}
#ifdef DEBUG
	else if (!XFS_FORCED_SHUTDOWN(mp)) {
		/* Cross-check the cached per-ag values against the disk AGF. */
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
	*bpp = bp;
	return 0;
}
2274
2275/*
2276 * Allocate an extent (variable-size).
2277 * Depending on the allocation type, we either look in a single allocation
2278 * group or loop over the allocation groups to find the result.
2279 */
int				/* error */
xfs_alloc_vextent(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_agblock_t	agsize;	/* allocation group size */
	int		error;
	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
	xfs_extlen_t	minleft;/* minimum left value, temp copy */
	xfs_mount_t	*mp;	/* mount structure pointer */
	xfs_agnumber_t	sagno;	/* starting allocation group number */
	xfs_alloctype_t	type;	/* input allocation type */
	int		bump_rotor = 0;	/* advance m_agfrotor when done */
	int		no_min = 0;	/* retrying with minleft forced to 0 */
	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */

	mp = args->mp;
	type = args->otype = args->type;
	args->agbno = NULLAGBLOCK;
	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;
	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);
	/* On non-debug kernels, bad arguments fail the allocation quietly. */
	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		args->fsbno = NULLFSBLOCK;
		TRACE_ALLOC("badargs", args);
		return 0;
	}
	minleft = args->minleft;

	switch (type) {
	case XFS_ALLOCTYPE_THIS_AG:
	case XFS_ALLOCTYPE_NEAR_BNO:
	case XFS_ALLOCTYPE_THIS_BNO:
		/*
		 * These three force us into a single a.g.
		 */
		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
		down_read(&mp->m_peraglock);
		args->pag = &mp->m_perag[args->agno];
		/* minleft is cleared around the freelist fixup only. */
		args->minleft = 0;
		error = xfs_alloc_fix_freelist(args, 0);
		args->minleft = minleft;
		if (error) {
			TRACE_ALLOC("nofix", args);
			goto error0;
		}
		if (!args->agbp) {
			/* Freelist fixup could not get us a usable AG. */
			up_read(&mp->m_peraglock);
			TRACE_ALLOC("noagbp", args);
			break;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		if ((error = xfs_alloc_ag_vextent(args)))
			goto error0;
		up_read(&mp->m_peraglock);
		break;
	case XFS_ALLOCTYPE_START_BNO:
		/*
		 * Try near allocation first, then anywhere-in-ag after
		 * the first a.g. fails.
		 */
		if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
			/*
			 * inode32 mode: start the first data allocation at
			 * the rotor AG instead of near the inode.
			 */
			args->fsbno = XFS_AGB_TO_FSB(mp,
					((mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount), 0);
			bump_rotor = 1;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		args->type = XFS_ALLOCTYPE_NEAR_BNO;
		/* FALLTHROUGH */
	case XFS_ALLOCTYPE_ANY_AG:
	case XFS_ALLOCTYPE_START_AG:
	case XFS_ALLOCTYPE_FIRST_AG:
		/*
		 * Rotate through the allocation groups looking for a winner.
		 */
		if (type == XFS_ALLOCTYPE_ANY_AG) {
			/*
			 * Start with the last place we left off.
			 */
			args->agno = sagno = (mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount;
			args->type = XFS_ALLOCTYPE_THIS_AG;
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		} else if (type == XFS_ALLOCTYPE_FIRST_AG) {
			/*
			 * Start with allocation group given by bno.
			 */
			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			args->type = XFS_ALLOCTYPE_THIS_AG;
			sagno = 0;
			flags = 0;
		} else {
			if (type == XFS_ALLOCTYPE_START_AG)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * Start with the given allocation group.
			 */
			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		}
		/*
		 * Loop over allocation groups twice; first time with
		 * trylock set, second time without.
		 */
		down_read(&mp->m_peraglock);
		for (;;) {
			args->pag = &mp->m_perag[args->agno];
			if (no_min) args->minleft = 0;
			error = xfs_alloc_fix_freelist(args, flags);
			args->minleft = minleft;
			if (error) {
				TRACE_ALLOC("nofix", args);
				goto error0;
			}
			/*
			 * If we get a buffer back then the allocation will fly.
			 */
			if (args->agbp) {
				if ((error = xfs_alloc_ag_vextent(args)))
					goto error0;
				break;
			}
			TRACE_ALLOC("loopfailed", args);
			/*
			 * Didn't work, figure out the next iteration.
			 */
			if (args->agno == sagno &&
			    type == XFS_ALLOCTYPE_START_BNO)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * For the first allocation, we can try any AG to get
			 * space.  However, if we already have allocated a
			 * block, we don't want to try AGs whose number is below
			 * sagno. Otherwise, we may end up with out-of-order
			 * locking of AGF, which might cause deadlock.
			 */
			if (++(args->agno) == mp->m_sb.sb_agcount) {
				if (args->firstblock != NULLFSBLOCK)
					args->agno = sagno;
				else
					args->agno = 0;
			}
			/*
			 * Reached the starting a.g., must either be done
			 * or switch to non-trylock mode.
			 */
			if (args->agno == sagno) {
				if (no_min == 1) {
					/* Second full pass failed too. */
					args->agbno = NULLAGBLOCK;
					TRACE_ALLOC("allfailed", args);
					break;
				}
				if (flags == 0) {
					/* Last resort: retry with minleft 0. */
					no_min = 1;
				} else {
					flags = 0;
					if (type == XFS_ALLOCTYPE_START_BNO) {
						args->agbno = XFS_FSB_TO_AGBNO(mp,
							args->fsbno);
						args->type = XFS_ALLOCTYPE_NEAR_BNO;
					}
				}
			}
		}
		up_read(&mp->m_peraglock);
		if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
			/* Remember where to start the next rotor search. */
			if (args->agno == sagno)
				mp->m_agfrotor = (mp->m_agfrotor + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
			else
				mp->m_agfrotor = (args->agno * rotorstep + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
		}
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}
	if (args->agbno == NULLAGBLOCK)
		args->fsbno = NULLFSBLOCK;
	else {
		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
#ifdef DEBUG
		ASSERT(args->len >= args->minlen);
		ASSERT(args->len <= args->maxlen);
		ASSERT(args->agbno % args->alignment == 0);
		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
			args->len);
#endif
	}
	return 0;
error0:
	up_read(&mp->m_peraglock);
	return error;
}
2491
2492/*
2493 * Free an extent.
2494 * Just break up the extent address and hand off to xfs_free_ag_extent
2495 * after fixing up the freelist.
2496 */
2497int /* error */
2498xfs_free_extent(
2499 xfs_trans_t *tp, /* transaction pointer */
2500 xfs_fsblock_t bno, /* starting block number of extent */
2501 xfs_extlen_t len) /* length of extent */
2502{
0e1edbd9 2503 xfs_alloc_arg_t args;
1da177e4
LT
2504 int error;
2505
2506 ASSERT(len != 0);
0e1edbd9 2507 memset(&args, 0, sizeof(xfs_alloc_arg_t));
1da177e4
LT
2508 args.tp = tp;
2509 args.mp = tp->t_mountp;
2510 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2511 ASSERT(args.agno < args.mp->m_sb.sb_agcount);
2512 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
1da177e4
LT
2513 down_read(&args.mp->m_peraglock);
2514 args.pag = &args.mp->m_perag[args.agno];
d210a28c 2515 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
1da177e4
LT
2516 goto error0;
2517#ifdef DEBUG
2518 ASSERT(args.agbp != NULL);
0e1edbd9
NS
2519 ASSERT((args.agbno + len) <=
2520 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
1da177e4 2521#endif
0e1edbd9 2522 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
1da177e4
LT
2523error0:
2524 up_read(&args.mp->m_peraglock);
2525 return error;
2526}
2527
2528
2529/*
2530 * AG Busy list management
2531 * The busy list contains block ranges that have been freed but whose
c41564b5 2532 * transactions have not yet hit disk. If any block listed in a busy
1da177e4
LT
2533 * list is reused, the transaction that freed it must be forced to disk
2534 * before continuing to use the block.
2535 *
2536 * xfs_alloc_mark_busy - add to the per-ag busy list
2537 * xfs_alloc_clear_busy - remove an item from the per-ag busy list
2538 */
2539void
2540xfs_alloc_mark_busy(xfs_trans_t *tp,
2541 xfs_agnumber_t agno,
2542 xfs_agblock_t bno,
2543 xfs_extlen_t len)
2544{
2545 xfs_mount_t *mp;
2546 xfs_perag_busy_t *bsy;
2547 int n;
1da177e4
LT
2548
2549 mp = tp->t_mountp;
64137e56 2550 spin_lock(&mp->m_perag[agno].pagb_lock);
1da177e4
LT
2551
2552 /* search pagb_list for an open slot */
2553 for (bsy = mp->m_perag[agno].pagb_list, n = 0;
2554 n < XFS_PAGB_NUM_SLOTS;
2555 bsy++, n++) {
2556 if (bsy->busy_tp == NULL) {
2557 break;
2558 }
2559 }
2560
2561 if (n < XFS_PAGB_NUM_SLOTS) {
2562 bsy = &mp->m_perag[agno].pagb_list[n];
2563 mp->m_perag[agno].pagb_count++;
2564 TRACE_BUSY("xfs_alloc_mark_busy", "got", agno, bno, len, n, tp);
2565 bsy->busy_start = bno;
2566 bsy->busy_length = len;
2567 bsy->busy_tp = tp;
2568 xfs_trans_add_busy(tp, agno, n);
2569 } else {
2570 TRACE_BUSY("xfs_alloc_mark_busy", "FULL", agno, bno, len, -1, tp);
2571 /*
2572 * The busy list is full! Since it is now not possible to
2573 * track the free block, make this a synchronous transaction
2574 * to insure that the block is not reused before this
2575 * transaction commits.
2576 */
2577 xfs_trans_set_sync(tp);
2578 }
2579
64137e56 2580 spin_unlock(&mp->m_perag[agno].pagb_lock);
1da177e4
LT
2581}
2582
2583void
2584xfs_alloc_clear_busy(xfs_trans_t *tp,
2585 xfs_agnumber_t agno,
2586 int idx)
2587{
2588 xfs_mount_t *mp;
2589 xfs_perag_busy_t *list;
1da177e4
LT
2590
2591 mp = tp->t_mountp;
2592
64137e56 2593 spin_lock(&mp->m_perag[agno].pagb_lock);
1da177e4
LT
2594 list = mp->m_perag[agno].pagb_list;
2595
2596 ASSERT(idx < XFS_PAGB_NUM_SLOTS);
2597 if (list[idx].busy_tp == tp) {
2598 TRACE_UNBUSY("xfs_alloc_clear_busy", "found", agno, idx, tp);
2599 list[idx].busy_tp = NULL;
2600 mp->m_perag[agno].pagb_count--;
2601 } else {
2602 TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp);
2603 }
2604
64137e56 2605 spin_unlock(&mp->m_perag[agno].pagb_lock);
1da177e4
LT
2606}
2607
2608
2609/*
f4586e40
DC
2610 * If we find the extent in the busy list, force the log out to get the
2611 * extent out of the busy list so the caller can use it straight away.
1da177e4 2612 */
STATIC void
xfs_alloc_search_busy(xfs_trans_t *tp,
		    xfs_agnumber_t agno,
		    xfs_agblock_t bno,
		    xfs_extlen_t len)
{
	xfs_mount_t		*mp;
	xfs_perag_busy_t	*bsy;
	xfs_agblock_t		uend, bend;
	xfs_lsn_t		lsn;
	int			cnt;	/* remaining used slots; doubles as "found" flag */

	mp = tp->t_mountp;

	spin_lock(&mp->m_perag[agno].pagb_lock);
	cnt = mp->m_perag[agno].pagb_count;

	uend = bno + len - 1;

	/* search pagb_list for this slot, skipping open slots */
	for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) {

		/*
		 * (start1,length1) within (start2, length2)
		 */
		if (bsy->busy_tp != NULL) {
			bend = bsy->busy_start + bsy->busy_length - 1;
			if ((bno > bend) || (uend < bsy->busy_start)) {
				/* No overlap; one fewer used slot to check. */
				cnt--;
			} else {
				TRACE_BUSYSEARCH("xfs_alloc_search_busy",
					 "found1", agno, bno, len, tp);
				/* Overlap found: leave cnt non-zero and stop. */
				break;
			}
		}
	}

	/*
	 * If a block was found, force the log through the LSN of the
	 * transaction that freed the block
	 */
	if (cnt) {
		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, tp);
		/*
		 * Read the LSN while still holding the lock, then drop it
		 * before the (potentially blocking) log force.
		 */
		lsn = bsy->busy_tp->t_commit_lsn;
		spin_unlock(&mp->m_perag[agno].pagb_lock);
		xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
	} else {
		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, tp);
		spin_unlock(&mp->m_perag[agno].pagb_lock);
	}
}