]>
Commit | Line | Data |
---|---|---|
b16817b6 DC |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* | |
3 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | |
4 | * Copyright (c) 2018 Red Hat, Inc. | |
5 | * All rights reserved. | |
6 | */ | |
7 | ||
8 | #include "xfs.h" | |
9 | #include "xfs_fs.h" | |
10 | #include "xfs_shared.h" | |
11 | #include "xfs_format.h" | |
12 | #include "xfs_trans_resv.h" | |
f327a007 | 13 | #include "xfs_bit.h" |
b16817b6 DC |
14 | #include "xfs_sb.h" |
15 | #include "xfs_mount.h" | |
16 | #include "xfs_btree.h" | |
17 | #include "xfs_alloc_btree.h" | |
18 | #include "xfs_rmap_btree.h" | |
19 | #include "xfs_alloc.h" | |
49dd56f2 | 20 | #include "xfs_ialloc.h" |
b16817b6 DC |
21 | #include "xfs_rmap.h" |
22 | #include "xfs_ag.h" | |
7cd5006b | 23 | #include "xfs_ag_resv.h" |
1302c6a2 | 24 | #include "xfs_health.h" |
46141dc8 GX |
25 | #include "xfs_error.h" |
26 | #include "xfs_bmap.h" | |
27 | #include "xfs_defer.h" | |
28 | #include "xfs_log_format.h" | |
29 | #include "xfs_trans.h" | |
9bbafc71 | 30 | #include "xfs_trace.h" |
07b6403a DC |
31 | #include "xfs_inode.h" |
32 | #include "xfs_icache.h" | |
33 | ||
9bbafc71 DC |
34 | |
35 | /* | |
36 | * Passive reference counting access wrappers to the perag structures. If the | |
37 | * per-ag structure is to be freed, the freeing code is responsible for cleaning | |
38 | * up objects with passive references before freeing the structure. This is | |
39 | * things like cached buffers. | |
40 | */ | |
41 | struct xfs_perag * | |
42 | xfs_perag_get( | |
43 | struct xfs_mount *mp, | |
44 | xfs_agnumber_t agno) | |
45 | { | |
46 | struct xfs_perag *pag; | |
47 | int ref = 0; | |
48 | ||
49 | rcu_read_lock(); | |
50 | pag = radix_tree_lookup(&mp->m_perag_tree, agno); | |
51 | if (pag) { | |
52 | ASSERT(atomic_read(&pag->pag_ref) >= 0); | |
53 | ref = atomic_inc_return(&pag->pag_ref); | |
54 | } | |
55 | rcu_read_unlock(); | |
56 | trace_xfs_perag_get(mp, agno, ref, _RET_IP_); | |
57 | return pag; | |
58 | } | |
59 | ||
60 | /* | |
61 | * search from @first to find the next perag with the given tag set. | |
62 | */ | |
63 | struct xfs_perag * | |
64 | xfs_perag_get_tag( | |
65 | struct xfs_mount *mp, | |
66 | xfs_agnumber_t first, | |
ffc18582 | 67 | unsigned int tag) |
9bbafc71 DC |
68 | { |
69 | struct xfs_perag *pag; | |
70 | int found; | |
71 | int ref; | |
72 | ||
73 | rcu_read_lock(); | |
74 | found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, | |
75 | (void **)&pag, first, 1, tag); | |
76 | if (found <= 0) { | |
77 | rcu_read_unlock(); | |
78 | return NULL; | |
79 | } | |
80 | ref = atomic_inc_return(&pag->pag_ref); | |
81 | rcu_read_unlock(); | |
82 | trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_); | |
83 | return pag; | |
84 | } | |
85 | ||
86 | void | |
87 | xfs_perag_put( | |
88 | struct xfs_perag *pag) | |
89 | { | |
90 | int ref; | |
91 | ||
92 | ASSERT(atomic_read(&pag->pag_ref) > 0); | |
93 | ref = atomic_dec_return(&pag->pag_ref); | |
94 | trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); | |
95 | } | |
96 | ||
97 | /* | |
98 | * xfs_initialize_perag_data | |
99 | * | |
100 | * Read in each per-ag structure so we can count up the number of | |
101 | * allocated inodes, free inodes and used filesystem blocks as this | |
102 | * information is no longer persistent in the superblock. Once we have | |
103 | * this information, write it into the in-core superblock structure. | |
104 | */ | |
105 | int | |
106 | xfs_initialize_perag_data( | |
50920116 DC |
107 | struct xfs_mount *mp, |
108 | xfs_agnumber_t agcount) | |
9bbafc71 | 109 | { |
50920116 DC |
110 | xfs_agnumber_t index; |
111 | struct xfs_perag *pag; | |
112 | struct xfs_sb *sbp = &mp->m_sb; | |
113 | uint64_t ifree = 0; | |
114 | uint64_t ialloc = 0; | |
115 | uint64_t bfree = 0; | |
116 | uint64_t bfreelst = 0; | |
117 | uint64_t btree = 0; | |
118 | uint64_t fdblocks; | |
119 | int error = 0; | |
9bbafc71 DC |
120 | |
121 | for (index = 0; index < agcount; index++) { | |
122 | /* | |
123 | * read the agf, then the agi. This gets us | |
124 | * all the information we need and populates the | |
125 | * per-ag structures for us. | |
126 | */ | |
127 | error = xfs_alloc_pagf_init(mp, NULL, index, 0); | |
128 | if (error) | |
129 | return error; | |
130 | ||
131 | error = xfs_ialloc_pagi_init(mp, NULL, index); | |
132 | if (error) | |
133 | return error; | |
134 | pag = xfs_perag_get(mp, index); | |
135 | ifree += pag->pagi_freecount; | |
136 | ialloc += pag->pagi_count; | |
137 | bfree += pag->pagf_freeblks; | |
138 | bfreelst += pag->pagf_flcount; | |
139 | btree += pag->pagf_btreeblks; | |
140 | xfs_perag_put(pag); | |
141 | } | |
142 | fdblocks = bfree + bfreelst + btree; | |
143 | ||
144 | /* | |
145 | * If the new summary counts are obviously incorrect, fail the | |
146 | * mount operation because that implies the AGFs are also corrupt. | |
147 | * Clear FS_COUNTERS so that we don't unmount with a dirty log, which | |
148 | * will prevent xfs_repair from fixing anything. | |
149 | */ | |
150 | if (fdblocks > sbp->sb_dblocks || ifree > ialloc) { | |
151 | xfs_alert(mp, "AGF corruption. Please run xfs_repair."); | |
152 | error = -EFSCORRUPTED; | |
153 | goto out; | |
154 | } | |
155 | ||
156 | /* Overwrite incore superblock counters with just-read data */ | |
157 | spin_lock(&mp->m_sb_lock); | |
158 | sbp->sb_ifree = ifree; | |
159 | sbp->sb_icount = ialloc; | |
160 | sbp->sb_fdblocks = fdblocks; | |
161 | spin_unlock(&mp->m_sb_lock); | |
162 | ||
163 | xfs_reinit_percpu_counters(mp); | |
164 | out: | |
165 | xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS); | |
166 | return error; | |
167 | } | |
b16817b6 | 168 | |
07b6403a DC |
169 | STATIC void |
170 | __xfs_free_perag( | |
171 | struct rcu_head *head) | |
172 | { | |
173 | struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); | |
174 | ||
175 | ASSERT(!delayed_work_pending(&pag->pag_blockgc_work)); | |
176 | ASSERT(atomic_read(&pag->pag_ref) == 0); | |
177 | kmem_free(pag); | |
178 | } | |
179 | ||
180 | /* | |
181 | * Free up the per-ag resources associated with the mount structure. | |
182 | */ | |
183 | void | |
184 | xfs_free_perag( | |
185 | struct xfs_mount *mp) | |
186 | { | |
187 | struct xfs_perag *pag; | |
188 | xfs_agnumber_t agno; | |
189 | ||
190 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { | |
191 | spin_lock(&mp->m_perag_lock); | |
192 | pag = radix_tree_delete(&mp->m_perag_tree, agno); | |
193 | spin_unlock(&mp->m_perag_lock); | |
194 | ASSERT(pag); | |
195 | ASSERT(atomic_read(&pag->pag_ref) == 0); | |
196 | ||
197 | cancel_delayed_work_sync(&pag->pag_blockgc_work); | |
198 | xfs_iunlink_destroy(pag); | |
199 | xfs_buf_hash_destroy(pag); | |
200 | ||
201 | call_rcu(&pag->rcu_head, __xfs_free_perag); | |
202 | } | |
203 | } | |
204 | ||
205 | int | |
206 | xfs_initialize_perag( | |
207 | struct xfs_mount *mp, | |
208 | xfs_agnumber_t agcount, | |
209 | xfs_agnumber_t *maxagi) | |
210 | { | |
211 | struct xfs_perag *pag; | |
212 | xfs_agnumber_t index; | |
213 | xfs_agnumber_t first_initialised = NULLAGNUMBER; | |
214 | int error; | |
215 | ||
216 | /* | |
217 | * Walk the current per-ag tree so we don't try to initialise AGs | |
218 | * that already exist (growfs case). Allocate and insert all the | |
219 | * AGs we don't find ready for initialisation. | |
220 | */ | |
221 | for (index = 0; index < agcount; index++) { | |
222 | pag = xfs_perag_get(mp, index); | |
223 | if (pag) { | |
224 | xfs_perag_put(pag); | |
225 | continue; | |
226 | } | |
227 | ||
228 | pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); | |
229 | if (!pag) { | |
230 | error = -ENOMEM; | |
231 | goto out_unwind_new_pags; | |
232 | } | |
233 | pag->pag_agno = index; | |
234 | pag->pag_mount = mp; | |
235 | ||
236 | error = radix_tree_preload(GFP_NOFS); | |
237 | if (error) | |
238 | goto out_free_pag; | |
239 | ||
240 | spin_lock(&mp->m_perag_lock); | |
241 | if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { | |
242 | WARN_ON_ONCE(1); | |
243 | spin_unlock(&mp->m_perag_lock); | |
244 | radix_tree_preload_end(); | |
245 | error = -EEXIST; | |
246 | goto out_free_pag; | |
247 | } | |
248 | spin_unlock(&mp->m_perag_lock); | |
249 | radix_tree_preload_end(); | |
250 | ||
251 | /* Place kernel structure only init below this point. */ | |
252 | spin_lock_init(&pag->pag_ici_lock); | |
253 | spin_lock_init(&pag->pagb_lock); | |
254 | spin_lock_init(&pag->pag_state_lock); | |
255 | INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker); | |
256 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); | |
257 | init_waitqueue_head(&pag->pagb_wait); | |
258 | pag->pagb_count = 0; | |
259 | pag->pagb_tree = RB_ROOT; | |
260 | ||
261 | error = xfs_buf_hash_init(pag); | |
262 | if (error) | |
263 | goto out_remove_pag; | |
264 | ||
265 | error = xfs_iunlink_init(pag); | |
266 | if (error) | |
267 | goto out_hash_destroy; | |
268 | ||
269 | /* first new pag is fully initialized */ | |
270 | if (first_initialised == NULLAGNUMBER) | |
271 | first_initialised = index; | |
272 | } | |
273 | ||
274 | index = xfs_set_inode_alloc(mp, agcount); | |
275 | ||
276 | if (maxagi) | |
277 | *maxagi = index; | |
278 | ||
279 | mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); | |
280 | return 0; | |
281 | ||
282 | out_hash_destroy: | |
283 | xfs_buf_hash_destroy(pag); | |
284 | out_remove_pag: | |
285 | radix_tree_delete(&mp->m_perag_tree, index); | |
286 | out_free_pag: | |
287 | kmem_free(pag); | |
288 | out_unwind_new_pags: | |
289 | /* unwind any prior newly initialized pags */ | |
290 | for (index = first_initialised; index < agcount; index++) { | |
291 | pag = radix_tree_delete(&mp->m_perag_tree, index); | |
292 | if (!pag) | |
293 | break; | |
294 | xfs_buf_hash_destroy(pag); | |
295 | xfs_iunlink_destroy(pag); | |
296 | kmem_free(pag); | |
297 | } | |
298 | return error; | |
299 | } | |
b16817b6 | 300 | |
2842b6db | 301 | static int |
b16817b6 DC |
302 | xfs_get_aghdr_buf( |
303 | struct xfs_mount *mp, | |
304 | xfs_daddr_t blkno, | |
305 | size_t numblks, | |
2842b6db | 306 | struct xfs_buf **bpp, |
b16817b6 DC |
307 | const struct xfs_buf_ops *ops) |
308 | { | |
309 | struct xfs_buf *bp; | |
2842b6db | 310 | int error; |
b16817b6 | 311 | |
2842b6db DW |
312 | error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0, &bp); |
313 | if (error) | |
314 | return error; | |
b16817b6 | 315 | |
b16817b6 DC |
316 | bp->b_maps[0].bm_bn = blkno; |
317 | bp->b_ops = ops; | |
318 | ||
2842b6db DW |
319 | *bpp = bp; |
320 | return 0; | |
b16817b6 DC |
321 | } |
322 | ||
f327a007 DW |
323 | static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id) |
324 | { | |
325 | return mp->m_sb.sb_logstart > 0 && | |
326 | id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); | |
327 | } | |
328 | ||
b16817b6 DC |
329 | /* |
330 | * Generic btree root block init function | |
331 | */ | |
332 | static void | |
333 | xfs_btroot_init( | |
334 | struct xfs_mount *mp, | |
335 | struct xfs_buf *bp, | |
336 | struct aghdr_init_data *id) | |
337 | { | |
f5b999c0 | 338 | xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno); |
b16817b6 DC |
339 | } |
340 | ||
8d90857c | 341 | /* Finish initializing a free space btree. */ |
b16817b6 | 342 | static void |
8d90857c | 343 | xfs_freesp_init_recs( |
b16817b6 DC |
344 | struct xfs_mount *mp, |
345 | struct xfs_buf *bp, | |
346 | struct aghdr_init_data *id) | |
347 | { | |
348 | struct xfs_alloc_rec *arec; | |
f327a007 | 349 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
b16817b6 | 350 | |
b16817b6 DC |
351 | arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); |
352 | arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); | |
f327a007 DW |
353 | |
354 | if (is_log_ag(mp, id)) { | |
355 | struct xfs_alloc_rec *nrec; | |
356 | xfs_agblock_t start = XFS_FSB_TO_AGBNO(mp, | |
357 | mp->m_sb.sb_logstart); | |
358 | ||
359 | ASSERT(start >= mp->m_ag_prealloc_blocks); | |
360 | if (start != mp->m_ag_prealloc_blocks) { | |
361 | /* | |
362 | * Modify first record to pad stripe align of log | |
363 | */ | |
364 | arec->ar_blockcount = cpu_to_be32(start - | |
365 | mp->m_ag_prealloc_blocks); | |
366 | nrec = arec + 1; | |
367 | ||
368 | /* | |
369 | * Insert second record at start of internal log | |
370 | * which then gets trimmed. | |
371 | */ | |
372 | nrec->ar_startblock = cpu_to_be32( | |
373 | be32_to_cpu(arec->ar_startblock) + | |
374 | be32_to_cpu(arec->ar_blockcount)); | |
375 | arec = nrec; | |
376 | be16_add_cpu(&block->bb_numrecs, 1); | |
377 | } | |
378 | /* | |
379 | * Change record start to after the internal log | |
380 | */ | |
381 | be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks); | |
382 | } | |
383 | ||
384 | /* | |
385 | * Calculate the record block count and check for the case where | |
386 | * the log might have consumed all available space in the AG. If | |
387 | * so, reset the record count to 0 to avoid exposure of an invalid | |
388 | * record start block. | |
389 | */ | |
b16817b6 DC |
390 | arec->ar_blockcount = cpu_to_be32(id->agsize - |
391 | be32_to_cpu(arec->ar_startblock)); | |
f327a007 DW |
392 | if (!arec->ar_blockcount) |
393 | block->bb_numrecs = 0; | |
b16817b6 DC |
394 | } |
395 | ||
8d90857c DW |
396 | /* |
397 | * Alloc btree root block init functions | |
398 | */ | |
b16817b6 | 399 | static void |
8d90857c | 400 | xfs_bnoroot_init( |
b16817b6 DC |
401 | struct xfs_mount *mp, |
402 | struct xfs_buf *bp, | |
403 | struct aghdr_init_data *id) | |
404 | { | |
8d90857c DW |
405 | xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno); |
406 | xfs_freesp_init_recs(mp, bp, id); | |
407 | } | |
b16817b6 | 408 | |
8d90857c DW |
409 | static void |
410 | xfs_cntroot_init( | |
411 | struct xfs_mount *mp, | |
412 | struct xfs_buf *bp, | |
413 | struct aghdr_init_data *id) | |
414 | { | |
f5b999c0 | 415 | xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno); |
8d90857c | 416 | xfs_freesp_init_recs(mp, bp, id); |
b16817b6 DC |
417 | } |
418 | ||
419 | /* | |
420 | * Reverse map root block init | |
421 | */ | |
422 | static void | |
423 | xfs_rmaproot_init( | |
424 | struct xfs_mount *mp, | |
425 | struct xfs_buf *bp, | |
426 | struct aghdr_init_data *id) | |
427 | { | |
428 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); | |
429 | struct xfs_rmap_rec *rrec; | |
430 | ||
f5b999c0 | 431 | xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno); |
b16817b6 DC |
432 | |
433 | /* | |
434 | * mark the AG header regions as static metadata. The BNO | |
435 | * btree block is the first block after the headers, so | |
436 | * its location defines the size of region the static | |
437 | * metadata consumes. | |
438 | * | |
439 | * Note: unlike mkfs, we never have to account for log | |
440 | * space when growing the data regions | |
441 | */ | |
442 | rrec = XFS_RMAP_REC_ADDR(block, 1); | |
443 | rrec->rm_startblock = 0; | |
444 | rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp)); | |
445 | rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS); | |
446 | rrec->rm_offset = 0; | |
447 | ||
448 | /* account freespace btree root blocks */ | |
449 | rrec = XFS_RMAP_REC_ADDR(block, 2); | |
450 | rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp)); | |
451 | rrec->rm_blockcount = cpu_to_be32(2); | |
452 | rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); | |
453 | rrec->rm_offset = 0; | |
454 | ||
455 | /* account inode btree root blocks */ | |
456 | rrec = XFS_RMAP_REC_ADDR(block, 3); | |
457 | rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp)); | |
458 | rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) - | |
459 | XFS_IBT_BLOCK(mp)); | |
460 | rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT); | |
461 | rrec->rm_offset = 0; | |
462 | ||
463 | /* account for rmap btree root */ | |
464 | rrec = XFS_RMAP_REC_ADDR(block, 4); | |
465 | rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp)); | |
466 | rrec->rm_blockcount = cpu_to_be32(1); | |
467 | rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); | |
468 | rrec->rm_offset = 0; | |
469 | ||
470 | /* account for refc btree root */ | |
38c26bfd | 471 | if (xfs_has_reflink(mp)) { |
b16817b6 DC |
472 | rrec = XFS_RMAP_REC_ADDR(block, 5); |
473 | rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp)); | |
474 | rrec->rm_blockcount = cpu_to_be32(1); | |
475 | rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC); | |
476 | rrec->rm_offset = 0; | |
477 | be16_add_cpu(&block->bb_numrecs, 1); | |
478 | } | |
f327a007 DW |
479 | |
480 | /* account for the log space */ | |
481 | if (is_log_ag(mp, id)) { | |
482 | rrec = XFS_RMAP_REC_ADDR(block, | |
483 | be16_to_cpu(block->bb_numrecs) + 1); | |
484 | rrec->rm_startblock = cpu_to_be32( | |
485 | XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart)); | |
486 | rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks); | |
487 | rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG); | |
488 | rrec->rm_offset = 0; | |
489 | be16_add_cpu(&block->bb_numrecs, 1); | |
490 | } | |
b16817b6 DC |
491 | } |
492 | ||
493 | /* | |
494 | * Initialise new secondary superblocks with the pre-grow geometry, but mark | |
495 | * them as "in progress" so we know they haven't yet been activated. This will | |
496 | * get cleared when the update with the new geometry information is done after | |
497 | * changes to the primary are committed. This isn't strictly necessary, but we | |
498 | * get it for free with the delayed buffer write lists and it means we can tell | |
499 | * if a grow operation didn't complete properly after the fact. | |
500 | */ | |
501 | static void | |
502 | xfs_sbblock_init( | |
503 | struct xfs_mount *mp, | |
504 | struct xfs_buf *bp, | |
505 | struct aghdr_init_data *id) | |
506 | { | |
3e6e8afd | 507 | struct xfs_dsb *dsb = bp->b_addr; |
b16817b6 DC |
508 | |
509 | xfs_sb_to_disk(dsb, &mp->m_sb); | |
510 | dsb->sb_inprogress = 1; | |
511 | } | |
512 | ||
513 | static void | |
514 | xfs_agfblock_init( | |
515 | struct xfs_mount *mp, | |
516 | struct xfs_buf *bp, | |
517 | struct aghdr_init_data *id) | |
518 | { | |
9798f615 | 519 | struct xfs_agf *agf = bp->b_addr; |
b16817b6 DC |
520 | xfs_extlen_t tmpsize; |
521 | ||
522 | agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); | |
523 | agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); | |
524 | agf->agf_seqno = cpu_to_be32(id->agno); | |
525 | agf->agf_length = cpu_to_be32(id->agsize); | |
526 | agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp)); | |
527 | agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp)); | |
528 | agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1); | |
529 | agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); | |
38c26bfd | 530 | if (xfs_has_rmapbt(mp)) { |
b16817b6 DC |
531 | agf->agf_roots[XFS_BTNUM_RMAPi] = |
532 | cpu_to_be32(XFS_RMAP_BLOCK(mp)); | |
533 | agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); | |
534 | agf->agf_rmap_blocks = cpu_to_be32(1); | |
535 | } | |
536 | ||
537 | agf->agf_flfirst = cpu_to_be32(1); | |
538 | agf->agf_fllast = 0; | |
539 | agf->agf_flcount = 0; | |
540 | tmpsize = id->agsize - mp->m_ag_prealloc_blocks; | |
541 | agf->agf_freeblks = cpu_to_be32(tmpsize); | |
542 | agf->agf_longest = cpu_to_be32(tmpsize); | |
38c26bfd | 543 | if (xfs_has_crc(mp)) |
b16817b6 | 544 | uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); |
38c26bfd | 545 | if (xfs_has_reflink(mp)) { |
b16817b6 DC |
546 | agf->agf_refcount_root = cpu_to_be32( |
547 | xfs_refc_block(mp)); | |
548 | agf->agf_refcount_level = cpu_to_be32(1); | |
549 | agf->agf_refcount_blocks = cpu_to_be32(1); | |
550 | } | |
f327a007 DW |
551 | |
552 | if (is_log_ag(mp, id)) { | |
553 | int64_t logblocks = mp->m_sb.sb_logblocks; | |
554 | ||
555 | be32_add_cpu(&agf->agf_freeblks, -logblocks); | |
556 | agf->agf_longest = cpu_to_be32(id->agsize - | |
557 | XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks); | |
558 | } | |
b16817b6 DC |
559 | } |
560 | ||
561 | static void | |
562 | xfs_agflblock_init( | |
563 | struct xfs_mount *mp, | |
564 | struct xfs_buf *bp, | |
565 | struct aghdr_init_data *id) | |
566 | { | |
567 | struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); | |
568 | __be32 *agfl_bno; | |
569 | int bucket; | |
570 | ||
38c26bfd | 571 | if (xfs_has_crc(mp)) { |
b16817b6 DC |
572 | agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC); |
573 | agfl->agfl_seqno = cpu_to_be32(id->agno); | |
574 | uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); | |
575 | } | |
576 | ||
183606d8 | 577 | agfl_bno = xfs_buf_to_agfl_bno(bp); |
b16817b6 DC |
578 | for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++) |
579 | agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); | |
580 | } | |
581 | ||
582 | static void | |
583 | xfs_agiblock_init( | |
584 | struct xfs_mount *mp, | |
585 | struct xfs_buf *bp, | |
586 | struct aghdr_init_data *id) | |
587 | { | |
370c782b | 588 | struct xfs_agi *agi = bp->b_addr; |
b16817b6 DC |
589 | int bucket; |
590 | ||
591 | agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); | |
592 | agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); | |
593 | agi->agi_seqno = cpu_to_be32(id->agno); | |
594 | agi->agi_length = cpu_to_be32(id->agsize); | |
595 | agi->agi_count = 0; | |
596 | agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp)); | |
597 | agi->agi_level = cpu_to_be32(1); | |
598 | agi->agi_freecount = 0; | |
599 | agi->agi_newino = cpu_to_be32(NULLAGINO); | |
600 | agi->agi_dirino = cpu_to_be32(NULLAGINO); | |
38c26bfd | 601 | if (xfs_has_crc(mp)) |
b16817b6 | 602 | uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid); |
38c26bfd | 603 | if (xfs_has_finobt(mp)) { |
b16817b6 DC |
604 | agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); |
605 | agi->agi_free_level = cpu_to_be32(1); | |
606 | } | |
607 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) | |
608 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); | |
ebd9027d | 609 | if (xfs_has_inobtcounts(mp)) { |
2a39946c | 610 | agi->agi_iblocks = cpu_to_be32(1); |
ebd9027d | 611 | if (xfs_has_finobt(mp)) |
2a39946c DW |
612 | agi->agi_fblocks = cpu_to_be32(1); |
613 | } | |
b16817b6 DC |
614 | } |
615 | ||
616 | typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp, | |
617 | struct aghdr_init_data *id); | |
618 | static int | |
619 | xfs_ag_init_hdr( | |
620 | struct xfs_mount *mp, | |
621 | struct aghdr_init_data *id, | |
622 | aghdr_init_work_f work, | |
623 | const struct xfs_buf_ops *ops) | |
b16817b6 DC |
624 | { |
625 | struct xfs_buf *bp; | |
2842b6db | 626 | int error; |
b16817b6 | 627 | |
2842b6db DW |
628 | error = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, &bp, ops); |
629 | if (error) | |
630 | return error; | |
b16817b6 DC |
631 | |
632 | (*work)(mp, bp, id); | |
633 | ||
634 | xfs_buf_delwri_queue(bp, &id->buffer_list); | |
635 | xfs_buf_relse(bp); | |
636 | return 0; | |
637 | } | |
638 | ||
639 | struct xfs_aghdr_grow_data { | |
640 | xfs_daddr_t daddr; | |
641 | size_t numblks; | |
642 | const struct xfs_buf_ops *ops; | |
643 | aghdr_init_work_f work; | |
644 | xfs_btnum_t type; | |
645 | bool need_init; | |
646 | }; | |
647 | ||
648 | /* | |
649 | * Prepare new AG headers to be written to disk. We use uncached buffers here, | |
650 | * as it is assumed these new AG headers are currently beyond the currently | |
651 | * valid filesystem address space. Using cached buffers would trip over EOFS | |
652 | * corruption detection algorithms in the buffer cache lookup routines. | |
653 | * | |
654 | * This is a non-transactional function, but the prepared buffers are added to a | |
655 | * delayed write buffer list supplied by the caller so they can submit them to | |
656 | * disk and wait on them as required. | |
657 | */ | |
658 | int | |
659 | xfs_ag_init_headers( | |
660 | struct xfs_mount *mp, | |
661 | struct aghdr_init_data *id) | |
662 | ||
663 | { | |
664 | struct xfs_aghdr_grow_data aghdr_data[] = { | |
665 | { /* SB */ | |
666 | .daddr = XFS_AG_DADDR(mp, id->agno, XFS_SB_DADDR), | |
667 | .numblks = XFS_FSS_TO_BB(mp, 1), | |
668 | .ops = &xfs_sb_buf_ops, | |
669 | .work = &xfs_sbblock_init, | |
670 | .need_init = true | |
671 | }, | |
672 | { /* AGF */ | |
673 | .daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGF_DADDR(mp)), | |
674 | .numblks = XFS_FSS_TO_BB(mp, 1), | |
675 | .ops = &xfs_agf_buf_ops, | |
676 | .work = &xfs_agfblock_init, | |
677 | .need_init = true | |
678 | }, | |
679 | { /* AGFL */ | |
680 | .daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGFL_DADDR(mp)), | |
681 | .numblks = XFS_FSS_TO_BB(mp, 1), | |
682 | .ops = &xfs_agfl_buf_ops, | |
683 | .work = &xfs_agflblock_init, | |
684 | .need_init = true | |
685 | }, | |
686 | { /* AGI */ | |
687 | .daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGI_DADDR(mp)), | |
688 | .numblks = XFS_FSS_TO_BB(mp, 1), | |
689 | .ops = &xfs_agi_buf_ops, | |
690 | .work = &xfs_agiblock_init, | |
691 | .need_init = true | |
692 | }, | |
693 | { /* BNO root block */ | |
694 | .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_BNO_BLOCK(mp)), | |
695 | .numblks = BTOBB(mp->m_sb.sb_blocksize), | |
27df4f50 | 696 | .ops = &xfs_bnobt_buf_ops, |
b16817b6 DC |
697 | .work = &xfs_bnoroot_init, |
698 | .need_init = true | |
699 | }, | |
700 | { /* CNT root block */ | |
701 | .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)), | |
702 | .numblks = BTOBB(mp->m_sb.sb_blocksize), | |
27df4f50 | 703 | .ops = &xfs_cntbt_buf_ops, |
b16817b6 DC |
704 | .work = &xfs_cntroot_init, |
705 | .need_init = true | |
706 | }, | |
707 | { /* INO root block */ | |
708 | .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_IBT_BLOCK(mp)), | |
709 | .numblks = BTOBB(mp->m_sb.sb_blocksize), | |
710 | .ops = &xfs_inobt_buf_ops, | |
711 | .work = &xfs_btroot_init, | |
712 | .type = XFS_BTNUM_INO, | |
713 | .need_init = true | |
714 | }, | |
715 | { /* FINO root block */ | |
716 | .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_FIBT_BLOCK(mp)), | |
717 | .numblks = BTOBB(mp->m_sb.sb_blocksize), | |
01e68f40 | 718 | .ops = &xfs_finobt_buf_ops, |
b16817b6 DC |
719 | .work = &xfs_btroot_init, |
720 | .type = XFS_BTNUM_FINO, | |
38c26bfd | 721 | .need_init = xfs_has_finobt(mp) |
b16817b6 DC |
722 | }, |
723 | { /* RMAP root block */ | |
724 | .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)), | |
725 | .numblks = BTOBB(mp->m_sb.sb_blocksize), | |
726 | .ops = &xfs_rmapbt_buf_ops, | |
727 | .work = &xfs_rmaproot_init, | |
38c26bfd | 728 | .need_init = xfs_has_rmapbt(mp) |
b16817b6 DC |
729 | }, |
730 | { /* REFC root block */ | |
731 | .daddr = XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)), | |
732 | .numblks = BTOBB(mp->m_sb.sb_blocksize), | |
733 | .ops = &xfs_refcountbt_buf_ops, | |
734 | .work = &xfs_btroot_init, | |
735 | .type = XFS_BTNUM_REFC, | |
38c26bfd | 736 | .need_init = xfs_has_reflink(mp) |
b16817b6 DC |
737 | }, |
738 | { /* NULL terminating block */ | |
739 | .daddr = XFS_BUF_DADDR_NULL, | |
740 | } | |
741 | }; | |
742 | struct xfs_aghdr_grow_data *dp; | |
743 | int error = 0; | |
744 | ||
745 | /* Account for AG free space in new AG */ | |
746 | id->nfree += id->agsize - mp->m_ag_prealloc_blocks; | |
747 | for (dp = &aghdr_data[0]; dp->daddr != XFS_BUF_DADDR_NULL; dp++) { | |
748 | if (!dp->need_init) | |
749 | continue; | |
750 | ||
751 | id->daddr = dp->daddr; | |
752 | id->numblks = dp->numblks; | |
753 | id->type = dp->type; | |
754 | error = xfs_ag_init_hdr(mp, id, dp->work, dp->ops); | |
755 | if (error) | |
756 | break; | |
757 | } | |
758 | return error; | |
759 | } | |
49dd56f2 | 760 | |
46141dc8 GX |
761 | int |
762 | xfs_ag_shrink_space( | |
763 | struct xfs_mount *mp, | |
764 | struct xfs_trans **tpp, | |
765 | xfs_agnumber_t agno, | |
766 | xfs_extlen_t delta) | |
767 | { | |
768 | struct xfs_alloc_arg args = { | |
769 | .tp = *tpp, | |
770 | .mp = mp, | |
771 | .type = XFS_ALLOCTYPE_THIS_BNO, | |
772 | .minlen = delta, | |
773 | .maxlen = delta, | |
774 | .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE, | |
775 | .resv = XFS_AG_RESV_NONE, | |
776 | .prod = 1 | |
777 | }; | |
778 | struct xfs_buf *agibp, *agfbp; | |
779 | struct xfs_agi *agi; | |
780 | struct xfs_agf *agf; | |
a8f3522c | 781 | xfs_agblock_t aglen; |
46141dc8 GX |
782 | int error, err2; |
783 | ||
784 | ASSERT(agno == mp->m_sb.sb_agcount - 1); | |
785 | error = xfs_ialloc_read_agi(mp, *tpp, agno, &agibp); | |
786 | if (error) | |
787 | return error; | |
788 | ||
789 | agi = agibp->b_addr; | |
790 | ||
791 | error = xfs_alloc_read_agf(mp, *tpp, agno, 0, &agfbp); | |
792 | if (error) | |
793 | return error; | |
794 | ||
795 | agf = agfbp->b_addr; | |
a8f3522c | 796 | aglen = be32_to_cpu(agi->agi_length); |
46141dc8 GX |
797 | /* some extra paranoid checks before we shrink the ag */ |
798 | if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length)) | |
799 | return -EFSCORRUPTED; | |
a8f3522c | 800 | if (delta >= aglen) |
46141dc8 GX |
801 | return -EINVAL; |
802 | ||
a8f3522c | 803 | args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta); |
46141dc8 | 804 | |
da062d16 DW |
805 | /* |
806 | * Make sure that the last inode cluster cannot overlap with the new | |
807 | * end of the AG, even if it's sparse. | |
808 | */ | |
809 | error = xfs_ialloc_check_shrink(*tpp, agno, agibp, aglen - delta); | |
810 | if (error) | |
811 | return error; | |
812 | ||
46141dc8 GX |
813 | /* |
814 | * Disable perag reservations so it doesn't cause the allocation request | |
815 | * to fail. We'll reestablish reservation before we return. | |
816 | */ | |
817 | error = xfs_ag_resv_free(agibp->b_pag); | |
818 | if (error) | |
819 | return error; | |
820 | ||
821 | /* internal log shouldn't also show up in the free space btrees */ | |
822 | error = xfs_alloc_vextent(&args); | |
823 | if (!error && args.agbno == NULLAGBLOCK) | |
824 | error = -ENOSPC; | |
825 | ||
826 | if (error) { | |
827 | /* | |
828 | * if extent allocation fails, need to roll the transaction to | |
829 | * ensure that the AGFL fixup has been committed anyway. | |
830 | */ | |
831 | xfs_trans_bhold(*tpp, agfbp); | |
832 | err2 = xfs_trans_roll(tpp); | |
833 | if (err2) | |
834 | return err2; | |
835 | xfs_trans_bjoin(*tpp, agfbp); | |
836 | goto resv_init_out; | |
837 | } | |
838 | ||
839 | /* | |
840 | * if successfully deleted from freespace btrees, need to confirm | |
841 | * per-AG reservation works as expected. | |
842 | */ | |
843 | be32_add_cpu(&agi->agi_length, -delta); | |
844 | be32_add_cpu(&agf->agf_length, -delta); | |
845 | ||
846 | err2 = xfs_ag_resv_init(agibp->b_pag, *tpp); | |
847 | if (err2) { | |
848 | be32_add_cpu(&agi->agi_length, delta); | |
849 | be32_add_cpu(&agf->agf_length, delta); | |
850 | if (err2 != -ENOSPC) | |
851 | goto resv_err; | |
852 | ||
853 | __xfs_bmap_add_free(*tpp, args.fsbno, delta, NULL, true); | |
854 | ||
855 | /* | |
856 | * Roll the transaction before trying to re-init the per-ag | |
857 | * reservation. The new transaction is clean so it will cancel | |
858 | * without any side effects. | |
859 | */ | |
860 | error = xfs_defer_finish(tpp); | |
861 | if (error) | |
862 | return error; | |
863 | ||
864 | error = -ENOSPC; | |
865 | goto resv_init_out; | |
866 | } | |
867 | xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH); | |
868 | xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH); | |
869 | return 0; | |
870 | resv_init_out: | |
871 | err2 = xfs_ag_resv_init(agibp->b_pag, *tpp); | |
872 | if (!err2) | |
873 | return error; | |
874 | resv_err: | |
875 | xfs_warn(mp, "Error %d reserving per-AG metadata reserve pool.", err2); | |
876 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | |
877 | return err2; | |
878 | } | |
879 | ||
49dd56f2 DC |
880 | /* |
881 | * Extend the AG indicated by the @id by the length passed in | |
882 | */ | |
883 | int | |
884 | xfs_ag_extend_space( | |
885 | struct xfs_mount *mp, | |
886 | struct xfs_trans *tp, | |
887 | struct aghdr_init_data *id, | |
888 | xfs_extlen_t len) | |
889 | { | |
49dd56f2 DC |
890 | struct xfs_buf *bp; |
891 | struct xfs_agi *agi; | |
892 | struct xfs_agf *agf; | |
893 | int error; | |
894 | ||
895 | /* | |
896 | * Change the agi length. | |
897 | */ | |
898 | error = xfs_ialloc_read_agi(mp, tp, id->agno, &bp); | |
899 | if (error) | |
900 | return error; | |
901 | ||
370c782b | 902 | agi = bp->b_addr; |
49dd56f2 DC |
903 | be32_add_cpu(&agi->agi_length, len); |
904 | ASSERT(id->agno == mp->m_sb.sb_agcount - 1 || | |
905 | be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks); | |
906 | xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH); | |
907 | ||
908 | /* | |
909 | * Change agf length. | |
910 | */ | |
911 | error = xfs_alloc_read_agf(mp, tp, id->agno, 0, &bp); | |
912 | if (error) | |
913 | return error; | |
914 | ||
9798f615 | 915 | agf = bp->b_addr; |
49dd56f2 DC |
916 | be32_add_cpu(&agf->agf_length, len); |
917 | ASSERT(agf->agf_length == agi->agi_length); | |
918 | xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); | |
919 | ||
920 | /* | |
921 | * Free the new space. | |
922 | * | |
7280feda | 923 | * XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that |
49dd56f2 DC |
924 | * this doesn't actually exist in the rmap btree. |
925 | */ | |
fa9c3c19 | 926 | error = xfs_rmap_free(tp, bp, bp->b_pag, |
49dd56f2 | 927 | be32_to_cpu(agf->agf_length) - len, |
7280feda | 928 | len, &XFS_RMAP_OINFO_SKIP_UPDATE); |
49dd56f2 DC |
929 | if (error) |
930 | return error; | |
931 | ||
932 | return xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, id->agno, | |
933 | be32_to_cpu(agf->agf_length) - len), | |
7280feda DW |
934 | len, &XFS_RMAP_OINFO_SKIP_UPDATE, |
935 | XFS_AG_RESV_NONE); | |
49dd56f2 | 936 | } |
7cd5006b DW |
937 | |
938 | /* Retrieve AG geometry. */ | |
939 | int | |
940 | xfs_ag_get_geometry( | |
941 | struct xfs_mount *mp, | |
942 | xfs_agnumber_t agno, | |
943 | struct xfs_ag_geometry *ageo) | |
944 | { | |
945 | struct xfs_buf *agi_bp; | |
946 | struct xfs_buf *agf_bp; | |
947 | struct xfs_agi *agi; | |
948 | struct xfs_agf *agf; | |
949 | struct xfs_perag *pag; | |
950 | unsigned int freeblks; | |
951 | int error; | |
952 | ||
953 | if (agno >= mp->m_sb.sb_agcount) | |
954 | return -EINVAL; | |
955 | ||
956 | /* Lock the AG headers. */ | |
957 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agi_bp); | |
958 | if (error) | |
959 | return error; | |
960 | error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agf_bp); | |
961 | if (error) | |
962 | goto out_agi; | |
92a00544 GX |
963 | |
964 | pag = agi_bp->b_pag; | |
7cd5006b DW |
965 | |
966 | /* Fill out form. */ | |
967 | memset(ageo, 0, sizeof(*ageo)); | |
968 | ageo->ag_number = agno; | |
969 | ||
370c782b | 970 | agi = agi_bp->b_addr; |
7cd5006b DW |
971 | ageo->ag_icount = be32_to_cpu(agi->agi_count); |
972 | ageo->ag_ifree = be32_to_cpu(agi->agi_freecount); | |
973 | ||
9798f615 | 974 | agf = agf_bp->b_addr; |
7cd5006b DW |
975 | ageo->ag_length = be32_to_cpu(agf->agf_length); |
976 | freeblks = pag->pagf_freeblks + | |
977 | pag->pagf_flcount + | |
978 | pag->pagf_btreeblks - | |
979 | xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE); | |
980 | ageo->ag_freeblks = freeblks; | |
1302c6a2 | 981 | xfs_ag_geom_health(pag, ageo); |
7cd5006b DW |
982 | |
983 | /* Release resources. */ | |
7cd5006b DW |
984 | xfs_buf_relse(agf_bp); |
985 | out_agi: | |
986 | xfs_buf_relse(agi_bp); | |
987 | return error; | |
988 | } |