// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"
#include "xfs_icache.h"
#include "xfs_sysfs.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_reflink.h"
#include "xfs_extent_busy.h"
#include "xfs_health.h"
#include "xfs_trace.h"
#include "xfs_ag.h"

static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;

void
xfs_uuid_table_free(void)
{
	if (xfs_uuid_table_size == 0)
		return;
	kmem_free(xfs_uuid_table);
	xfs_uuid_table = NULL;
	xfs_uuid_table_size = 0;
}

/*
 * See if the UUID is unique among mounted XFS filesystems.
 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
 */
STATIC int
xfs_uuid_mount(
	struct xfs_mount	*mp)
{
	uuid_t			*uuid = &mp->m_sb.sb_uuid;
	int			hole, i;

	/* Publish UUID in struct super_block */
	uuid_copy(&mp->m_super->s_uuid, uuid);

	if (mp->m_flags & XFS_MOUNT_NOUUID)
		return 0;

	if (uuid_is_null(uuid)) {
		xfs_warn(mp, "Filesystem has null UUID - can't mount");
		return -EINVAL;
	}

	mutex_lock(&xfs_uuid_table_mutex);
	for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
		if (uuid_is_null(&xfs_uuid_table[i])) {
			hole = i;
			continue;
		}
		if (uuid_equal(uuid, &xfs_uuid_table[i]))
			goto out_duplicate;
	}

	if (hole < 0) {
		xfs_uuid_table = krealloc(xfs_uuid_table,
			(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
			GFP_KERNEL | __GFP_NOFAIL);
		hole = xfs_uuid_table_size++;
	}
	xfs_uuid_table[hole] = *uuid;
	mutex_unlock(&xfs_uuid_table_mutex);

	return 0;

 out_duplicate:
	mutex_unlock(&xfs_uuid_table_mutex);
	xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
	return -EINVAL;
}

STATIC void
xfs_uuid_unmount(
	struct xfs_mount	*mp)
{
	uuid_t			*uuid = &mp->m_sb.sb_uuid;
	int			i;

	if (mp->m_flags & XFS_MOUNT_NOUUID)
		return;

	mutex_lock(&xfs_uuid_table_mutex);
	for (i = 0; i < xfs_uuid_table_size; i++) {
		if (uuid_is_null(&xfs_uuid_table[i]))
			continue;
		if (!uuid_equal(uuid, &xfs_uuid_table[i]))
			continue;
		memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
		break;
	}
	ASSERT(i < xfs_uuid_table_size);
	mutex_unlock(&xfs_uuid_table_mutex);
}

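/*
 * Note on the table lifecycle (illustrative, not in the original source):
 * xfs_uuid_table is a grow-only array. Unmounting zeroes a slot back to the
 * nil UUID rather than shrinking the array, and the next xfs_uuid_mount()
 * reuses such a "hole" before reallocating, so the table only ever grows to
 * the peak number of concurrently mounted filesystems.
 */
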
/*
 * Check size of device based on the (data/realtime) block count.
 * Note: this check is used by the growfs code as well as mount.
 */
int
xfs_sb_validate_fsb_count(
	xfs_sb_t	*sbp,
	uint64_t	nblocks)
{
	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
	ASSERT(sbp->sb_blocklog >= BBSHIFT);

	/* Limited by ULONG_MAX of page cache index */
	if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
		return -EFBIG;
	return 0;
}

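/*
 * Worked example (illustrative, not in the original source): the page cache
 * indexes pages with an unsigned long, so the filesystem cannot span more
 * than ULONG_MAX pages. On a 32-bit kernel with 4 KiB pages and 4 KiB blocks
 * (PAGE_SHIFT == sb_blocklog == 12), nblocks is shifted by 0 and anything
 * above 2^32 - 1 blocks (~16 TiB) is rejected with -EFBIG. On 64-bit kernels
 * ULONG_MAX is 2^64 - 1 and this limit is never the binding constraint.
 */
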
/*
 * xfs_readsb
 *
 * Does the initial read of the superblock.
 */
int
xfs_readsb(
	struct xfs_mount *mp,
	int		flags)
{
	unsigned int	sector_size;
	struct xfs_buf	*bp;
	struct xfs_sb	*sbp = &mp->m_sb;
	int		error;
	int		loud = !(flags & XFS_MFSI_QUIET);
	const struct xfs_buf_ops *buf_ops;

	ASSERT(mp->m_sb_bp == NULL);
	ASSERT(mp->m_ddev_targp != NULL);

	/*
	 * For the initial read, we must guess at the sector size based on
	 * the block device. It's enough to get the sb_sectsize out of the
	 * superblock and then reread with the proper length. We don't
	 * verify it yet, because it may not be complete.
	 */
	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
	buf_ops = NULL;

	/*
	 * Allocate a (locked) buffer to hold the superblock. This will be kept
	 * around at all times to optimize access to the superblock. Therefore,
	 * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count
	 * elevated.
	 */
reread:
	error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
				      BTOBB(sector_size), XBF_NO_IOACCT, &bp,
				      buf_ops);
	if (error) {
		if (loud)
			xfs_warn(mp, "SB validate failed with error %d.", error);
		/* bad CRC means corrupted metadata */
		if (error == -EFSBADCRC)
			error = -EFSCORRUPTED;
		return error;
	}

	/*
	 * Initialize the mount structure from the superblock.
	 */
	xfs_sb_from_disk(sbp, bp->b_addr);

	/*
	 * If we haven't validated the superblock, do so now before we try
	 * to check the sector size and reread the superblock appropriately.
	 */
	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
		if (loud)
			xfs_warn(mp, "Invalid superblock magic number");
		error = -EINVAL;
		goto release_buf;
	}

	/*
	 * We must be able to do sector-sized and sector-aligned IO.
	 */
	if (sector_size > sbp->sb_sectsize) {
		if (loud)
			xfs_warn(mp, "device supports %u byte sectors (not %u)",
				sector_size, sbp->sb_sectsize);
		error = -ENOSYS;
		goto release_buf;
	}

	if (buf_ops == NULL) {
		/*
		 * Re-read the superblock so the buffer is correctly sized,
		 * and properly verified.
		 */
		xfs_buf_relse(bp);
		sector_size = sbp->sb_sectsize;
		buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
		goto reread;
	}

	xfs_reinit_percpu_counters(mp);

	/* no need to be quiet anymore, so reset the buf ops */
	bp->b_ops = &xfs_sb_buf_ops;

	mp->m_sb_bp = bp;
	xfs_buf_unlock(bp);
	return 0;

release_buf:
	xfs_buf_relse(bp);
	return error;
}

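/*
 * Example of the two-pass read above (illustrative, not in the original
 * source): if the block device reports 512-byte sectors but the superblock
 * records sb_sectsize == 4096, the first pass reads one 512-byte sector with
 * no verifier attached, discovers the real sector size from the decoded
 * superblock, and the "goto reread" repeats the read with 4096 bytes and the
 * proper xfs_sb_buf_ops verifier. A device whose hardware sector size
 * exceeds sb_sectsize can never do sector-aligned IO for this filesystem,
 * hence the -ENOSYS above.
 */
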
/*
 * If the sunit/swidth change would move the precomputed root inode value, we
 * must reject the ondisk change because repair will stumble over that.
 * However, we allow the mount to proceed because we never rejected this
 * combination before. Sets *update_sb to true if the superblock should be
 * updated, false otherwise.
 */
static inline int
xfs_check_new_dalign(
	struct xfs_mount	*mp,
	int			new_dalign,
	bool			*update_sb)
{
	struct xfs_sb		*sbp = &mp->m_sb;
	xfs_ino_t		calc_ino;

	calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
	trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);

	if (sbp->sb_rootino == calc_ino) {
		*update_sb = true;
		return 0;
	}

	xfs_warn(mp,
"Cannot change stripe alignment; would require moving root inode.");

	/*
	 * XXX: Next time we add a new incompat feature, this should start
	 * returning -EINVAL to fail the mount. Until then, spit out a warning
	 * that we're ignoring the administrator's instructions.
	 */
	xfs_warn(mp, "Skipping superblock stripe alignment update.");
	*update_sb = false;
	return 0;
}

/*
 * If we were provided with new sunit/swidth values as mount options, make sure
 * that they pass basic alignment and superblock feature checks, and convert
 * them into the same units (FSB) that everything else expects. This step
 * /must/ be done before computing the inode geometry.
 */
STATIC int
xfs_validate_new_dalign(
	struct xfs_mount	*mp)
{
	if (mp->m_dalign == 0)
		return 0;

	/*
	 * If stripe unit and stripe width are not multiples
	 * of the fs blocksize, reject the mount.
	 */
	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
	    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
		xfs_warn(mp,
	"alignment check failed: sunit/swidth vs. blocksize(%d)",
			mp->m_sb.sb_blocksize);
		return -EINVAL;
	} else {
		/*
		 * Convert the stripe unit and width to FSBs.
		 */
		mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
		if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
			xfs_warn(mp,
	"alignment check failed: sunit/swidth vs. agsize(%d)",
				mp->m_sb.sb_agblocks);
			return -EINVAL;
		} else if (mp->m_dalign) {
			mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
		} else {
			xfs_warn(mp,
	"alignment check failed: sunit(%d) less than bsize(%d)",
				mp->m_dalign, mp->m_sb.sb_blocksize);
			return -EINVAL;
		}
	}

	if (!xfs_sb_version_hasdalign(&mp->m_sb)) {
		xfs_warn(mp,
"cannot change alignment: superblock does not support data alignment");
		return -EINVAL;
	}

	return 0;
}

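/*
 * Unit-conversion example (illustrative, not in the original source): mount
 * options pass sunit/swidth in 512-byte basic blocks (BBs). For a RAID
 * stripe unit of 64 KiB on a 4 KiB-block filesystem, sunit arrives as 128
 * BBs and XFS_BB_TO_FSBT() converts it to m_dalign = 16 filesystem blocks;
 * a stripe width of 4 data disks likewise becomes m_swidth = 64 FSBs. A
 * sunit smaller than one filesystem block truncates to zero and falls into
 * the final else branch above.
 */
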
/* Update alignment values based on mount options and sb values. */
STATIC int
xfs_update_alignment(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &mp->m_sb;

	if (mp->m_dalign) {
		bool		update_sb;
		int		error;

		if (sbp->sb_unit == mp->m_dalign &&
		    sbp->sb_width == mp->m_swidth)
			return 0;

		error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
		if (error || !update_sb)
			return error;

		sbp->sb_unit = mp->m_dalign;
		sbp->sb_width = mp->m_swidth;
		mp->m_update_sb = true;
	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
		    xfs_sb_version_hasdalign(&mp->m_sb)) {
		mp->m_dalign = sbp->sb_unit;
		mp->m_swidth = sbp->sb_width;
	}

	return 0;
}

/*
 * Precalculate the low space thresholds for dynamic speculative preallocation.
 */
void
xfs_set_low_space_thresholds(
	struct xfs_mount	*mp)
{
	uint64_t		dblocks = mp->m_sb.sb_dblocks;
	uint64_t		rtexts = mp->m_sb.sb_rextents;
	int			i;

	do_div(dblocks, 100);
	do_div(rtexts, 100);

	for (i = 0; i < XFS_LOWSP_MAX; i++) {
		mp->m_low_space[i] = dblocks * (i + 1);
		mp->m_low_rtexts[i] = rtexts * (i + 1);
	}
}

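/*
 * Worked example (illustrative, not in the original source): for a
 * filesystem with sb_dblocks = 1,000,000, dblocks/100 = 10,000, so the
 * thresholds become m_low_space[0..4] = 10,000 / 20,000 / 30,000 / 40,000 /
 * 50,000 blocks, i.e. 1% through 5% of the data device. Speculative
 * preallocation is then throttled progressively harder as free space falls
 * through each threshold.
 */
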
/*
 * Check that the data (and log if separate) is an ok size.
 */
STATIC int
xfs_check_sizes(
	struct xfs_mount *mp)
{
	struct xfs_buf	*bp;
	xfs_daddr_t	d;
	int		error;

	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
		xfs_warn(mp, "filesystem size mismatch detected");
		return -EFBIG;
	}
	error = xfs_buf_read_uncached(mp->m_ddev_targp,
					d - XFS_FSS_TO_BB(mp, 1),
					XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
	if (error) {
		xfs_warn(mp, "last sector read failed");
		return error;
	}
	xfs_buf_relse(bp);

	if (mp->m_logdev_targp == mp->m_ddev_targp)
		return 0;

	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
		xfs_warn(mp, "log size mismatch detected");
		return -EFBIG;
	}
	error = xfs_buf_read_uncached(mp->m_logdev_targp,
					d - XFS_FSB_TO_BB(mp, 1),
					XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
	if (error) {
		xfs_warn(mp, "log device read failed");
		return error;
	}
	xfs_buf_relse(bp);
	return 0;
}

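/*
 * How the probe works (illustrative, not in the original source): rather
 * than asking the block layer for the device size, we read back the last
 * sector (data device) or last filesystem block (external log) that the
 * superblock claims to own. If the device is smaller than sb_dblocks or
 * sb_logblocks implies, that uncached read fails and the mount is rejected
 * before any metadata at a bogus address can be trusted.
 */
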
/*
 * Clear the quotaflags in memory and in the superblock.
 */
int
xfs_mount_reset_sbqflags(
	struct xfs_mount	*mp)
{
	mp->m_qflags = 0;

	/* It is OK to look at sb_qflags in the mount path without m_sb_lock. */
	if (mp->m_sb.sb_qflags == 0)
		return 0;
	spin_lock(&mp->m_sb_lock);
	mp->m_sb.sb_qflags = 0;
	spin_unlock(&mp->m_sb_lock);

	if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
		return 0;

	return xfs_sync_sb(mp, false);
}

uint64_t
xfs_default_resblks(xfs_mount_t *mp)
{
	uint64_t resblks;

	/*
	 * We default to 5% or 8192 fsbs of space reserved, whichever is
	 * smaller. This is intended to cover concurrent allocation
	 * transactions when we initially hit enospc. These each require a 4
	 * block reservation. Hence by default we cover roughly 2000 concurrent
	 * allocation reservations.
	 */
	resblks = mp->m_sb.sb_dblocks;
	do_div(resblks, 20);
	resblks = min_t(uint64_t, resblks, 8192);
	return resblks;
}

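/*
 * Arithmetic check (illustrative, not in the original source): dividing
 * sb_dblocks by 20 yields 5%, so a 100,000-block filesystem reserves 5,000
 * blocks, while anything over 163,840 blocks hits the 8192-block cap
 * (32 MiB with 4 KiB blocks). At 4 blocks per allocation transaction, 8192
 * reserved blocks cover roughly 2000 concurrent reservations, as the comment
 * above states.
 */
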
/* Ensure the summary counts are correct. */
STATIC int
xfs_check_summary_counts(
	struct xfs_mount	*mp)
{
	/*
	 * The AG0 superblock verifier rejects in-progress filesystems,
	 * so we should never see the flag set this far into mounting.
	 */
	if (mp->m_sb.sb_inprogress) {
		xfs_err(mp, "sb_inprogress set after log recovery??");
		WARN_ON(1);
		return -EFSCORRUPTED;
	}

	/*
	 * Now the log is mounted, we know if it was an unclean shutdown or
	 * not. If it was unclean, the first phase of recovery has completed,
	 * so we have consistent AG blocks on disk. We have not recovered EFIs
	 * yet, but they are recovered transactionally in the second recovery
	 * phase later.
	 *
	 * If the log was clean when we mounted, we can check the summary
	 * counters. If any of them are obviously incorrect, we can recompute
	 * them from the AGF headers in the next step.
	 */
	if (XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
	    (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
	     !xfs_verify_icount(mp, mp->m_sb.sb_icount) ||
	     mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
		xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);

	/*
	 * We can safely re-initialise incore superblock counters from the
	 * per-ag data. These may not be correct if the filesystem was not
	 * cleanly unmounted, so we waited for recovery to finish before doing
	 * this.
	 *
	 * If the filesystem was cleanly unmounted or the previous check did
	 * not flag anything weird, then we can trust the values in the
	 * superblock to be correct and we don't need to do anything here.
	 * Otherwise, recalculate the summary counters.
	 */
	if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) ||
	     XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) &&
	    !xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS))
		return 0;

	return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
}

/*
 * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
 * internal inode structures can be sitting in the CIL and AIL at this point,
 * so we need to unpin them, write them back and/or reclaim them before unmount
 * can proceed. In other words, callers are required to have inactivated all
 * inodes.
 *
 * An inode cluster that has been freed can have its buffer still pinned in
 * memory because the transaction is still sitting in an iclog. The stale
 * inodes on that buffer will be pinned to the buffer until the transaction
 * hits the disk and the callbacks run. Pushing the AIL will skip the stale
 * inodes and may never see the pinned buffer, so nothing will push out the
 * iclog and unpin the buffer.
 *
 * Hence we need to force the log to unpin everything first. However, log
 * forces don't wait for the discards they issue to complete, so we have to
 * explicitly wait for them to complete here as well.
 *
 * Then we can tell the world we are unmounting so that error handling knows
 * that the filesystem is going away and we should error out anything that we
 * have been retrying in the background. This will prevent never-ending
 * retries in AIL pushing from hanging the unmount.
 *
 * Finally, we can push the AIL to clean all the remaining dirty objects, then
 * reclaim the remaining inodes that are still in memory at this point in time.
 */
static void
xfs_unmount_flush_inodes(
	struct xfs_mount	*mp)
{
	xfs_log_force(mp, XFS_LOG_SYNC);
	xfs_extent_busy_wait_all(mp);
	flush_workqueue(xfs_discard_wq);

	mp->m_flags |= XFS_MOUNT_UNMOUNTING;

	xfs_ail_push_all_sync(mp->m_ail);
	xfs_inodegc_stop(mp);
	cancel_delayed_work_sync(&mp->m_reclaim_work);
	xfs_reclaim_inodes(mp);
	xfs_health_unmount(mp);
}

static void
xfs_mount_setup_inode_geom(
	struct xfs_mount	*mp)
{
	struct xfs_ino_geometry *igeo = M_IGEO(mp);

	igeo->attr_fork_offset = xfs_bmap_compute_attr_offset(mp);
	ASSERT(igeo->attr_fork_offset < XFS_LITINO(mp));

	xfs_ialloc_setup_geometry(mp);
}

/*
 * This function does the following on an initial mount of a file system:
 *	- reads the superblock from disk and initializes the mount struct
 *	- if we're a 32-bit kernel, do a size check on the superblock
 *		so we don't mount terabyte filesystems
 *	- init mount struct realtime fields
 *	- allocate inode hash table for fs
 *	- init directory manager
 *	- perform recovery and init the log manager
 */
int
xfs_mountfs(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &(mp->m_sb);
	struct xfs_inode	*rip;
	struct xfs_ino_geometry *igeo = M_IGEO(mp);
	uint64_t		resblks;
	uint			quotamount = 0;
	uint			quotaflags = 0;
	int			error = 0;

	xfs_sb_mount_common(mp, sbp);

	/*
	 * Check for mismatched features2 values. Older kernels read & wrote
	 * into the wrong sb offset for sb_features2 on some platforms due to
	 * xfs_sb_t not being 64bit size aligned when sb_features2 was added,
	 * which made older superblock reading/writing routines swap it as a
	 * 64-bit value.
	 *
	 * For backwards compatibility, we make both slots equal.
	 *
	 * If we detect a mismatched field, we OR the set bits into the existing
	 * features2 field in case it has already been modified; we don't want
	 * to lose any features. We then update the bad location with the ORed
	 * value so that older kernels will see any features2 flags. The
	 * superblock writeback code ensures the new sb_features2 is copied to
	 * sb_bad_features2 before it is logged or written to disk.
	 */
	if (xfs_sb_has_mismatched_features2(sbp)) {
		xfs_warn(mp, "correcting sb_features alignment problem");
		sbp->sb_features2 |= sbp->sb_bad_features2;
		mp->m_update_sb = true;

		/*
		 * Re-check for ATTR2 in case it was found in bad_features2
		 * slot.
		 */
		if (xfs_sb_version_hasattr2(&mp->m_sb) &&
		    !(mp->m_flags & XFS_MOUNT_NOATTR2))
			mp->m_flags |= XFS_MOUNT_ATTR2;
	}

	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
	    (mp->m_flags & XFS_MOUNT_NOATTR2)) {
		xfs_sb_version_removeattr2(&mp->m_sb);
		mp->m_update_sb = true;

		/* update sb_versionnum for the clearing of the morebits */
		if (!sbp->sb_features2)
			mp->m_update_sb = true;
	}

	/* always use v2 inodes by default now */
	if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
		mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
		mp->m_update_sb = true;
	}

	/*
	 * If we were given new sunit/swidth options, do some basic validation
	 * checks and convert the incore dalign and swidth values to the
	 * same units (FSB) that everything else uses. This /must/ happen
	 * before computing the inode geometry.
	 */
	error = xfs_validate_new_dalign(mp);
	if (error)
		goto out;

	xfs_alloc_compute_maxlevels(mp);
	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
	xfs_mount_setup_inode_geom(mp);
	xfs_rmapbt_compute_maxlevels(mp);
	xfs_refcountbt_compute_maxlevels(mp);

	/*
	 * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks
	 * is NOT aligned, turn off m_dalign, since allocator alignment is
	 * within an ag and therefore the ag has to be aligned at the stripe
	 * boundary. Note that we must compute the free space and rmap btree
	 * geometry before doing this.
	 */
	error = xfs_update_alignment(mp);
	if (error)
		goto out;

	/* enable fail_at_unmount as default */
	mp->m_fail_unmount = true;

	error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype,
			       NULL, mp->m_super->s_id);
	if (error)
		goto out;

	error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype,
			       &mp->m_kobj, "stats");
	if (error)
		goto out_remove_sysfs;

	error = xfs_error_sysfs_init(mp);
	if (error)
		goto out_del_stats;

	error = xfs_errortag_init(mp);
	if (error)
		goto out_remove_error_sysfs;

	error = xfs_uuid_mount(mp);
	if (error)
		goto out_remove_errortag;

	/*
	 * Update the preferred write size based on the information from the
	 * on-disk superblock.
	 */
	mp->m_allocsize_log =
		max_t(uint32_t, sbp->sb_blocklog, mp->m_allocsize_log);
	mp->m_allocsize_blocks = 1U << (mp->m_allocsize_log - sbp->sb_blocklog);

	/* set the low space thresholds for dynamic preallocation */
	xfs_set_low_space_thresholds(mp);

	/*
	 * If enabled, sparse inode chunk alignment is expected to match the
	 * cluster size. Full inode chunk alignment must match the chunk size,
	 * but that is checked on sb read verification...
	 */
	if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
	    mp->m_sb.sb_spino_align !=
			XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
		xfs_warn(mp,
	"Sparse inode block alignment (%u) must match cluster size (%llu).",
			 mp->m_sb.sb_spino_align,
			 XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
		error = -EINVAL;
		goto out_remove_uuid;
	}

	/*
	 * Check that the data (and log if separate) is an ok size.
	 */
	error = xfs_check_sizes(mp);
	if (error)
		goto out_remove_uuid;

	/*
	 * Initialize realtime fields in the mount structure
	 */
	error = xfs_rtmount_init(mp);
	if (error) {
		xfs_warn(mp, "RT mount failed");
		goto out_remove_uuid;
	}

	/*
	 * Copies the low order bits of the timestamp and the randomly
	 * set "sequence" number out of a UUID.
	 */
	mp->m_fixedfsid[0] =
		(get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
		 get_unaligned_be16(&sbp->sb_uuid.b[4]);
	mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);

	error = xfs_da_mount(mp);
	if (error) {
		xfs_warn(mp, "Failed dir/attr init: %d", error);
		goto out_remove_uuid;
	}

	/*
	 * Initialize the precomputed transaction reservations values.
	 */
	xfs_trans_init(mp);

	/*
	 * Allocate and initialize the per-ag data.
	 */
	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
	if (error) {
		xfs_warn(mp, "Failed per-ag init: %d", error);
		goto out_free_dir;
	}

	if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) {
		xfs_warn(mp, "no log defined");
		error = -EFSCORRUPTED;
		goto out_free_perag;
	}

	error = xfs_inodegc_register_shrinker(mp);
	if (error)
		goto out_fail_wait;

	/*
	 * Log's mount-time initialization. The first part of recovery can place
	 * some items on the AIL, to be handled when recovery is finished or
	 * cancelled.
	 */
	error = xfs_log_mount(mp, mp->m_logdev_targp,
			      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
			      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
	if (error) {
		xfs_warn(mp, "log mount failed");
		goto out_inodegc_shrinker;
	}

	/* Make sure the summary counts are ok. */
	error = xfs_check_summary_counts(mp);
	if (error)
		goto out_log_dealloc;

	/* Enable background inode inactivation workers. */
	xfs_inodegc_start(mp);
	xfs_blockgc_start(mp);

	/*
	 * Get and sanity-check the root inode.
	 * Save the pointer to it in the mount structure.
	 */
	error = xfs_iget(mp, NULL, sbp->sb_rootino, XFS_IGET_UNTRUSTED,
			 XFS_ILOCK_EXCL, &rip);
	if (error) {
		xfs_warn(mp,
			"Failed to read root inode 0x%llx, error %d",
			sbp->sb_rootino, -error);
		goto out_log_dealloc;
	}

	ASSERT(rip != NULL);

	if (XFS_IS_CORRUPT(mp, !S_ISDIR(VFS_I(rip)->i_mode))) {
		xfs_warn(mp, "corrupted root inode %llu: not a directory",
			(unsigned long long)rip->i_ino);
		xfs_iunlock(rip, XFS_ILOCK_EXCL);
		error = -EFSCORRUPTED;
		goto out_rele_rip;
	}
	mp->m_rootip = rip;	/* save it */

	xfs_iunlock(rip, XFS_ILOCK_EXCL);

	/*
	 * Initialize realtime inode pointers in the mount structure
	 */
	error = xfs_rtmount_inodes(mp);
	if (error) {
		/*
		 * Free up the root inode.
		 */
		xfs_warn(mp, "failed to read RT inodes");
		goto out_rele_rip;
	}

	/*
	 * If this is a read-only mount defer the superblock updates until
	 * the next remount into writeable mode. Otherwise we would never
	 * perform the update e.g. for the root filesystem.
	 */
	if (mp->m_update_sb && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
		error = xfs_sync_sb(mp, false);
		if (error) {
			xfs_warn(mp, "failed to write sb changes");
			goto out_rtunmount;
		}
	}

	/*
	 * Initialise the XFS quota management subsystem for this mount
	 */
	if (XFS_IS_QUOTA_ON(mp)) {
		error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
		if (error)
			goto out_rtunmount;
	} else {
		/*
		 * If a file system had quotas running earlier, but decided to
		 * mount without -o uquota/pquota/gquota options, revoke the
		 * quotachecked license.
		 */
		if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
			xfs_notice(mp, "resetting quota flags");
			error = xfs_mount_reset_sbqflags(mp);
			if (error)
				goto out_rtunmount;
		}
	}

	/*
	 * Finish recovering the file system. This part needed to be delayed
	 * until after the root and real-time bitmap inodes were consistently
	 * read in. Temporarily create per-AG space reservations for metadata
	 * btree shape changes because space freeing transactions (for inode
	 * inactivation) require the per-AG reservation in lieu of reserving
	 * blocks.
	 */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error && error == -ENOSPC)
		xfs_warn(mp,
	"ENOSPC reserving per-AG metadata pool, log recovery may fail.");
	error = xfs_log_mount_finish(mp);
	xfs_fs_unreserve_ag_blocks(mp);
	if (error) {
		xfs_warn(mp, "log mount finish failed");
		goto out_rtunmount;
	}

	/*
	 * Now the log is fully replayed, we can transition to full read-only
	 * mode for read-only mounts. This will sync all the metadata and clean
	 * the log so that the recovery we just performed does not have to be
	 * replayed again on the next mount.
	 *
	 * We use the same quiesce mechanism as the rw->ro remount, as they are
	 * semantically identical operations.
	 */
	if ((mp->m_flags & (XFS_MOUNT_RDONLY|XFS_MOUNT_NORECOVERY)) ==
							XFS_MOUNT_RDONLY) {
		xfs_log_clean(mp);
	}

	/*
	 * Complete the quota initialisation, post-log-replay component.
	 */
	if (quotamount) {
		ASSERT(mp->m_qflags == 0);
		mp->m_qflags = quotaflags;

		xfs_qm_mount_quotas(mp);
	}

	/*
	 * Now we are mounted, reserve a small amount of unused space for
	 * privileged transactions. This is needed so that transaction
	 * space required for critical operations can dip into this pool
	 * when at ENOSPC. This is needed for operations like create with
	 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
	 * are not allowed to use this reserved space.
	 *
	 * This may drive us straight to ENOSPC on mount, but that implies
	 * we were already there on the last unmount. Warn if this occurs.
	 */
	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
		resblks = xfs_default_resblks(mp);
		error = xfs_reserve_blocks(mp, &resblks, NULL);
		if (error)
			xfs_warn(mp,
	"Unable to allocate reserve blocks. Continuing without reserve pool.");

		/* Recover any CoW blocks that never got remapped. */
		error = xfs_reflink_recover_cow(mp);
		if (error) {
			xfs_err(mp,
	"Error %d recovering leftover CoW allocations.", error);
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			goto out_quota;
		}

		/* Reserve AG blocks for future btree expansion. */
		error = xfs_fs_reserve_ag_blocks(mp);
		if (error && error != -ENOSPC)
			goto out_agresv;
	}

	return 0;

 out_agresv:
	xfs_fs_unreserve_ag_blocks(mp);
 out_quota:
	xfs_qm_unmount_quotas(mp);
 out_rtunmount:
	xfs_rtunmount_inodes(mp);
 out_rele_rip:
	xfs_irele(rip);
	/* Clean out dquots that might be in memory after quotacheck. */
	xfs_qm_unmount(mp);

	/*
	 * Inactivate all inodes that might still be in memory after a log
	 * intent recovery failure so that reclaim can free them. Metadata
	 * inodes and the root directory shouldn't need inactivation, but the
	 * mount failed for some reason, so pull down all the state and flee.
	 */
	xfs_inodegc_flush(mp);

	/*
	 * Flush all inode reclamation work and flush the log.
	 * We have to do this /after/ rtunmount and qm_unmount because those
	 * two will have scheduled delayed reclaim for the rt/quota inodes.
	 *
	 * This is slightly different from the unmountfs call sequence
	 * because we could be tearing down a partially set up mount. In
	 * particular, if log_mount_finish fails we bail out without calling
	 * qm_unmount_quotas and therefore rely on qm_unmount to release the
	 * quota inodes.
	 */
	xfs_unmount_flush_inodes(mp);
 out_log_dealloc:
	xfs_log_mount_cancel(mp);
 out_inodegc_shrinker:
	unregister_shrinker(&mp->m_inodegc_shrinker);
 out_fail_wait:
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_buftarg_drain(mp->m_logdev_targp);
	xfs_buftarg_drain(mp->m_ddev_targp);
 out_free_perag:
	xfs_free_perag(mp);
 out_free_dir:
	xfs_da_unmount(mp);
 out_remove_uuid:
	xfs_uuid_unmount(mp);
 out_remove_errortag:
	xfs_errortag_del(mp);
 out_remove_error_sysfs:
	xfs_error_sysfs_del(mp);
 out_del_stats:
	xfs_sysfs_del(&mp->m_stats.xs_kobj);
 out_remove_sysfs:
	xfs_sysfs_del(&mp->m_kobj);
 out:
	return error;
}

/*
 * This flushes out the inodes, dquots and the superblock, unmounts the
 * log and makes sure that incore structures are freed.
 */
void
xfs_unmountfs(
	struct xfs_mount	*mp)
{
	uint64_t		resblks;
	int			error;

	/*
	 * Perform all on-disk metadata updates required to inactivate inodes
	 * that the VFS evicted earlier in the unmount process. Freeing inodes
	 * and discarding CoW fork preallocations can cause shape changes to
	 * the free inode and refcount btrees, respectively, so we must finish
	 * this before we discard the metadata space reservations. Metadata
	 * inodes and the root directory do not require inactivation.
	 */
	xfs_inodegc_flush(mp);

	xfs_blockgc_stop(mp);
	xfs_fs_unreserve_ag_blocks(mp);
	xfs_qm_unmount_quotas(mp);
	xfs_rtunmount_inodes(mp);
	xfs_irele(mp->m_rootip);

	xfs_unmount_flush_inodes(mp);

	xfs_qm_unmount(mp);

	/*
	 * Unreserve any blocks we have so that when we unmount we don't account
	 * the reserved free space as used. This is really only necessary for
	 * lazy superblock counting because it trusts the incore superblock
	 * counters to be absolutely correct on clean unmount.
	 *
	 * We don't bother correcting this elsewhere for lazy superblock
	 * counting because on mount of an unclean filesystem we reconstruct the
	 * correct counter value and this is irrelevant.
	 *
	 * For non-lazy counter filesystems, this doesn't matter at all because
	 * we only ever apply deltas to the superblock and hence the incore
	 * value does not matter....
	 */
	resblks = 0;
	error = xfs_reserve_blocks(mp, &resblks, NULL);
	if (error)
		xfs_warn(mp, "Unable to free reserved block pool. "
				"Freespace may not be correct on next mount.");

	xfs_log_unmount(mp);
	xfs_da_unmount(mp);
	xfs_uuid_unmount(mp);

#if defined(DEBUG)
	xfs_errortag_clearall(mp);
#endif
	unregister_shrinker(&mp->m_inodegc_shrinker);
	xfs_free_perag(mp);

	xfs_errortag_del(mp);
	xfs_error_sysfs_del(mp);
	xfs_sysfs_del(&mp->m_stats.xs_kobj);
	xfs_sysfs_del(&mp->m_kobj);
}

/*
 * Determine whether modifications can proceed. The caller specifies the
 * minimum freeze level for which modifications should not be allowed. This
 * allows certain operations to proceed while the freeze sequence is in
 * progress, if necessary.
 */
bool
xfs_fs_writable(
	struct xfs_mount	*mp,
	int			level)
{
	ASSERT(level > SB_UNFROZEN);
	if ((mp->m_super->s_writers.frozen >= level) ||
	    XFS_FORCED_SHUTDOWN(mp) || (mp->m_flags & XFS_MOUNT_RDONLY))
		return false;

	return true;
}

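/*
 * Usage sketch (illustrative, not in the original source): a caller that
 * must not dirty the filesystem once a freeze has begun checks the freeze
 * level first, e.g. xfs_mount_reset_sbqflags() above does:
 *
 *	if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
 *		return 0;
 *
 * Passing a higher level such as SB_FREEZE_COMPLETE would instead permit
 * internal transactions to keep running until the freeze fully completes.
 */
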
int
xfs_mod_fdblocks(
	struct xfs_mount	*mp,
	int64_t			delta,
	bool			rsvd)
{
	int64_t			lcounter;
	long long		res_used;
	s32			batch;
	uint64_t		set_aside;

	if (delta > 0) {
		/*
		 * If the reserve pool is depleted, put blocks back into it
		 * first. Most of the time the pool is full.
		 */
		if (likely(mp->m_resblks == mp->m_resblks_avail)) {
			percpu_counter_add(&mp->m_fdblocks, delta);
			return 0;
		}

		spin_lock(&mp->m_sb_lock);
		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);

		if (res_used > delta) {
			mp->m_resblks_avail += delta;
		} else {
			delta -= res_used;
			mp->m_resblks_avail = mp->m_resblks;
			percpu_counter_add(&mp->m_fdblocks, delta);
		}
		spin_unlock(&mp->m_sb_lock);
		return 0;
	}

	/*
	 * Taking blocks away: we need to be more accurate the closer we
	 * are to zero.
	 *
	 * If the counter has a value of less than 2 * max batch size,
	 * then make everything serialise as we are real close to
	 * ENOSPC.
	 */
	if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
				     XFS_FDBLOCKS_BATCH) < 0)
		batch = 1;
	else
		batch = XFS_FDBLOCKS_BATCH;

	/*
	 * Set aside allocbt blocks because these blocks are tracked as free
	 * space but not available for allocation. Technically this means that a
	 * single reservation cannot consume all remaining free space, but the
	 * ratio of allocbt blocks to usable free blocks should be rather small.
	 * The tradeoff without this is that filesystems that maintain high
	 * perag block reservations can over reserve physical block availability
	 * and fail physical allocation, which leads to much more serious
	 * problems (i.e. transaction abort, pagecache discards, etc.) than
	 * slightly premature -ENOSPC.
	 */
	set_aside = mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
	percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
	if (__percpu_counter_compare(&mp->m_fdblocks, set_aside,
				     XFS_FDBLOCKS_BATCH) >= 0) {
		/* we had space! */
		return 0;
	}

	/*
	 * lock up the sb for dipping into reserves before releasing the space
	 * that took us to ENOSPC.
	 */
	spin_lock(&mp->m_sb_lock);
	percpu_counter_add(&mp->m_fdblocks, -delta);
	if (!rsvd)
		goto fdblocks_enospc;

	lcounter = (long long)mp->m_resblks_avail + delta;
	if (lcounter >= 0) {
		mp->m_resblks_avail = lcounter;
		spin_unlock(&mp->m_sb_lock);
		return 0;
	}
	xfs_warn_once(mp,
"Reserve blocks depleted! Consider increasing reserve pool size.");

fdblocks_enospc:
	spin_unlock(&mp->m_sb_lock);
	return -ENOSPC;
}

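/*
 * Batching example (illustrative, not in the original source): while free
 * space is plentiful, per-cpu deltas accumulate up to XFS_FDBLOCKS_BATCH
 * before being folded into the global count, so concurrent allocators rarely
 * contend on the shared counter. Once the summed counter drops below
 * 2 * XFS_FDBLOCKS_BATCH, batch is forced to 1 and every modification
 * updates the global count immediately, trading scalability for an accurate
 * ENOSPC decision right where it matters.
 */
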
int
xfs_mod_frextents(
	struct xfs_mount	*mp,
	int64_t			delta)
{
	int64_t			lcounter;
	int			ret = 0;

	spin_lock(&mp->m_sb_lock);
	lcounter = mp->m_sb.sb_frextents + delta;
	if (lcounter < 0)
		ret = -ENOSPC;
	else
		mp->m_sb.sb_frextents = lcounter;
	spin_unlock(&mp->m_sb_lock);
	return ret;
}

/*
 * Used to free the superblock along various error paths.
 */
void
xfs_freesb(
	struct xfs_mount	*mp)
{
	struct xfs_buf		*bp = mp->m_sb_bp;

	xfs_buf_lock(bp);
	mp->m_sb_bp = NULL;
	xfs_buf_relse(bp);
}

/*
 * If the underlying (data/log/rt) device is readonly, there are some
 * operations that cannot proceed.
 */
int
xfs_dev_is_read_only(
	struct xfs_mount	*mp,
	char			*message)
{
	if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
	    xfs_readonly_buftarg(mp->m_logdev_targp) ||
	    (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
		xfs_notice(mp, "%s required on read-only device.", message);
		xfs_notice(mp, "write access unavailable, cannot proceed.");
		return -EROFS;
	}
	return 0;
}

/* Force the summary counters to be recalculated at next mount. */
void
xfs_force_summary_recalc(
	struct xfs_mount	*mp)
{
	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
		return;

	xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
}

/*
 * Enable a log incompat feature flag in the primary superblock. The caller
 * cannot have any other transactions in progress.
 */
int
xfs_add_incompat_log_feature(
	struct xfs_mount	*mp,
	uint32_t		feature)
{
	struct xfs_dsb		*dsb;
	int			error;

	ASSERT(hweight32(feature) == 1);
	ASSERT(!(feature & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));

	/*
	 * Force the log to disk and kick the background AIL thread to reduce
	 * the chances that the bwrite will stall waiting for the AIL to unpin
	 * the primary superblock buffer. This isn't a data integrity
	 * operation, so we don't need a synchronous push.
	 */
	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return error;
	xfs_ail_push_all(mp->m_ail);

	/*
	 * Lock the primary superblock buffer to serialize all callers that
	 * are trying to set feature bits.
	 */
	xfs_buf_lock(mp->m_sb_bp);
	xfs_buf_hold(mp->m_sb_bp);

	if (XFS_FORCED_SHUTDOWN(mp)) {
		error = -EIO;
		goto rele;
	}

	if (xfs_sb_has_incompat_log_feature(&mp->m_sb, feature))
		goto rele;

	/*
	 * Write the primary superblock to disk immediately, because we need
	 * the log_incompat bit to be set in the primary super now to protect
	 * the log items that we're going to commit later.
	 */
	dsb = mp->m_sb_bp->b_addr;
	xfs_sb_to_disk(dsb, &mp->m_sb);
	dsb->sb_features_log_incompat |= cpu_to_be32(feature);
	error = xfs_bwrite(mp->m_sb_bp);
	if (error)
		goto shutdown;

	/*
	 * Add the feature bits to the incore superblock before we unlock the
	 * buffer.
	 */
	xfs_sb_add_incompat_log_features(&mp->m_sb, feature);
	xfs_buf_relse(mp->m_sb_bp);

	/* Log the superblock to disk. */
	return xfs_sync_sb(mp, false);
shutdown:
	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
rele:
	xfs_buf_relse(mp->m_sb_bp);
	return error;
}

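/*
 * Ordering rationale (illustrative, not in the original source): the feature
 * bit must be durable on disk *before* any log item protected by it is
 * committed; otherwise an old kernel could attempt crash recovery on a log
 * containing items it cannot parse. Hence the sequence above: bwrite the
 * primary super with the new bit first, and only then update the incore
 * superblock and let callers start committing the protected items.
 */
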
/*
 * Clear all the log incompat flags from the superblock.
 *
 * The caller cannot be in a transaction, must ensure that the log does not
 * contain any log items protected by any log incompat bit, and must ensure
 * that there are no other threads that depend on the state of the log incompat
 * feature flags in the primary super.
 *
 * Returns true if the superblock is dirty.
 */
bool
xfs_clear_incompat_log_features(
	struct xfs_mount	*mp)
{
	bool			ret = false;

	if (!xfs_sb_version_hascrc(&mp->m_sb) ||
	    !xfs_sb_has_incompat_log_feature(&mp->m_sb,
				XFS_SB_FEAT_INCOMPAT_LOG_ALL) ||
	    XFS_FORCED_SHUTDOWN(mp))
		return false;

	/*
	 * Update the incore superblock. We synchronize on the primary super
	 * buffer lock to be consistent with the add function, though at least
	 * in theory this shouldn't be necessary.
	 */
	xfs_buf_lock(mp->m_sb_bp);
	xfs_buf_hold(mp->m_sb_bp);

	if (xfs_sb_has_incompat_log_feature(&mp->m_sb,
				XFS_SB_FEAT_INCOMPAT_LOG_ALL)) {
		xfs_info(mp, "Clearing log incompat feature flags.");
		xfs_sb_remove_incompat_log_features(&mp->m_sb);
		ret = true;
	}

	xfs_buf_relse(mp->m_sb_bp);
	return ret;
}

/*
 * Update the in-core delayed block counter.
 *
 * We prefer to update the counter without having to take a spinlock for every
 * counter update (i.e. batching). Each change to delayed allocation
 * reservations can easily exceed the default percpu counter batching, so we
 * use a larger batch factor here.
 *
 * Note that we don't currently have any callers requiring fast summation
 * (e.g. percpu_counter_read) so we can use a big batch value here.
 */
#define XFS_DELALLOC_BATCH	(4096)
void
xfs_mod_delalloc(
	struct xfs_mount	*mp,
	int64_t			delta)
{
	percpu_counter_add_batch(&mp->m_delalloc_blks, delta,
			XFS_DELALLOC_BATCH);
}
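
/*
 * Sizing note (illustrative, not in the original source): buffered writes
 * typically reserve many delalloc blocks at once, so with a batch of 4096
 * each CPU can absorb roughly 16 MiB worth of 4 KiB-block reservations
 * before touching the shared count. Since nothing reads this counter on a
 * hot path (it is only summed precisely, e.g. to check for leaked
 * reservations at teardown), the large batch costs nothing in accuracy
 * where it matters.
 */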