]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/ext4/super.c
ext4: fix error handling in ext4_fc_record_modified_inode()
[mirror_ubuntu-jammy-kernel.git] / fs / ext4 / super.c
CommitLineData
f5166768 1// SPDX-License-Identifier: GPL-2.0
ac27a0ec 2/*
617ba13b 3 * linux/fs/ext4/super.c
ac27a0ec
DK
4 *
5 * Copyright (C) 1992, 1993, 1994, 1995
6 * Remy Card (card@masi.ibp.fr)
7 * Laboratoire MASI - Institut Blaise Pascal
8 * Universite Pierre et Marie Curie (Paris VI)
9 *
10 * from
11 *
12 * linux/fs/minix/inode.c
13 *
14 * Copyright (C) 1991, 1992 Linus Torvalds
15 *
16 * Big-endian to little-endian byte-swapping/bitmaps by
17 * David S. Miller (davem@caip.rutgers.edu), 1995
18 */
19
20#include <linux/module.h>
21#include <linux/string.h>
22#include <linux/fs.h>
23#include <linux/time.h>
c5ca7c76 24#include <linux/vmalloc.h>
ac27a0ec
DK
25#include <linux/slab.h>
26#include <linux/init.h>
27#include <linux/blkdev.h>
66114cad 28#include <linux/backing-dev.h>
ac27a0ec 29#include <linux/parser.h>
ac27a0ec 30#include <linux/buffer_head.h>
a5694255 31#include <linux/exportfs.h>
ac27a0ec
DK
32#include <linux/vfs.h>
33#include <linux/random.h>
34#include <linux/mount.h>
35#include <linux/namei.h>
36#include <linux/quotaops.h>
37#include <linux/seq_file.h>
3197ebdb 38#include <linux/ctype.h>
1330593e 39#include <linux/log2.h>
717d50e4 40#include <linux/crc16.h>
ef510424 41#include <linux/dax.h>
7abc52c2 42#include <linux/cleancache.h>
7c0f6ba6 43#include <linux/uaccess.h>
ee73f9a5 44#include <linux/iversion.h>
c83ad55e 45#include <linux/unicode.h>
c6a564ff 46#include <linux/part_stat.h>
bfff6873
LC
47#include <linux/kthread.h>
48#include <linux/freezer.h>
49
3dcf5451 50#include "ext4.h"
4a092d73 51#include "ext4_extents.h" /* Needed for trace points definition */
3dcf5451 52#include "ext4_jbd2.h"
ac27a0ec
DK
53#include "xattr.h"
54#include "acl.h"
3661d286 55#include "mballoc.h"
0c9ec4be 56#include "fsmap.h"
ac27a0ec 57
9bffad1e
TT
58#define CREATE_TRACE_POINTS
59#include <trace/events/ext4.h>
60
0b75a840 61static struct ext4_lazy_init *ext4_li_info;
59ebc7fd 62static DEFINE_MUTEX(ext4_li_mtx);
e294a537 63static struct ratelimit_state ext4_mount_msg_ratelimit;
9f6200bb 64
617ba13b 65static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
ac27a0ec 66 unsigned long journal_devnum);
2adf6da8 67static int ext4_show_options(struct seq_file *seq, struct dentry *root);
2d01ddc8 68static void ext4_update_super(struct super_block *sb);
4392fbc4 69static int ext4_commit_super(struct super_block *sb);
11215630 70static int ext4_mark_recovery_complete(struct super_block *sb,
2b2d6d01 71 struct ext4_super_block *es);
11215630
JK
72static int ext4_clear_journal_err(struct super_block *sb,
73 struct ext4_super_block *es);
617ba13b 74static int ext4_sync_fs(struct super_block *sb, int wait);
2b2d6d01
TT
75static int ext4_remount(struct super_block *sb, int *flags, char *data);
76static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
c4be0c1d 77static int ext4_unfreeze(struct super_block *sb);
c4be0c1d 78static int ext4_freeze(struct super_block *sb);
152a0836
AV
79static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
80 const char *dev_name, void *data);
2035e776
TT
81static inline int ext2_feature_set_ok(struct super_block *sb);
82static inline int ext3_feature_set_ok(struct super_block *sb);
bfff6873
LC
83static void ext4_destroy_lazyinit_thread(void);
84static void ext4_unregister_li_request(struct super_block *sb);
8f1f7453 85static void ext4_clear_request_list(void);
c6cb7e77
EW
86static struct inode *ext4_get_journal_inode(struct super_block *sb,
87 unsigned int journal_inum);
ac27a0ec 88
e74031fd
JK
89/*
90 * Lock ordering
91 *
e74031fd 92 * page fault path:
d4f5258e
JK
93 * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
94 * -> page lock -> i_data_sem (rw)
e74031fd
JK
95 *
96 * buffered write path:
c1e8d7c6 97 * sb_start_write -> i_mutex -> mmap_lock
e74031fd
JK
98 * sb_start_write -> i_mutex -> transaction start -> page lock ->
99 * i_data_sem (rw)
100 *
101 * truncate:
d4f5258e
JK
102 * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
103 * page lock
104 * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
1d39834f 105 * i_data_sem (rw)
e74031fd
JK
106 *
107 * direct IO:
c1e8d7c6 108 * sb_start_write -> i_mutex -> mmap_lock
1d39834f 109 * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
e74031fd
JK
110 *
111 * writepages:
112 * transaction start -> page lock(s) -> i_data_sem (rw)
113 */
114
c290ea01 115#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
2035e776
TT
116static struct file_system_type ext2_fs_type = {
117 .owner = THIS_MODULE,
118 .name = "ext2",
119 .mount = ext4_mount,
120 .kill_sb = kill_block_super,
121 .fs_flags = FS_REQUIRES_DEV,
122};
7f78e035 123MODULE_ALIAS_FS("ext2");
fa7614dd 124MODULE_ALIAS("ext2");
2035e776
TT
125#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
126#else
127#define IS_EXT2_SB(sb) (0)
128#endif
129
130
ba69f9ab
JK
131static struct file_system_type ext3_fs_type = {
132 .owner = THIS_MODULE,
133 .name = "ext3",
152a0836 134 .mount = ext4_mount,
ba69f9ab
JK
135 .kill_sb = kill_block_super,
136 .fs_flags = FS_REQUIRES_DEV,
137};
7f78e035 138MODULE_ALIAS_FS("ext3");
fa7614dd 139MODULE_ALIAS("ext3");
ba69f9ab 140#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
bd81d8ee 141
fa491b14 142
/*
 * Submit a read for @bh.  The buffer must be locked by the caller; the
 * lock is released by the end_io handler when the IO completes.
 */
static inline void __ext4_read_bh(struct buffer_head *bh, int op_flags,
				  bh_end_io_t *end_io)
{
	/*
	 * buffer's verified bit is no longer valid after reading from
	 * disk again due to write out error, clear it to make sure we
	 * recheck the buffer contents.
	 */
	clear_buffer_verified(bh);

	/* Default to the generic completion handler when none is given. */
	bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
	/* Reference taken here is dropped by the end_io handler. */
	get_bh(bh);
	submit_bh(REQ_OP_READ, op_flags, bh);
}
157
/*
 * Start an asynchronous read of @bh.  The buffer must be locked; if it
 * is already valid (per ext4_buffer_uptodate()) the lock is dropped and
 * no IO is issued.
 */
void ext4_read_bh_nowait(struct buffer_head *bh, int op_flags,
			 bh_end_io_t *end_io)
{
	BUG_ON(!buffer_locked(bh));

	if (ext4_buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return;
	}
	__ext4_read_bh(bh, op_flags, end_io);
}
169
/*
 * Synchronously read @bh.  The buffer must be locked on entry.
 *
 * Returns 0 if the buffer is (or already was) up to date, -EIO if the
 * read failed.
 */
int ext4_read_bh(struct buffer_head *bh, int op_flags, bh_end_io_t *end_io)
{
	BUG_ON(!buffer_locked(bh));

	if (ext4_buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	__ext4_read_bh(bh, op_flags, end_io);

	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}
186
/*
 * Lock @bh and read it in.  If the lock is contended, either wait for
 * the holder's IO to complete (@wait) or return immediately.
 *
 * Returns 0 on success; -EIO only when @wait is set and the buffer is
 * still not up to date afterwards.
 */
int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait)
{
	if (trylock_buffer(bh)) {
		if (wait)
			return ext4_read_bh(bh, op_flags, NULL);
		ext4_read_bh_nowait(bh, op_flags, NULL);
		return 0;
	}
	if (wait) {
		/* Someone else holds the lock; piggyback on their IO. */
		wait_on_buffer(bh);
		if (buffer_uptodate(bh))
			return 0;
		return -EIO;
	}
	return 0;
}
203
fb265c9c 204/*
8394a6ab 205 * This works like __bread_gfp() except it uses ERR_PTR for error
fb265c9c
TT
206 * returns. Currently with sb_bread it's impossible to distinguish
207 * between ENOMEM and EIO situations (since both result in a NULL
208 * return.
209 */
8394a6ab 210static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
211 sector_t block, int op_flags,
212 gfp_t gfp)
fb265c9c 213{
2d069c08 214 struct buffer_head *bh;
215 int ret;
fb265c9c 216
8394a6ab 217 bh = sb_getblk_gfp(sb, block, gfp);
fb265c9c
TT
218 if (bh == NULL)
219 return ERR_PTR(-ENOMEM);
cf2834a5 220 if (ext4_buffer_uptodate(bh))
fb265c9c 221 return bh;
2d069c08 222
223 ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
224 if (ret) {
225 put_bh(bh);
226 return ERR_PTR(ret);
227 }
228 return bh;
fb265c9c
TT
229}
230
8394a6ab 231struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
232 int op_flags)
233{
234 return __ext4_sb_bread_gfp(sb, block, op_flags, __GFP_MOVABLE);
235}
236
/*
 * Like ext4_sb_bread() but the page must not be migrated (gfp 0, so no
 * __GFP_MOVABLE).  Returns the buffer_head or an ERR_PTR.
 */
struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
					    sector_t block)
{
	return __ext4_sb_bread_gfp(sb, block, 0, 0);
}
242
5df1d412 243void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
244{
245 struct buffer_head *bh = sb_getblk_gfp(sb, block, 0);
246
247 if (likely(bh)) {
248 ext4_read_bh_lock(bh, REQ_RAHEAD, false);
249 brelse(bh);
250 }
fb265c9c
TT
251}
252
d25425f8
DW
/*
 * Returns 1 when the superblock's checksum type is acceptable: either
 * metadata checksums are disabled, or the type is crc32c (the only
 * supported algorithm).  Returns 0 otherwise.
 */
static int ext4_verify_csum_type(struct super_block *sb,
				 struct ext4_super_block *es)
{
	if (!ext4_has_feature_metadata_csum(sb))
		return 1;

	return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
}
261
a9c47317
DW
/*
 * Compute the superblock checksum over everything up to (but not
 * including) the s_checksum field itself.
 */
static __le32 ext4_superblock_csum(struct super_block *sb,
				   struct ext4_super_block *es)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int offset = offsetof(struct ext4_super_block, s_checksum);
	__u32 csum;

	csum = ext4_chksum(sbi, ~0, (char *)es, offset);

	return cpu_to_le32(csum);
}
273
c197855e
SH
/*
 * Returns 1 when the stored superblock checksum matches the computed
 * one (or metadata checksums are disabled), 0 on mismatch.
 */
static int ext4_superblock_csum_verify(struct super_block *sb,
				       struct ext4_super_block *es)
{
	if (!ext4_has_metadata_csum(sb))
		return 1;

	return es->s_checksum == ext4_superblock_csum(sb, es);
}
282
06db49e6 283void ext4_superblock_csum_set(struct super_block *sb)
a9c47317 284{
06db49e6
TT
285 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
286
9aa5d32b 287 if (!ext4_has_metadata_csum(sb))
a9c47317
DW
288 return;
289
290 es->s_checksum = ext4_superblock_csum(sb, es);
291}
292
8fadc143
AR
293ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
294 struct ext4_group_desc *bg)
bd81d8ee 295{
3a14589c 296 return le32_to_cpu(bg->bg_block_bitmap_lo) |
8fadc143 297 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 298 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
bd81d8ee
LV
299}
300
8fadc143
AR
301ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
302 struct ext4_group_desc *bg)
bd81d8ee 303{
5272f837 304 return le32_to_cpu(bg->bg_inode_bitmap_lo) |
8fadc143 305 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 306 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
bd81d8ee
LV
307}
308
8fadc143
AR
309ext4_fsblk_t ext4_inode_table(struct super_block *sb,
310 struct ext4_group_desc *bg)
bd81d8ee 311{
5272f837 312 return le32_to_cpu(bg->bg_inode_table_lo) |
8fadc143 313 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 314 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
bd81d8ee
LV
315}
316
021b65bb
TT
317__u32 ext4_free_group_clusters(struct super_block *sb,
318 struct ext4_group_desc *bg)
560671a0
AK
319{
320 return le16_to_cpu(bg->bg_free_blocks_count_lo) |
321 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 322 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
560671a0
AK
323}
324
325__u32 ext4_free_inodes_count(struct super_block *sb,
326 struct ext4_group_desc *bg)
327{
328 return le16_to_cpu(bg->bg_free_inodes_count_lo) |
329 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 330 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
560671a0
AK
331}
332
333__u32 ext4_used_dirs_count(struct super_block *sb,
334 struct ext4_group_desc *bg)
335{
336 return le16_to_cpu(bg->bg_used_dirs_count_lo) |
337 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 338 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
560671a0
AK
339}
340
341__u32 ext4_itable_unused_count(struct super_block *sb,
342 struct ext4_group_desc *bg)
343{
344 return le16_to_cpu(bg->bg_itable_unused_lo) |
345 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 346 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
560671a0
AK
347}
348
8fadc143
AR
/* Store the block-bitmap location into @bg, split into lo/hi words. */
void ext4_block_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
	/* The high word exists only in 64-bit sized descriptors. */
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
}
356
8fadc143
AR
/* Store the inode-bitmap location into @bg, split into lo/hi words. */
void ext4_inode_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
	/* The high word exists only in 64-bit sized descriptors. */
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
}
364
8fadc143
AR
/* Store the inode-table location into @bg, split into lo/hi words. */
void ext4_inode_table_set(struct super_block *sb,
			  struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
	/* The high word exists only in 64-bit sized descriptors. */
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}
372
021b65bb
TT
/* Store the free-cluster count into @bg, split into lo/hi halves. */
void ext4_free_group_clusters_set(struct super_block *sb,
				  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
	/* The high half exists only in 64-bit sized descriptors. */
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
}
380
/* Store the free-inode count into @bg, split into lo/hi halves. */
void ext4_free_inodes_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
	/* The high half exists only in 64-bit sized descriptors. */
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
}
388
/* Store the used-directories count into @bg, split into lo/hi halves. */
void ext4_used_dirs_set(struct super_block *sb,
			struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
	/* The high half exists only in 64-bit sized descriptors. */
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
}
396
/* Store the unused inode-table entries count into @bg, split lo/hi. */
void ext4_itable_unused_set(struct super_block *sb,
			    struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
	/* The high half exists only in 64-bit sized descriptors. */
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}
404
c92dc856 405static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
6a0678a7 406{
6a0678a7
AB
407 now = clamp_val(now, 0, (1ull << 40) - 1);
408
409 *lo = cpu_to_le32(lower_32_bits(now));
410 *hi = upper_32_bits(now);
411}
412
/* Reassemble a 40-bit timestamp from its lo word and hi byte. */
static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
{
	return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
}
417#define ext4_update_tstamp(es, tstamp) \
c92dc856
JK
418 __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
419 ktime_get_real_seconds())
6a0678a7
AB
420#define ext4_get_tstamp(es, tstamp) \
421 __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
d3d1faf6 422
bdfe0cbd
TT
/*
 * The del_gendisk() function uninitializes the disk-specific data
 * structures, including the bdi structure, without telling anyone
 * else.  Once this happens, any attempt to call mark_buffer_dirty()
 * (for example, by ext4_commit_super), will cause a kernel OOPS.
 * This is a kludge to prevent these oops until we can put in a proper
 * hook in del_gendisk() to inform the VFS and file system layers.
 *
 * Returns nonzero when the backing device has gone away.
 */
static int block_device_ejected(struct super_block *sb)
{
	struct inode *bd_inode = sb->s_bdev->bd_inode;
	struct backing_dev_info *bdi = inode_to_bdi(bd_inode);

	return bdi->dev == NULL;
}
438
18aadd47
BJ
/*
 * jbd2 commit callback: run every ext4 callback queued on the committed
 * transaction's private list (e.g. freed-data processing), passing along
 * whether the journal has aborted.
 */
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int error = is_journal_aborted(journal);
	struct ext4_journal_cb_entry *jce;

	BUG_ON(txn->t_state == T_FINISHED);

	ext4_process_freed_data(sb, txn->t_tid);

	spin_lock(&sbi->s_md_lock);
	while (!list_empty(&txn->t_private_list)) {
		jce = list_entry(txn->t_private_list.next,
				 struct ext4_journal_cb_entry, jce_list);
		list_del_init(&jce->jce_list);
		/*
		 * Drop the lock while invoking the callback; it is
		 * reacquired before examining the list again.
		 */
		spin_unlock(&sbi->s_md_lock);
		jce->jce_func(sb, jce, error);
		spin_lock(&sbi->s_md_lock);
	}
	spin_unlock(&sbi->s_md_lock);
}
1c13d5c0 461
afb585a9
MFO
/*
 * This writepage callback for write_cache_pages()
 * takes care of a few cases after page cleaning.
 *
 * write_cache_pages() already checks for dirty pages
 * and calls clear_page_dirty_for_io(), which we want,
 * to write protect the pages.
 *
 * However, we may have to redirty a page (see below.)
 *
 * Always returns AOP_WRITEPAGE_ACTIVATE: the page itself is never
 * written here, only (possibly) redirtied.
 */
static int ext4_journalled_writepage_callback(struct page *page,
					      struct writeback_control *wbc,
					      void *data)
{
	transaction_t *transaction = (transaction_t *) data;
	struct buffer_head *bh, *head;
	struct journal_head *jh;

	bh = head = page_buffers(page);
	do {
		/*
		 * We have to redirty a page in these cases:
		 * 1) If buffer is dirty, it means the page was dirty because it
		 * contains a buffer that needs checkpointing. So the dirty bit
		 * needs to be preserved so that checkpointing writes the buffer
		 * properly.
		 * 2) If buffer is not part of the committing transaction
		 * (we may have just accidentally come across this buffer because
		 * inode range tracking is not exact) or if the currently running
		 * transaction already contains this buffer as well, dirty bit
		 * needs to be preserved so that the buffer gets writeprotected
		 * properly on running transaction's commit.
		 */
		jh = bh2jh(bh);
		if (buffer_dirty(bh) ||
		    (jh && (jh->b_transaction != transaction ||
			    jh->b_next_transaction))) {
			redirty_page_for_writepage(wbc, page);
			goto out;
		}
	} while ((bh = bh->b_this_page) != head);

out:
	return AOP_WRITEPAGE_ACTIVATE;
}
507
/*
 * Write back the dirty range tracked in @jinode for a data=journal
 * inode, using ext4_journalled_writepage_callback() so buffers that
 * must stay dirty (for checkpointing or another transaction) are
 * redirtied instead of written.
 */
static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
	struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = jinode->i_dirty_start,
		.range_end = jinode->i_dirty_end,
	};

	return write_cache_pages(mapping, &wbc,
				 ext4_journalled_writepage_callback,
				 jinode->i_transaction);
}
522
523static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
524{
525 int ret;
526
527 if (ext4_should_journal_data(jinode->i_vfs_inode))
528 ret = ext4_journalled_submit_inode_data_buffers(jinode);
529 else
530 ret = jbd2_journal_submit_inode_data_buffers(jinode);
531
532 return ret;
533}
534
535static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
536{
537 int ret = 0;
538
539 if (!ext4_should_journal_data(jinode->i_vfs_inode))
540 ret = jbd2_journal_finish_inode_data_buffers(jinode);
541
542 return ret;
543}
544
1dc1097f
JK
545static bool system_going_down(void)
546{
547 return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
548 || system_state == SYSTEM_RESTART;
549}
550
02a7780e
JK
/* Pairs a kernel errno with the EXT4_ERR_* code stored on disk. */
struct ext4_err_translation {
	int code;	/* EXT4_ERR_* on-disk error code */
	int errno;	/* corresponding kernel errno */
};

#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }

/* Errnos that have a dedicated on-disk representation. */
static struct ext4_err_translation err_translation[] = {
	EXT4_ERR_TRANSLATE(EIO),
	EXT4_ERR_TRANSLATE(ENOMEM),
	EXT4_ERR_TRANSLATE(EFSBADCRC),
	EXT4_ERR_TRANSLATE(EFSCORRUPTED),
	EXT4_ERR_TRANSLATE(ENOSPC),
	EXT4_ERR_TRANSLATE(ENOKEY),
	EXT4_ERR_TRANSLATE(EROFS),
	EXT4_ERR_TRANSLATE(EFBIG),
	EXT4_ERR_TRANSLATE(EEXIST),
	EXT4_ERR_TRANSLATE(ERANGE),
	EXT4_ERR_TRANSLATE(EOVERFLOW),
	EXT4_ERR_TRANSLATE(EBUSY),
	EXT4_ERR_TRANSLATE(ENOTDIR),
	EXT4_ERR_TRANSLATE(ENOTEMPTY),
	EXT4_ERR_TRANSLATE(ESHUTDOWN),
	EXT4_ERR_TRANSLATE(EFAULT),
};
576
/*
 * Translate a kernel errno to the on-disk EXT4_ERR_* code via linear
 * search of err_translation[]; unknown errnos map to EXT4_ERR_UNKNOWN.
 */
static int ext4_errno_to_code(int errno)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(err_translation); i++)
		if (err_translation[i].errno == errno)
			return err_translation[i].code;
	return EXT4_ERR_UNKNOWN;
}
586
2d01ddc8
JK
/*
 * Record error details in the in-memory superblock info under
 * s_error_lock.  The "first error" slots are written only once; the
 * "last error" slots are overwritten on every call.  Writing the info
 * out to disk happens elsewhere.
 */
static void save_error_info(struct super_block *sb, int error,
			    __u32 ino, __u64 block,
			    const char *func, unsigned int line)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/* We default to EFSCORRUPTED error... */
	if (error == 0)
		error = EFSCORRUPTED;

	spin_lock(&sbi->s_error_lock);
	sbi->s_add_error_count++;
	sbi->s_last_error_code = error;
	sbi->s_last_error_line = line;
	sbi->s_last_error_ino = ino;
	sbi->s_last_error_block = block;
	sbi->s_last_error_func = func;
	sbi->s_last_error_time = ktime_get_real_seconds();
	/* A zero first-error time means no error recorded yet. */
	if (!sbi->s_first_error_time) {
		sbi->s_first_error_code = error;
		sbi->s_first_error_line = line;
		sbi->s_first_error_ino = ino;
		sbi->s_first_error_block = block;
		sbi->s_first_error_func = func;
		sbi->s_first_error_time = sbi->s_last_error_time;
	}
	spin_unlock(&sbi->s_error_lock);
}
615
ac27a0ec
DK
/* Deal with the reporting of failure conditions on a filesystem such as
 * inconsistencies detected or read IO failures.
 *
 * On ext2, we can store the error state of the filesystem in the
 * superblock.  That is not possible on ext4, because we may have other
 * write ordering constraints on the superblock which prevent us from
 * writing it out straight away; and given that the journal is about to
 * be aborted, we can't rely on the current, or future, transactions to
 * write out the superblock safely.
 *
 * We'll just use the jbd2_journal_abort() error code to record an error in
 * the journal instead.  On recovery, the journal will complain about
 * that error until we've noted it down and cleared it.
 *
 * If force_ro is set, we unconditionally force the filesystem into an
 * ABORT|READONLY state, unless the error response on the fs has been set to
 * panic in which case we take the easy way out and panic immediately. This is
 * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
 * at a critical moment in log management.
 */
static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
			      __u32 ino, __u64 block,
			      const char *func, unsigned int line)
{
	journal_t *journal = EXT4_SB(sb)->s_journal;
	/* continue_fs: mount policy says keep running after the error. */
	bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);

	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
	if (test_opt(sb, WARN_ON_ERROR))
		WARN_ON_ONCE(1);

	/* Not continuing and still writable: abort the journal. */
	if (!continue_fs && !sb_rdonly(sb)) {
		ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
		if (journal)
			jbd2_journal_abort(journal, -EIO);
	}

	if (!bdev_read_only(sb->s_bdev)) {
		save_error_info(sb, error, ino, block, func, line);
		/*
		 * In case the fs should keep running, we need to writeout
		 * superblock through the journal. Due to lock ordering
		 * constraints, it may not be safe to do it right here so we
		 * defer superblock flushing to a workqueue.
		 */
		if (continue_fs && journal)
			schedule_work(&EXT4_SB(sb)->s_error_work);
		else
			ext4_commit_super(sb);
	}

	/*
	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
	 * could panic during 'reboot -f' as the underlying device got already
	 * disabled.
	 */
	if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
		panic("EXT4-fs (device %s): panic forced after error\n",
			sb->s_id);
	}

	/* Already read-only, or policy says continue: nothing more to do. */
	if (sb_rdonly(sb) || continue_fs)
		return;

	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
	/*
	 * Make sure updated value of ->s_mount_flags will be visible before
	 * ->s_flags update
	 */
	smp_wmb();
	sb->s_flags |= SB_RDONLY;
}
688
c92dc856
JK
/*
 * Workqueue function that flushes previously saved error info to the
 * on-disk superblock, preferring a journalled update when the journal
 * is still usable and falling back to a direct write otherwise.
 */
static void flush_stashed_error_work(struct work_struct *work)
{
	struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
						s_error_work);
	journal_t *journal = sbi->s_journal;
	handle_t *handle;

	/*
	 * If the journal is still running, we have to write out superblock
	 * through the journal to avoid collisions of other journalled sb
	 * updates.
	 *
	 * We use directly jbd2 functions here to avoid recursing back into
	 * ext4 error handling code during handling of previous errors.
	 */
	if (!sb_rdonly(sbi->s_sb) && journal) {
		struct buffer_head *sbh = sbi->s_sbh;
		handle = jbd2_journal_start(journal, 1);
		if (IS_ERR(handle))
			goto write_directly;
		if (jbd2_journal_get_write_access(handle, sbh)) {
			jbd2_journal_stop(handle);
			goto write_directly;
		}
		ext4_update_super(sbi->s_sb);
		/* Clear a stale write error so the sb can be written again. */
		if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
			ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
				 "superblock detected");
			clear_buffer_write_io_error(sbh);
			set_buffer_uptodate(sbh);
		}

		if (jbd2_journal_dirty_metadata(handle, sbh)) {
			jbd2_journal_stop(handle);
			goto write_directly;
		}
		jbd2_journal_stop(handle);
		ext4_notify_error_sysfs(sbi);
		return;
	}
write_directly:
	/*
	 * Write through journal failed. Write sb directly to get error info
	 * out and hope for the best.
	 */
	ext4_commit_super(sbi->s_sb);
	ext4_notify_error_sysfs(sbi);
}
737
efbed4dc
TT
738#define ext4_error_ratelimit(sb) \
739 ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
740 "EXT4-fs error")
741
12062ddd 742void __ext4_error(struct super_block *sb, const char *function,
014c9caa 743 unsigned int line, bool force_ro, int error, __u64 block,
54d3adbc 744 const char *fmt, ...)
ac27a0ec 745{
0ff2ea7d 746 struct va_format vaf;
ac27a0ec
DK
747 va_list args;
748
0db1ff22
TT
749 if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
750 return;
751
ccf0f32a 752 trace_ext4_error(sb, function, line);
efbed4dc
TT
753 if (ext4_error_ratelimit(sb)) {
754 va_start(args, fmt);
755 vaf.fmt = fmt;
756 vaf.va = &args;
757 printk(KERN_CRIT
758 "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
759 sb->s_id, function, line, current->comm, &vaf);
760 va_end(args);
761 }
e789ca0c 762 ext4_handle_error(sb, force_ro, error, 0, block, function, line);
ac27a0ec
DK
763}
764
e7c96e8e 765void __ext4_error_inode(struct inode *inode, const char *function,
54d3adbc 766 unsigned int line, ext4_fsblk_t block, int error,
e7c96e8e 767 const char *fmt, ...)
273df556
FM
768{
769 va_list args;
f7c21177 770 struct va_format vaf;
273df556 771
0db1ff22
TT
772 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
773 return;
774
ccf0f32a 775 trace_ext4_error(inode->i_sb, function, line);
efbed4dc
TT
776 if (ext4_error_ratelimit(inode->i_sb)) {
777 va_start(args, fmt);
778 vaf.fmt = fmt;
779 vaf.va = &args;
780 if (block)
781 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
782 "inode #%lu: block %llu: comm %s: %pV\n",
783 inode->i_sb->s_id, function, line, inode->i_ino,
784 block, current->comm, &vaf);
785 else
786 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
787 "inode #%lu: comm %s: %pV\n",
788 inode->i_sb->s_id, function, line, inode->i_ino,
789 current->comm, &vaf);
790 va_end(args);
791 }
e789ca0c
JK
792 ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
793 function, line);
273df556
FM
794}
795
e7c96e8e
JP
/*
 * Like __ext4_error_inode() but reports against an open file, including
 * its path (best effort) in the message.  Errors here are always
 * recorded as EFSCORRUPTED.  No-op after a forced shutdown.
 */
void __ext4_error_file(struct file *file, const char *function,
		       unsigned int line, ext4_fsblk_t block,
		       const char *fmt, ...)
{
	va_list args;
	struct va_format vaf;
	struct inode *inode = file_inode(file);
	char pathname[80], *path;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return;

	trace_ext4_error(inode->i_sb, function, line);
	if (ext4_error_ratelimit(inode->i_sb)) {
		path = file_path(file, pathname, sizeof(pathname));
		if (IS_ERR(path))
			path = "(unknown)";
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		/* Block 0 means "no block": omit it from the message. */
		if (block)
			printk(KERN_CRIT
			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
			       "block %llu: comm %s: path %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       block, current->comm, path, &vaf);
		else
			printk(KERN_CRIT
			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
			       "comm %s: path %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       current->comm, path, &vaf);
		va_end(args);
	}
	ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
			  function, line);
}
833
722887dd
TT
834const char *ext4_decode_error(struct super_block *sb, int errno,
835 char nbuf[16])
ac27a0ec
DK
836{
837 char *errstr = NULL;
838
839 switch (errno) {
6a797d27
DW
840 case -EFSCORRUPTED:
841 errstr = "Corrupt filesystem";
842 break;
843 case -EFSBADCRC:
844 errstr = "Filesystem failed CRC";
845 break;
ac27a0ec
DK
846 case -EIO:
847 errstr = "IO failure";
848 break;
849 case -ENOMEM:
850 errstr = "Out of memory";
851 break;
852 case -EROFS:
78f1ddbb
TT
853 if (!sb || (EXT4_SB(sb)->s_journal &&
854 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
ac27a0ec
DK
855 errstr = "Journal has aborted";
856 else
857 errstr = "Readonly filesystem";
858 break;
859 default:
860 /* If the caller passed in an extra buffer for unknown
861 * errors, textualise them now. Else we just return
862 * NULL. */
863 if (nbuf) {
864 /* Check for truncated error codes... */
865 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
866 errstr = nbuf;
867 }
868 break;
869 }
870
871 return errstr;
872}
873
617ba13b 874/* __ext4_std_error decodes expected errors from journaling functions
ac27a0ec
DK
875 * automatically and invokes the appropriate error response. */
876
c398eda0
TT
877void __ext4_std_error(struct super_block *sb, const char *function,
878 unsigned int line, int errno)
ac27a0ec
DK
879{
880 char nbuf[16];
881 const char *errstr;
882
0db1ff22
TT
883 if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
884 return;
885
ac27a0ec
DK
886 /* Special case: if the error is EROFS, and we're not already
887 * inside a transaction, then there's really no point in logging
888 * an error. */
bc98a42c 889 if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
ac27a0ec
DK
890 return;
891
efbed4dc
TT
892 if (ext4_error_ratelimit(sb)) {
893 errstr = ext4_decode_error(sb, errno, nbuf);
894 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
895 sb->s_id, function, line, errstr);
896 }
ac27a0ec 897
e789ca0c 898 ext4_handle_error(sb, false, -errno, 0, 0, function, line);
ac27a0ec
DK
899}
900
e7c96e8e
JP
/*
 * Rate-limited general-purpose message printk with the given log-level
 * @prefix.  Also counts every call (even suppressed ones) in
 * s_msg_count.
 */
void __ext4_msg(struct super_block *sb,
		const char *prefix, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	atomic_inc(&EXT4_SB(sb)->s_msg_count);
	if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
	va_end(args);
}
917
1cf006ed
DM
/*
 * Count a warning and return nonzero when it may be printed under the
 * warning rate limit.
 */
static int ext4_warning_ratelimit(struct super_block *sb)
{
	atomic_inc(&EXT4_SB(sb)->s_warning_count);
	return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
			    "EXT4-fs warning");
}
b03a2f7e 924
12062ddd 925void __ext4_warning(struct super_block *sb, const char *function,
c398eda0 926 unsigned int line, const char *fmt, ...)
ac27a0ec 927{
0ff2ea7d 928 struct va_format vaf;
ac27a0ec
DK
929 va_list args;
930
b03a2f7e 931 if (!ext4_warning_ratelimit(sb))
efbed4dc
TT
932 return;
933
ac27a0ec 934 va_start(args, fmt);
0ff2ea7d
JP
935 vaf.fmt = fmt;
936 vaf.va = &args;
937 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
938 sb->s_id, function, line, &vaf);
ac27a0ec
DK
939 va_end(args);
940}
941
/*
 * Like __ext4_warning() but additionally identifies the affected inode
 * and the command name of the current task.
 */
void __ext4_warning_inode(const struct inode *inode, const char *function,
			  unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (!ext4_warning_ratelimit(inode->i_sb))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
	       "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
	       function, line, inode->i_ino, current->comm, &vaf);
	va_end(args);
}
959
/*
 * Report a filesystem error while the caller holds the block-group lock
 * ("bitlock").  In the ERRORS_CONT case the error is saved and handled
 * asynchronously (via s_error_work) without dropping the lock; otherwise
 * the group lock is dropped around ext4_handle_error() and re-taken
 * before returning, as annotated by __releases/__acquires.
 */
void __ext4_grp_locked_error(const char *function, unsigned int line,
			     struct super_block *sb, ext4_group_t grp,
			     unsigned long ino, ext4_fsblk_t block,
			     const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
	struct va_format vaf;
	va_list args;

	/* Nothing useful to do once the fs has been forcibly shut down. */
	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
		return;

	trace_ext4_error(sb, function, line);
	if (ext4_error_ratelimit(sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
		       sb->s_id, function, line, grp);
		if (ino)
			printk(KERN_CONT "inode %lu: ", ino);
		if (block)
			printk(KERN_CONT "block %llu:",
			       (unsigned long long) block);
		printk(KERN_CONT "%pV\n", &vaf);
		va_end(args);
	}

	if (test_opt(sb, ERRORS_CONT)) {
		if (test_opt(sb, WARN_ON_ERROR))
			WARN_ON_ONCE(1);
		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
		if (!bdev_read_only(sb->s_bdev)) {
			save_error_info(sb, EFSCORRUPTED, ino, block, function,
					line);
			/* Persist the error info asynchronously. */
			schedule_work(&EXT4_SB(sb)->s_error_work);
		}
		return;
	}
	ext4_unlock_group(sb, grp);
	ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
	/*
	 * We only get here in the ERRORS_RO case; relocking the group
	 * may be dangerous, but nothing bad will happen since the
	 * filesystem will have already been marked read/only and the
	 * journal has been aborted.  We return 1 as a hint to callers
	 * who might want to use the return value from
	 * ext4_grp_locked_error() to distinguish between the
	 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
	 * aggressively from the ext4 function in question, with a
	 * more appropriate error code.
	 */
	ext4_lock_group(sb, grp);
	return;
}
1016
/*
 * Mark a group's block and/or inode bitmap (per @flags) as corrupted.
 * The test_and_set on bb_state guarantees the free-space counters are
 * only subtracted once per group, even if this is called repeatedly.
 */
void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
				      ext4_group_t group,
				      unsigned int flags)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
	int ret;

	if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
		ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
					    &grp->bb_state);
		/* First time only: stop counting this group's free clusters. */
		if (!ret)
			percpu_counter_sub(&sbi->s_freeclusters_counter,
					   grp->bb_free);
	}

	if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
		ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
					    &grp->bb_state);
		if (!ret && gdp) {
			int count;

			count = ext4_free_inodes_count(sb, gdp);
			percpu_counter_sub(&sbi->s_freeinodes_counter,
					   count);
		}
	}
}
1046
617ba13b 1047void ext4_update_dynamic_rev(struct super_block *sb)
ac27a0ec 1048{
617ba13b 1049 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
ac27a0ec 1050
617ba13b 1051 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
ac27a0ec
DK
1052 return;
1053
12062ddd 1054 ext4_warning(sb,
ac27a0ec
DK
1055 "updating to rev %d because of new feature flag, "
1056 "running e2fsck is recommended",
617ba13b 1057 EXT4_DYNAMIC_REV);
ac27a0ec 1058
617ba13b
MC
1059 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
1060 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
1061 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
ac27a0ec
DK
1062 /* leave es->s_feature_*compat flags alone */
1063 /* es->s_uuid will be set by e2fsck if empty */
1064
1065 /*
1066 * The rest of the superblock fields should be zero, and if not it
1067 * means they are likely already in use, so leave them alone. We
1068 * can leave it up to e2fsck to clean up any inconsistencies there.
1069 */
1070}
1071
/*
 * Open the external journal device for exclusive read/write access,
 * using @sb as the holder.  Returns NULL (after logging the error)
 * on failure rather than an ERR_PTR.
 */
static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
{
	struct block_device *bdev;

	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
	if (IS_ERR(bdev))
		goto fail;
	return bdev;

fail:
	ext4_msg(sb, KERN_ERR,
		 "failed to open journal device unknown-block(%u,%u) %ld",
		 MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
	return NULL;
}
1090
/*
 * Release the journal device; mode flags must match those used in
 * ext4_blkdev_get().
 */
static void ext4_blkdev_put(struct block_device *bdev)
{
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
1098
4385bab1 1099static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
ac27a0ec
DK
1100{
1101 struct block_device *bdev;
ee7ed3aa 1102 bdev = sbi->s_journal_bdev;
ac27a0ec 1103 if (bdev) {
4385bab1 1104 ext4_blkdev_put(bdev);
ee7ed3aa 1105 sbi->s_journal_bdev = NULL;
ac27a0ec 1106 }
ac27a0ec
DK
1107}
1108
/* Map an entry of the in-memory orphan list back to its VFS inode. */
static inline struct inode *orphan_list_entry(struct list_head *l)
{
	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
}
1113
617ba13b 1114static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
ac27a0ec
DK
1115{
1116 struct list_head *l;
1117
b31e1552
ES
1118 ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
1119 le32_to_cpu(sbi->s_es->s_last_orphan));
ac27a0ec
DK
1120
1121 printk(KERN_ERR "sb_info orphan list:\n");
1122 list_for_each(l, &sbi->s_orphan) {
1123 struct inode *inode = orphan_list_entry(l);
1124 printk(KERN_ERR " "
1125 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
1126 inode->i_sb->s_id, inode->i_ino, inode,
1127 inode->i_mode, inode->i_nlink,
1128 NEXT_ORPHAN(inode));
1129 }
1130}
1131
#ifdef CONFIG_QUOTA
static int ext4_quota_off(struct super_block *sb, int type);

/* Turn off every quota type at unmount via our own quota_off hook. */
static inline void ext4_quota_off_umount(struct super_block *sb)
{
	int type;

	/* Use our quota_off function to clear inode flags etc. */
	for (type = 0; type < EXT4_MAXQUOTAS; type++)
		ext4_quota_off(sb, type);
}

/*
 * This is a helper function which is used in the mount/remount
 * codepaths (which holds s_umount) to fetch the quota file name.
 */
static inline char *get_qf_name(struct super_block *sb,
				struct ext4_sb_info *sbi,
				int type)
{
	return rcu_dereference_protected(sbi->s_qf_names[type],
					 lockdep_is_held(&sb->s_umount));
}
#else
/* Quota support compiled out: nothing to shut down at unmount. */
static inline void ext4_quota_off_umount(struct super_block *sb)
{
}
#endif
1160
2b2d6d01 1161static void ext4_put_super(struct super_block *sb)
ac27a0ec 1162{
617ba13b
MC
1163 struct ext4_sb_info *sbi = EXT4_SB(sb);
1164 struct ext4_super_block *es = sbi->s_es;
1d0c3924 1165 struct buffer_head **group_desc;
7c990728 1166 struct flex_groups **flex_groups;
97abd7d4 1167 int aborted = 0;
ef2cabf7 1168 int i, err;
ac27a0ec 1169
857ac889 1170 ext4_unregister_li_request(sb);
957153fc 1171 ext4_quota_off_umount(sb);
e0ccfd95 1172
c92dc856 1173 flush_work(&sbi->s_error_work);
2e8fa54e 1174 destroy_workqueue(sbi->rsv_conversion_wq);
02f310fc 1175 ext4_release_orphan_info(sb);
4c0425ff 1176
5e47868f
RH
1177 /*
1178 * Unregister sysfs before destroying jbd2 journal.
1179 * Since we could still access attr_journal_task attribute via sysfs
1180 * path which could have sbi->s_journal->j_task as NULL
1181 */
1182 ext4_unregister_sysfs(sb);
1183
0390131b 1184 if (sbi->s_journal) {
97abd7d4 1185 aborted = is_journal_aborted(sbi->s_journal);
0390131b
FM
1186 err = jbd2_journal_destroy(sbi->s_journal);
1187 sbi->s_journal = NULL;
878520ac 1188 if ((err < 0) && !aborted) {
54d3adbc 1189 ext4_abort(sb, -err, "Couldn't clean up the journal");
878520ac 1190 }
0390131b 1191 }
d4edac31 1192
d3922a77 1193 ext4_es_unregister_shrinker(sbi);
9105bb14 1194 del_timer_sync(&sbi->s_err_report);
d4edac31
JB
1195 ext4_release_system_zone(sb);
1196 ext4_mb_release(sb);
1197 ext4_ext_release(sb);
d4edac31 1198
bc98a42c 1199 if (!sb_rdonly(sb) && !aborted) {
e2b911c5 1200 ext4_clear_feature_journal_needs_recovery(sb);
02f310fc 1201 ext4_clear_feature_orphan_present(sb);
ac27a0ec 1202 es->s_state = cpu_to_le16(sbi->s_mount_state);
ac27a0ec 1203 }
bc98a42c 1204 if (!sb_rdonly(sb))
4392fbc4 1205 ext4_commit_super(sb);
a8e25a83 1206
1d0c3924
TT
1207 rcu_read_lock();
1208 group_desc = rcu_dereference(sbi->s_group_desc);
ac27a0ec 1209 for (i = 0; i < sbi->s_gdb_count; i++)
1d0c3924
TT
1210 brelse(group_desc[i]);
1211 kvfree(group_desc);
7c990728
SJS
1212 flex_groups = rcu_dereference(sbi->s_flex_groups);
1213 if (flex_groups) {
1214 for (i = 0; i < sbi->s_flex_groups_allocated; i++)
1215 kvfree(flex_groups[i]);
1216 kvfree(flex_groups);
1217 }
1d0c3924 1218 rcu_read_unlock();
57042651 1219 percpu_counter_destroy(&sbi->s_freeclusters_counter);
ac27a0ec
DK
1220 percpu_counter_destroy(&sbi->s_freeinodes_counter);
1221 percpu_counter_destroy(&sbi->s_dirs_counter);
57042651 1222 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
efc61345 1223 percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
bbd55937 1224 percpu_free_rwsem(&sbi->s_writepages_rwsem);
ac27a0ec 1225#ifdef CONFIG_QUOTA
a2d4a646 1226 for (i = 0; i < EXT4_MAXQUOTAS; i++)
33458eab 1227 kfree(get_qf_name(sb, sbi, i));
ac27a0ec
DK
1228#endif
1229
1230 /* Debugging code just in case the in-memory inode orphan list
1231 * isn't empty. The on-disk one can be non-empty if we've
1232 * detected an error and taken the fs readonly, but the
1233 * in-memory list had better be clean by this point. */
1234 if (!list_empty(&sbi->s_orphan))
1235 dump_orphan_list(sb, sbi);
837c23fb 1236 ASSERT(list_empty(&sbi->s_orphan));
ac27a0ec 1237
89d96a6f 1238 sync_blockdev(sb->s_bdev);
f98393a6 1239 invalidate_bdev(sb->s_bdev);
ee7ed3aa 1240 if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) {
ac27a0ec
DK
1241 /*
1242 * Invalidate the journal device's buffers. We don't want them
1243 * floating about in memory - the physical journal device may
1244 * hotswapped, and it breaks the `ro-after' testing code.
1245 */
ee7ed3aa
CX
1246 sync_blockdev(sbi->s_journal_bdev);
1247 invalidate_bdev(sbi->s_journal_bdev);
617ba13b 1248 ext4_blkdev_remove(sbi);
ac27a0ec 1249 }
50c15df6
CX
1250
1251 ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
1252 sbi->s_ea_inode_cache = NULL;
1253
1254 ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
1255 sbi->s_ea_block_cache = NULL;
1256
618f0031
PS
1257 ext4_stop_mmpd(sbi);
1258
9060dd2c 1259 brelse(sbi->s_sbh);
ac27a0ec 1260 sb->s_fs_info = NULL;
3197ebdb
TT
1261 /*
1262 * Now that we are completely done shutting down the
1263 * superblock, we need to actually destroy the kobject.
1264 */
3197ebdb
TT
1265 kobject_put(&sbi->s_kobj);
1266 wait_for_completion(&sbi->s_kobj_unregister);
0441984a
DW
1267 if (sbi->s_chksum_driver)
1268 crypto_free_shash(sbi->s_chksum_driver);
705895b6 1269 kfree(sbi->s_blockgroup_lock);
5e405595 1270 fs_put_dax(sbi->s_daxdev);
ac4acb1f 1271 fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
c83ad55e 1272#ifdef CONFIG_UNICODE
f8f4acb6 1273 utf8_unload(sb->s_encoding);
c83ad55e 1274#endif
ac27a0ec 1275 kfree(sbi);
ac27a0ec
DK
1276}
1277
e18b890b 1278static struct kmem_cache *ext4_inode_cachep;
ac27a0ec
DK
1279
1280/*
1281 * Called inside transaction, so use GFP_NOFS
1282 */
617ba13b 1283static struct inode *ext4_alloc_inode(struct super_block *sb)
ac27a0ec 1284{
617ba13b 1285 struct ext4_inode_info *ei;
ac27a0ec 1286
e6b4f8da 1287 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
ac27a0ec
DK
1288 if (!ei)
1289 return NULL;
0b8e58a1 1290
ee73f9a5 1291 inode_set_iversion(&ei->vfs_inode, 1);
202ee5df 1292 spin_lock_init(&ei->i_raw_lock);
c9de560d 1293 INIT_LIST_HEAD(&ei->i_prealloc_list);
27bc446e 1294 atomic_set(&ei->i_prealloc_active, 0);
c9de560d 1295 spin_lock_init(&ei->i_prealloc_lock);
9a26b661
ZL
1296 ext4_es_init_tree(&ei->i_es_tree);
1297 rwlock_init(&ei->i_es_lock);
edaa53ca 1298 INIT_LIST_HEAD(&ei->i_es_list);
eb68d0e2 1299 ei->i_es_all_nr = 0;
edaa53ca 1300 ei->i_es_shk_nr = 0;
dd475925 1301 ei->i_es_shrink_lblk = 0;
d2a17637 1302 ei->i_reserved_data_blocks = 0;
d2a17637 1303 spin_lock_init(&(ei->i_block_reservation_lock));
1dc0aa46 1304 ext4_init_pending_tree(&ei->i_pending_tree);
a9e7f447
DM
1305#ifdef CONFIG_QUOTA
1306 ei->i_reserved_quota = 0;
96c7e0d9 1307 memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
a9e7f447 1308#endif
8aefcd55 1309 ei->jinode = NULL;
2e8fa54e 1310 INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
744692dc 1311 spin_lock_init(&ei->i_completed_io_lock);
b436b9be
JK
1312 ei->i_sync_tid = 0;
1313 ei->i_datasync_tid = 0;
e27f41e1 1314 atomic_set(&ei->i_unwritten, 0);
2e8fa54e 1315 INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
aa75f4d3
HS
1316 ext4_fc_init_inode(&ei->vfs_inode);
1317 mutex_init(&ei->i_fc_lock);
ac27a0ec
DK
1318 return &ei->vfs_inode;
1319}
1320
/*
 * Decide whether a no-longer-referenced inode should be evicted now.
 * Defers to generic_drop_inode(), then lets fscrypt force a drop (e.g.
 * for inodes it cannot keep cached); the decision is traced either way.
 */
static int ext4_drop_inode(struct inode *inode)
{
	int drop = generic_drop_inode(inode);

	if (!drop)
		drop = fscrypt_drop_inode(inode);

	trace_ext4_drop_inode(inode, drop);
	return drop;
}
1331
/*
 * RCU-deferred final free of an in-core inode back to the slab cache.
 * Warns if the inode is still on the fast-commit list, which would
 * indicate a use-after-free hazard in fast-commit tracking.
 */
static void ext4_free_in_core_inode(struct inode *inode)
{
	fscrypt_free_inode(inode);
	if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
		pr_warn("%s: inode %ld still in fc list",
			__func__, inode->i_ino);
	}
	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}
1341
617ba13b 1342static void ext4_destroy_inode(struct inode *inode)
ac27a0ec 1343{
9f7dd93d 1344 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
b31e1552
ES
1345 ext4_msg(inode->i_sb, KERN_ERR,
1346 "Inode %lu (%p): orphan list check failed!",
1347 inode->i_ino, EXT4_I(inode));
9f7dd93d
VA
1348 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
1349 EXT4_I(inode), sizeof(struct ext4_inode_info),
1350 true);
1351 dump_stack();
1352 }
6fed8395
JX
1353
1354 if (EXT4_I(inode)->i_reserved_data_blocks)
1355 ext4_msg(inode->i_sb, KERN_ERR,
1356 "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
1357 inode->i_ino, EXT4_I(inode),
1358 EXT4_I(inode)->i_reserved_data_blocks);
ac27a0ec
DK
1359}
1360
51cc5068 1361static void init_once(void *foo)
ac27a0ec 1362{
617ba13b 1363 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
ac27a0ec 1364
a35afb83 1365 INIT_LIST_HEAD(&ei->i_orphan);
a35afb83 1366 init_rwsem(&ei->xattr_sem);
0e855ac8 1367 init_rwsem(&ei->i_data_sem);
a35afb83 1368 inode_init_once(&ei->vfs_inode);
aa75f4d3 1369 ext4_fc_init_inode(&ei->vfs_inode);
ac27a0ec
DK
1370}
1371
e67bc2b3 1372static int __init init_inodecache(void)
ac27a0ec 1373{
f8dd7c70
DW
1374 ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
1375 sizeof(struct ext4_inode_info), 0,
1376 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
1377 SLAB_ACCOUNT),
1378 offsetof(struct ext4_inode_info, i_data),
1379 sizeof_field(struct ext4_inode_info, i_data),
1380 init_once);
617ba13b 1381 if (ext4_inode_cachep == NULL)
ac27a0ec
DK
1382 return -ENOMEM;
1383 return 0;
1384}
1385
/* Destroy the inode slab cache after all RCU-deferred frees complete. */
static void destroy_inodecache(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ext4_inode_cachep);
}
1395
0930fcc1 1396void ext4_clear_inode(struct inode *inode)
ac27a0ec 1397{
aa75f4d3 1398 ext4_fc_del(inode);
0930fcc1 1399 invalidate_inode_buffers(inode);
dbd5768f 1400 clear_inode(inode);
27bc446e 1401 ext4_discard_preallocations(inode, 0);
51865fda 1402 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
f4c2d372 1403 dquot_drop(inode);
8aefcd55
TT
1404 if (EXT4_I(inode)->jinode) {
1405 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1406 EXT4_I(inode)->jinode);
1407 jbd2_free_inode(EXT4_I(inode)->jinode);
1408 EXT4_I(inode)->jinode = NULL;
1409 }
3d204e24 1410 fscrypt_put_encryption_info(inode);
c93d8f88 1411 fsverity_cleanup_inode(inode);
ac27a0ec
DK
1412}
1413
1b961ac0 1414static struct inode *ext4_nfs_get_inode(struct super_block *sb,
0b8e58a1 1415 u64 ino, u32 generation)
ac27a0ec 1416{
ac27a0ec 1417 struct inode *inode;
ac27a0ec 1418
8a363970 1419 /*
ac27a0ec
DK
1420 * Currently we don't know the generation for parent directory, so
1421 * a generation of 0 means "accept any"
1422 */
8a363970 1423 inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
1d1fe1ee
DH
1424 if (IS_ERR(inode))
1425 return ERR_CAST(inode);
1426 if (generation && inode->i_generation != generation) {
ac27a0ec
DK
1427 iput(inode);
1428 return ERR_PTR(-ESTALE);
1429 }
1b961ac0
CH
1430
1431 return inode;
1432}
1433
/* Decode an NFS file handle into a dentry for the target inode. */
static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    ext4_nfs_get_inode);
}
1440
/* Decode an NFS file handle into a dentry for the parent directory. */
static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    ext4_nfs_get_inode);
}
1447
/*
 * NFS export hook: synchronously flush an inode's metadata so the
 * server can guarantee stability to the client.
 */
static int ext4_nfs_commit_metadata(struct inode *inode)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL
	};

	trace_ext4_nfs_commit_metadata(inode);
	return ext4_write_inode(inode, &wbc);
}
1457
643fa961 1458#ifdef CONFIG_FS_ENCRYPTION
a7550b30
JK
1459static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
1460{
1461 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
1462 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
1463}
1464
/*
 * fscrypt hook: store the encryption context xattr on @inode and flip
 * it to the encrypted state.  When @fs_data carries a journal handle
 * the context is written as part of the caller's (inode-creation)
 * transaction; otherwise a private transaction is started here, with
 * retry on ENOSPC.  Returns 0 on success or a negative errno.
 */
static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
			    void *fs_data)
{
	handle_t *handle = fs_data;
	int res, res2, credits, retries = 0;

	/*
	 * Encrypting the root directory is not allowed because e2fsck expects
	 * lost+found to exist and be unencrypted, and encrypting the root
	 * directory would imply encrypting the lost+found directory as well as
	 * the filename "lost+found" itself.
	 */
	if (inode->i_ino == EXT4_ROOT_INO)
		return -EPERM;

	if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
		return -EINVAL;

	if (ext4_test_inode_flag(inode, EXT4_INODE_DAX))
		return -EOPNOTSUPP;

	/* Inline data cannot coexist with an encryption context. */
	res = ext4_convert_inline_data(inode);
	if (res)
		return res;

	/*
	 * If a journal handle was specified, then the encryption context is
	 * being set on a new inode via inheritance and is part of a larger
	 * transaction to create the inode.  Otherwise the encryption context is
	 * being set on an existing inode in its own transaction.  Only in the
	 * latter case should the "retry on ENOSPC" logic be used.
	 */

	if (handle) {
		res = ext4_xattr_set_handle(handle, inode,
					    EXT4_XATTR_INDEX_ENCRYPTION,
					    EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
					    ctx, len, 0);
		if (!res) {
			ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
			ext4_clear_inode_state(inode,
					EXT4_STATE_MAY_INLINE_DATA);
			/*
			 * Update inode->i_flags - S_ENCRYPTED will be enabled,
			 * S_DAX may be disabled
			 */
			ext4_set_inode_flags(inode, false);
		}
		return res;
	}

	res = dquot_initialize(inode);
	if (res)
		return res;
retry:
	res = ext4_xattr_set_credits(inode, len, false /* is_create */,
				     &credits);
	if (res)
		return res;

	handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION,
				    EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
				    ctx, len, 0);
	if (!res) {
		ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
		/*
		 * Update inode->i_flags - S_ENCRYPTED will be enabled,
		 * S_DAX may be disabled
		 */
		ext4_set_inode_flags(inode, false);
		res = ext4_mark_inode_dirty(handle, inode);
		if (res)
			EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
	}
	res2 = ext4_journal_stop(handle);

	if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;
	if (!res)
		res = res2;
	return res;
}
1551
ac4acb1f 1552static const union fscrypt_policy *ext4_get_dummy_policy(struct super_block *sb)
a7550b30 1553{
ac4acb1f 1554 return EXT4_SB(sb)->s_dummy_enc_policy.policy;
a7550b30
JK
1555}
1556
/* fscrypt hook: report whether inode numbers are stable (feature bit). */
static bool ext4_has_stable_inodes(struct super_block *sb)
{
	return ext4_has_feature_stable_inodes(sb);
}
1561
/*
 * fscrypt hook: report the bit widths of inode numbers and logical
 * block numbers, used to size IV generation for some crypto policies.
 */
static void ext4_get_ino_and_lblk_bits(struct super_block *sb,
				       int *ino_bits_ret, int *lblk_bits_ret)
{
	*ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count);
	*lblk_bits_ret = 8 * sizeof(ext4_lblk_t);
}
1568
6f69f0ed 1569static const struct fscrypt_operations ext4_cryptops = {
a5d431ef 1570 .key_prefix = "ext4:",
a7550b30 1571 .get_context = ext4_get_context,
a7550b30 1572 .set_context = ext4_set_context,
ac4acb1f 1573 .get_dummy_policy = ext4_get_dummy_policy,
a7550b30 1574 .empty_dir = ext4_empty_dir,
e12ee683 1575 .max_namelen = EXT4_NAME_LEN,
b925acb8
EB
1576 .has_stable_inodes = ext4_has_stable_inodes,
1577 .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits,
a7550b30 1578};
a7550b30
JK
1579#endif
1580
ac27a0ec 1581#ifdef CONFIG_QUOTA
d6006186 1582static const char * const quotatypes[] = INITQFNAMES;
689c958c 1583#define QTYPE2NAME(t) (quotatypes[t])
ac27a0ec 1584
617ba13b
MC
1585static int ext4_write_dquot(struct dquot *dquot);
1586static int ext4_acquire_dquot(struct dquot *dquot);
1587static int ext4_release_dquot(struct dquot *dquot);
1588static int ext4_mark_dquot_dirty(struct dquot *dquot);
1589static int ext4_write_info(struct super_block *sb, int type);
6f28e087 1590static int ext4_quota_on(struct super_block *sb, int type, int format_id,
8c54ca9c 1591 const struct path *path);
617ba13b 1592static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
ac27a0ec 1593 size_t len, loff_t off);
617ba13b 1594static ssize_t ext4_quota_write(struct super_block *sb, int type,
ac27a0ec 1595 const char *data, size_t len, loff_t off);
7c319d32
AK
1596static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1597 unsigned int flags);
ac27a0ec 1598
96c7e0d9
JK
1599static struct dquot **ext4_get_dquots(struct inode *inode)
1600{
1601 return EXT4_I(inode)->i_dquot;
1602}
1603
61e225dc 1604static const struct dquot_operations ext4_quota_operations = {
7a9ca53a
TE
1605 .get_reserved_space = ext4_get_reserved_space,
1606 .write_dquot = ext4_write_dquot,
1607 .acquire_dquot = ext4_acquire_dquot,
1608 .release_dquot = ext4_release_dquot,
1609 .mark_dirty = ext4_mark_dquot_dirty,
1610 .write_info = ext4_write_info,
1611 .alloc_dquot = dquot_alloc,
1612 .destroy_dquot = dquot_destroy,
1613 .get_projid = ext4_get_projid,
1614 .get_inode_usage = ext4_get_inode_usage,
ebc11f7b 1615 .get_next_id = dquot_get_next_id,
ac27a0ec
DK
1616};
1617
0d54b217 1618static const struct quotactl_ops ext4_qctl_operations = {
617ba13b 1619 .quota_on = ext4_quota_on,
ca0e05e4 1620 .quota_off = ext4_quota_off,
287a8095 1621 .quota_sync = dquot_quota_sync,
0a240339 1622 .get_state = dquot_get_state,
287a8095
CH
1623 .set_info = dquot_set_dqinfo,
1624 .get_dqblk = dquot_get_dqblk,
6332b9b5
ES
1625 .set_dqblk = dquot_set_dqblk,
1626 .get_nextdqblk = dquot_get_next_dqblk,
ac27a0ec
DK
1627};
1628#endif
1629
ee9b6d61 1630static const struct super_operations ext4_sops = {
617ba13b 1631 .alloc_inode = ext4_alloc_inode,
94053139 1632 .free_inode = ext4_free_in_core_inode,
617ba13b 1633 .destroy_inode = ext4_destroy_inode,
617ba13b
MC
1634 .write_inode = ext4_write_inode,
1635 .dirty_inode = ext4_dirty_inode,
7ff9c073 1636 .drop_inode = ext4_drop_inode,
0930fcc1 1637 .evict_inode = ext4_evict_inode,
617ba13b 1638 .put_super = ext4_put_super,
617ba13b 1639 .sync_fs = ext4_sync_fs,
c4be0c1d
TS
1640 .freeze_fs = ext4_freeze,
1641 .unfreeze_fs = ext4_unfreeze,
617ba13b
MC
1642 .statfs = ext4_statfs,
1643 .remount_fs = ext4_remount,
617ba13b 1644 .show_options = ext4_show_options,
ac27a0ec 1645#ifdef CONFIG_QUOTA
617ba13b
MC
1646 .quota_read = ext4_quota_read,
1647 .quota_write = ext4_quota_write,
96c7e0d9 1648 .get_dquots = ext4_get_dquots,
ac27a0ec
DK
1649#endif
1650};
1651
/* NFS export operations (file-handle encoding/decoding). */
static const struct export_operations ext4_export_ops = {
	.fh_to_dentry = ext4_fh_to_dentry,
	.fh_to_parent = ext4_fh_to_parent,
	.get_parent = ext4_get_parent,
	.commit_metadata = ext4_nfs_commit_metadata,
};
1658
/*
 * Mount-option token identifiers; matched against the strings in the
 * tokens[] table below.  Opt_removed marks options that are accepted
 * but no longer have any effect.
 */
enum {
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
	Opt_nouid32, Opt_debug, Opt_removed,
	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
	Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
	Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
	Opt_inlinecrypt,
	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
	Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
	Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
	Opt_nowarn_on_error, Opt_mblk_io_submit,
	Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
	Opt_inode_readahead_blks, Opt_journal_ioprio,
	Opt_dioread_nolock, Opt_dioread_lock,
	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
	Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
#ifdef CONFIG_EXT4_DEBUG
	Opt_fc_debug_max_replay, Opt_fc_debug_force
#endif
};
1688
a447c093 1689static const match_table_t tokens = {
ac27a0ec
DK
1690 {Opt_bsd_df, "bsddf"},
1691 {Opt_minix_df, "minixdf"},
1692 {Opt_grpid, "grpid"},
1693 {Opt_grpid, "bsdgroups"},
1694 {Opt_nogrpid, "nogrpid"},
1695 {Opt_nogrpid, "sysvgroups"},
1696 {Opt_resgid, "resgid=%u"},
1697 {Opt_resuid, "resuid=%u"},
1698 {Opt_sb, "sb=%u"},
1699 {Opt_err_cont, "errors=continue"},
1700 {Opt_err_panic, "errors=panic"},
1701 {Opt_err_ro, "errors=remount-ro"},
1702 {Opt_nouid32, "nouid32"},
ac27a0ec 1703 {Opt_debug, "debug"},
72578c33
TT
1704 {Opt_removed, "oldalloc"},
1705 {Opt_removed, "orlov"},
ac27a0ec
DK
1706 {Opt_user_xattr, "user_xattr"},
1707 {Opt_nouser_xattr, "nouser_xattr"},
1708 {Opt_acl, "acl"},
1709 {Opt_noacl, "noacl"},
e3bb52ae 1710 {Opt_noload, "norecovery"},
5a916be1 1711 {Opt_noload, "noload"},
72578c33
TT
1712 {Opt_removed, "nobh"},
1713 {Opt_removed, "bh"},
ac27a0ec 1714 {Opt_commit, "commit=%u"},
30773840
TT
1715 {Opt_min_batch_time, "min_batch_time=%u"},
1716 {Opt_max_batch_time, "max_batch_time=%u"},
ac27a0ec 1717 {Opt_journal_dev, "journal_dev=%u"},
ad4eec61 1718 {Opt_journal_path, "journal_path=%s"},
818d276c 1719 {Opt_journal_checksum, "journal_checksum"},
c6d3d56d 1720 {Opt_nojournal_checksum, "nojournal_checksum"},
818d276c 1721 {Opt_journal_async_commit, "journal_async_commit"},
ac27a0ec
DK
1722 {Opt_abort, "abort"},
1723 {Opt_data_journal, "data=journal"},
1724 {Opt_data_ordered, "data=ordered"},
1725 {Opt_data_writeback, "data=writeback"},
5bf5683a
HK
1726 {Opt_data_err_abort, "data_err=abort"},
1727 {Opt_data_err_ignore, "data_err=ignore"},
ac27a0ec
DK
1728 {Opt_offusrjquota, "usrjquota="},
1729 {Opt_usrjquota, "usrjquota=%s"},
1730 {Opt_offgrpjquota, "grpjquota="},
1731 {Opt_grpjquota, "grpjquota=%s"},
1732 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1733 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
5a20bdfc 1734 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
ac27a0ec
DK
1735 {Opt_grpquota, "grpquota"},
1736 {Opt_noquota, "noquota"},
1737 {Opt_quota, "quota"},
1738 {Opt_usrquota, "usrquota"},
49da9392 1739 {Opt_prjquota, "prjquota"},
ac27a0ec 1740 {Opt_barrier, "barrier=%u"},
06705bff
TT
1741 {Opt_barrier, "barrier"},
1742 {Opt_nobarrier, "nobarrier"},
25ec56b5 1743 {Opt_i_version, "i_version"},
923ae0ff 1744 {Opt_dax, "dax"},
9cb20f94
IW
1745 {Opt_dax_always, "dax=always"},
1746 {Opt_dax_inode, "dax=inode"},
1747 {Opt_dax_never, "dax=never"},
c9de560d 1748 {Opt_stripe, "stripe=%u"},
64769240 1749 {Opt_delalloc, "delalloc"},
327eaf73
TT
1750 {Opt_warn_on_error, "warn_on_error"},
1751 {Opt_nowarn_on_error, "nowarn_on_error"},
a26f4992
TT
1752 {Opt_lazytime, "lazytime"},
1753 {Opt_nolazytime, "nolazytime"},
670e9875 1754 {Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"},
dd919b98 1755 {Opt_nodelalloc, "nodelalloc"},
36ade451
JK
1756 {Opt_removed, "mblk_io_submit"},
1757 {Opt_removed, "nomblk_io_submit"},
6fd058f7
TT
1758 {Opt_block_validity, "block_validity"},
1759 {Opt_noblock_validity, "noblock_validity"},
240799cd 1760 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
b3881f74 1761 {Opt_journal_ioprio, "journal_ioprio=%u"},
afd4672d 1762 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
06705bff
TT
1763 {Opt_auto_da_alloc, "auto_da_alloc"},
1764 {Opt_noauto_da_alloc, "noauto_da_alloc"},
744692dc 1765 {Opt_dioread_nolock, "dioread_nolock"},
244adf64 1766 {Opt_dioread_lock, "nodioread_nolock"},
744692dc 1767 {Opt_dioread_lock, "dioread_lock"},
5328e635
ES
1768 {Opt_discard, "discard"},
1769 {Opt_nodiscard, "nodiscard"},
fc6cb1cd
TT
1770 {Opt_init_itable, "init_itable=%u"},
1771 {Opt_init_itable, "init_itable"},
1772 {Opt_noinit_itable, "noinit_itable"},
8016e29f 1773#ifdef CONFIG_EXT4_DEBUG
99c880de 1774 {Opt_fc_debug_force, "fc_debug_force"},
8016e29f
HS
1775 {Opt_fc_debug_max_replay, "fc_debug_max_replay=%u"},
1776#endif
df981d03 1777 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
ed318a6c 1778 {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
6ddb2447 1779 {Opt_test_dummy_encryption, "test_dummy_encryption"},
4f74d15f 1780 {Opt_inlinecrypt, "inlinecrypt"},
cdb7ee4c
TE
1781 {Opt_nombcache, "nombcache"},
1782 {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
21175ca4
HS
1783 {Opt_removed, "prefetch_block_bitmaps"},
1784 {Opt_no_prefetch_block_bitmaps, "no_prefetch_block_bitmaps"},
196e402a 1785 {Opt_mb_optimize_scan, "mb_optimize_scan=%d"},
c7198b9c
TT
1786 {Opt_removed, "check=none"}, /* mount option from ext2/3 */
1787 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
1788 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
1789 {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
1790 {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
f3f12faa 1791 {Opt_err, NULL},
ac27a0ec
DK
1792};
1793
617ba13b 1794static ext4_fsblk_t get_sb_block(void **data)
ac27a0ec 1795{
617ba13b 1796 ext4_fsblk_t sb_block;
ac27a0ec
DK
1797 char *options = (char *) *data;
1798
1799 if (!options || strncmp(options, "sb=", 3) != 0)
1800 return 1; /* Default location */
0b8e58a1 1801
ac27a0ec 1802 options += 3;
0b8e58a1 1803 /* TODO: use simple_strtoll with >32bit ext4 */
ac27a0ec
DK
1804 sb_block = simple_strtoul(options, &options, 0);
1805 if (*options && *options != ',') {
4776004f 1806 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
ac27a0ec
DK
1807 (char *) *data);
1808 return 1;
1809 }
1810 if (*options == ',')
1811 options++;
1812 *data = (void *) options;
0b8e58a1 1813
ac27a0ec
DK
1814 return sb_block;
1815}
1816
/* Default journal I/O priority: best-effort class, level 3. */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
/* Sentinel: mb_optimize_scan= was not given on the command line. */
#define DEFAULT_MB_OPTIMIZE_SCAN (-1)

/* printk format for warning about mount options scheduled for removal. */
static const char deprecated_msg[] =
        "Mount option \"%s\" will be removed by %s\n"
        "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
b3881f74 1823
56c50f11
DM
#ifdef CONFIG_QUOTA
/*
 * Record the journaled quota file name for @qtype (the usrjquota= /
 * grpjquota= mount options).
 *
 * Returns 1 on success — including the harmless cases of re-specifying
 * the identical name or the option being moot because the kernel-managed
 * QUOTA feature is enabled — and -1 on error.  The new name is published
 * with rcu_assign_pointer() because readers (e.g.
 * ext4_show_quota_options()) access s_qf_names[] under RCU.
 */
static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        char *qname, *old_qname = get_qf_name(sb, sbi, qtype);
        int ret = -1;

        /* Can't start journaled quota by name while any quota is active. */
        if (sb_any_quota_loaded(sb) && !old_qname) {
                ext4_msg(sb, KERN_ERR,
                        "Cannot change journaled "
                        "quota options when quota turned on");
                return -1;
        }
        if (ext4_has_feature_quota(sb)) {
                ext4_msg(sb, KERN_INFO, "Journaled quota options "
                         "ignored when QUOTA feature is enabled");
                return 1;
        }
        qname = match_strdup(args);
        if (!qname) {
                ext4_msg(sb, KERN_ERR,
                        "Not enough memory for storing quotafile name");
                return -1;
        }
        if (old_qname) {
                /* Same name again is OK; a different name is an error. */
                if (strcmp(old_qname, qname) == 0)
                        ret = 1;
                else
                        ext4_msg(sb, KERN_ERR,
                                 "%s quota file already specified",
                                 QTYPE2NAME(qtype));
                goto errout;
        }
        if (strchr(qname, '/')) {
                ext4_msg(sb, KERN_ERR,
                        "quotafile must be on filesystem root");
                goto errout;
        }
        rcu_assign_pointer(sbi->s_qf_names[qtype], qname);
        set_opt(sb, QUOTA);
        return 1;
errout:
        kfree(qname);
        return ret;
}
1869
/*
 * Forget a previously configured journaled quota file name for @qtype
 * (the offusrjquota= / offgrpjquota= mount options).  Refused while that
 * quota type is loaded.  synchronize_rcu() makes sure no RCU reader can
 * still see the old name before it is freed.  Returns 1 on success,
 * -1 on error.
 */
static int clear_qf_name(struct super_block *sb, int qtype)
{

        struct ext4_sb_info *sbi = EXT4_SB(sb);
        char *old_qname = get_qf_name(sb, sbi, qtype);

        if (sb_any_quota_loaded(sb) && old_qname) {
                ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
                        " when quota turned on");
                return -1;
        }
        rcu_assign_pointer(sbi->s_qf_names[qtype], NULL);
        synchronize_rcu();
        kfree(old_qname);
        return 1;
}
#endif
1887
/*
 * Behavior flags for entries of ext4_mount_opts[], interpreted by
 * handle_mount_opt():
 *
 * MOPT_SET / MOPT_CLEAR - option sets / clears mount_opt bits
 * MOPT_NOSUPPORT        - recognized but not supported in this build
 * MOPT_EXPLICIT         - remember the user asked for it explicitly
 * MOPT_CLEAR_ERR        - clear the errors=... bits before applying
 * MOPT_GTE0             - numeric argument must be >= 0
 * MOPT_Q / MOPT_QFMT    - quota options (NOSUPPORT without CONFIG_QUOTA)
 * MOPT_DATAJ            - selects a data=journal/ordered/writeback mode
 * MOPT_NO_EXT2/_NO_EXT3 - rejected when mounted via the ext2/ext3 alias
 * MOPT_EXT4_ONLY        - shorthand for both of the above
 * MOPT_STRING           - argument is a string; skip match_int()
 * MOPT_SKIP             - not shown by _ext4_show_options()
 * MOPT_2                - bit lives in s_mount_opt2, not s_mount_opt
 */
#define MOPT_SET        0x0001
#define MOPT_CLEAR      0x0002
#define MOPT_NOSUPPORT  0x0004
#define MOPT_EXPLICIT   0x0008
#define MOPT_CLEAR_ERR  0x0010
#define MOPT_GTE0       0x0020
#ifdef CONFIG_QUOTA
#define MOPT_Q          0
#define MOPT_QFMT       0x0040
#else
#define MOPT_Q          MOPT_NOSUPPORT
#define MOPT_QFMT       MOPT_NOSUPPORT
#endif
#define MOPT_DATAJ      0x0080
#define MOPT_NO_EXT2    0x0100
#define MOPT_NO_EXT3    0x0200
#define MOPT_EXT4_ONLY  (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING     0x0400
#define MOPT_SKIP       0x0800
#define MOPT_2          0x1000

/* Table mapping option tokens to the mount_opt bits and flags above. */
static const struct mount_opts {
        int     token;
        int     mount_opt;
        int     flags;
} ext4_mount_opts[] = {
        {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
        {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
        {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
        {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
        {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
        {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
        {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
         MOPT_EXT4_ONLY | MOPT_SET},
        {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
        {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
        {Opt_delalloc, EXT4_MOUNT_DELALLOC,
         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
        {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
        {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
        {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
        {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
                                    EXT4_MOUNT_JOURNAL_CHECKSUM),
         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
        {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
        {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
         MOPT_NO_EXT2},
        {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
         MOPT_NO_EXT2},
        {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
        {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
        {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
        {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
        {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
        {Opt_commit, 0, MOPT_GTE0},
        {Opt_max_batch_time, 0, MOPT_GTE0},
        {Opt_min_batch_time, 0, MOPT_GTE0},
        {Opt_inode_readahead_blks, 0, MOPT_GTE0},
        {Opt_init_itable, 0, MOPT_GTE0},
        {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP},
        {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
                MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
        {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
                MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
        {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
                MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
        {Opt_stripe, 0, MOPT_GTE0},
        {Opt_resuid, 0, MOPT_GTE0},
        {Opt_resgid, 0, MOPT_GTE0},
        {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
        {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
        {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
        {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
         MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
        {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
#ifdef CONFIG_EXT4_FS_POSIX_ACL
        {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
        {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
#else
        {Opt_acl, 0, MOPT_NOSUPPORT},
        {Opt_noacl, 0, MOPT_NOSUPPORT},
#endif
        {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
        {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
        {Opt_debug_want_extra_isize, 0, MOPT_GTE0},
        {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
        {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
                       EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
                                                        MOPT_CLEAR | MOPT_Q},
        {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
        {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
        {Opt_offusrjquota, 0, MOPT_Q},
        {Opt_offgrpjquota, 0, MOPT_Q},
        {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
        {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
        {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
        {Opt_max_dir_size_kb, 0, MOPT_GTE0},
        {Opt_test_dummy_encryption, 0, MOPT_STRING},
        {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
        {Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
         MOPT_SET},
        {Opt_mb_optimize_scan, EXT4_MOUNT2_MB_OPTIMIZE_SCAN, MOPT_GTE0},
#ifdef CONFIG_EXT4_DEBUG
        {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
         MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
        {Opt_fc_debug_max_replay, 0, MOPT_GTE0},
#endif
        {Opt_err, 0, 0}
};
2016
#ifdef CONFIG_UNICODE
/* Known on-disk filename-encoding magic values and their names/versions. */
static const struct ext4_sb_encodings {
        __u16 magic;    /* matches es->s_encoding on disk */
        char *name;
        char *version;
} ext4_sb_encoding_map[] = {
        {EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"},
};

/*
 * Look up the superblock's filename-encoding magic in the table above.
 * On success, *encoding points at the matching map entry and *flags
 * receives the on-disk encoding flags; returns -EINVAL if the magic is
 * not recognized.
 */
static int ext4_sb_read_encoding(const struct ext4_super_block *es,
                                 const struct ext4_sb_encodings **encoding,
                                 __u16 *flags)
{
        __u16 magic = le16_to_cpu(es->s_encoding);
        int i;

        for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
                if (magic == ext4_sb_encoding_map[i].magic)
                        break;

        if (i >= ARRAY_SIZE(ext4_sb_encoding_map))
                return -EINVAL;

        *encoding = &ext4_sb_encoding_map[i];
        *flags = le16_to_cpu(es->s_encoding_flags);

        return 0;
}
#endif
2046
/*
 * Handle the test_dummy_encryption mount option (testing only): hand the
 * option value to fscrypt to install a dummy encryption policy on the
 * superblock.  Returns 1 on success — including the !CONFIG_FS_ENCRYPTION
 * case, where the option is merely ignored with a warning — and -1 on
 * failure.
 */
static int ext4_set_test_dummy_encryption(struct super_block *sb,
                                          const char *opt,
                                          const substring_t *arg,
                                          bool is_remount)
{
#ifdef CONFIG_FS_ENCRYPTION
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        int err;

        /*
         * This mount option is just for testing, and it's not worthwhile to
         * implement the extra complexity (e.g. RCU protection) that would be
         * needed to allow it to be set or changed during remount.  We do allow
         * it to be specified during remount, but only if there is no change.
         */
        if (is_remount && !sbi->s_dummy_enc_policy.policy) {
                ext4_msg(sb, KERN_WARNING,
                         "Can't set test_dummy_encryption on remount");
                return -1;
        }
        err = fscrypt_set_test_dummy_encryption(sb, arg->from,
                                                &sbi->s_dummy_enc_policy);
        if (err) {
                /* Map the fscrypt error to a user-facing diagnostic. */
                if (err == -EEXIST)
                        ext4_msg(sb, KERN_WARNING,
                                 "Can't change test_dummy_encryption on remount");
                else if (err == -EINVAL)
                        ext4_msg(sb, KERN_WARNING,
                                 "Value of option \"%s\" is unrecognized", opt);
                else
                        ext4_msg(sb, KERN_WARNING,
                                 "Error processing option \"%s\" [%d]",
                                 opt, err);
                return -1;
        }
        ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
#else
        ext4_msg(sb, KERN_WARNING,
                 "Test dummy encryption mount option ignored");
#endif
        return 1;
}
2089
/*
 * Option values collected during parsing that handle_mount_opt() cannot
 * apply to the superblock directly; the mount/remount caller consumes
 * them after parsing completes.
 */
struct ext4_parsed_options {
        unsigned long journal_devnum;   /* from journal_dev= / journal_path= */
        unsigned int journal_ioprio;    /* journal_ioprio= as an IOPRIO value */
        int mb_optimize_scan;           /* 0 or 1; DEFAULT_MB_OPTIMIZE_SCAN
                                         * (-1) when not specified */
};
2095
/*
 * Apply a single mount option.
 *
 * @opt:         the option text as typed by the user (for messages)
 * @token:       token from match_token() identifying the option
 * @args:        match_token() captures; args[0].from == NULL means no
 *               argument was supplied
 * @parsed_opts: deferred values (journal device/ioprio, mb_optimize_scan)
 *               the caller applies after parsing
 * @is_remount:  non-zero on remount; several options are rejected or
 *               restricted in that case
 *
 * Returns 1 when the option was consumed, -1 on error.
 */
static int handle_mount_opt(struct super_block *sb, char *opt, int token,
                            substring_t *args, struct ext4_parsed_options *parsed_opts,
                            int is_remount)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        const struct mount_opts *m;
        kuid_t uid;
        kgid_t gid;
        int arg = 0;

#ifdef CONFIG_QUOTA
        /* Quota file name options are handled entirely by the helpers. */
        if (token == Opt_usrjquota)
                return set_qf_name(sb, USRQUOTA, &args[0]);
        else if (token == Opt_grpjquota)
                return set_qf_name(sb, GRPQUOTA, &args[0]);
        else if (token == Opt_offusrjquota)
                return clear_qf_name(sb, USRQUOTA);
        else if (token == Opt_offgrpjquota)
                return clear_qf_name(sb, GRPQUOTA);
#endif
        /* Options with no ext4_mount_opts[] entry are dispatched here. */
        switch (token) {
        case Opt_noacl:
        case Opt_nouser_xattr:
                ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
                break;
        case Opt_sb:
                return 1;       /* handled by get_sb_block() */
        case Opt_removed:
                ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
                return 1;
        case Opt_abort:
                ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
                return 1;
        case Opt_i_version:
                sb->s_flags |= SB_I_VERSION;
                return 1;
        case Opt_lazytime:
                sb->s_flags |= SB_LAZYTIME;
                return 1;
        case Opt_nolazytime:
                sb->s_flags &= ~SB_LAZYTIME;
                return 1;
        case Opt_inlinecrypt:
#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
                sb->s_flags |= SB_INLINECRYPT;
#else
                ext4_msg(sb, KERN_ERR, "inline encryption not supported");
#endif
                return 1;
        }

        /* Everything else must have an ext4_mount_opts[] entry. */
        for (m = ext4_mount_opts; m->token != Opt_err; m++)
                if (token == m->token)
                        break;

        if (m->token == Opt_err) {
                ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
                         "or missing value", opt);
                return -1;
        }

        /* Enforce per-option filesystem-alias restrictions. */
        if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
                ext4_msg(sb, KERN_ERR,
                         "Mount option \"%s\" incompatible with ext2", opt);
                return -1;
        }
        if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
                ext4_msg(sb, KERN_ERR,
                         "Mount option \"%s\" incompatible with ext3", opt);
                return -1;
        }

        /* Parse a numeric argument unless the option takes a string. */
        if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
                return -1;
        if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
                return -1;
        if (m->flags & MOPT_EXPLICIT) {
                /* Remember the user asked for this rather than defaulting. */
                if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
                        set_opt2(sb, EXPLICIT_DELALLOC);
                } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
                        set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM);
                } else
                        return -1;
        }
        if (m->flags & MOPT_CLEAR_ERR)
                clear_opt(sb, ERRORS_MASK);
        if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
                ext4_msg(sb, KERN_ERR, "Cannot change quota "
                         "options when quota turned on");
                return -1;
        }

        if (m->flags & MOPT_NOSUPPORT) {
                ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
        } else if (token == Opt_commit) {
                if (arg == 0)
                        arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
                else if (arg > INT_MAX / HZ) {
                        /* HZ * arg below must not overflow an int. */
                        ext4_msg(sb, KERN_ERR,
                                 "Invalid commit interval %d, "
                                 "must be smaller than %d",
                                 arg, INT_MAX / HZ);
                        return -1;
                }
                sbi->s_commit_interval = HZ * arg;
        } else if (token == Opt_debug_want_extra_isize) {
                /* Must be even, >= 4, and fit inside the on-disk inode. */
                if ((arg & 1) ||
                    (arg < 4) ||
                    (arg > (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) {
                        ext4_msg(sb, KERN_ERR,
                                 "Invalid want_extra_isize %d", arg);
                        return -1;
                }
                sbi->s_want_extra_isize = arg;
        } else if (token == Opt_max_batch_time) {
                sbi->s_max_batch_time = arg;
        } else if (token == Opt_min_batch_time) {
                sbi->s_min_batch_time = arg;
        } else if (token == Opt_inode_readahead_blks) {
                if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
                        ext4_msg(sb, KERN_ERR,
                                 "EXT4-fs: inode_readahead_blks must be "
                                 "0 or a power of 2 smaller than 2^31");
                        return -1;
                }
                sbi->s_inode_readahead_blks = arg;
        } else if (token == Opt_init_itable) {
                set_opt(sb, INIT_INODE_TABLE);
                if (!args->from)
                        arg = EXT4_DEF_LI_WAIT_MULT;    /* bare "init_itable" */
                sbi->s_li_wait_mult = arg;
        } else if (token == Opt_max_dir_size_kb) {
                sbi->s_max_dir_size_kb = arg;
#ifdef CONFIG_EXT4_DEBUG
        } else if (token == Opt_fc_debug_max_replay) {
                sbi->s_fc_debug_max_replay = arg;
#endif
        } else if (token == Opt_stripe) {
                sbi->s_stripe = arg;
        } else if (token == Opt_resuid) {
                uid = make_kuid(current_user_ns(), arg);
                if (!uid_valid(uid)) {
                        ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
                        return -1;
                }
                sbi->s_resuid = uid;
        } else if (token == Opt_resgid) {
                gid = make_kgid(current_user_ns(), arg);
                if (!gid_valid(gid)) {
                        ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
                        return -1;
                }
                sbi->s_resgid = gid;
        } else if (token == Opt_journal_dev) {
                if (is_remount) {
                        ext4_msg(sb, KERN_ERR,
                                 "Cannot specify journal on remount");
                        return -1;
                }
                parsed_opts->journal_devnum = arg;
        } else if (token == Opt_journal_path) {
                char *journal_path;
                struct inode *journal_inode;
                struct path path;
                int error;

                if (is_remount) {
                        ext4_msg(sb, KERN_ERR,
                                 "Cannot specify journal on remount");
                        return -1;
                }
                journal_path = match_strdup(&args[0]);
                if (!journal_path) {
                        ext4_msg(sb, KERN_ERR, "error: could not dup "
                                "journal device string");
                        return -1;
                }

                /* Resolve the path and make sure it is a block device. */
                error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
                if (error) {
                        ext4_msg(sb, KERN_ERR, "error: could not find "
                                "journal device path: error %d", error);
                        kfree(journal_path);
                        return -1;
                }

                journal_inode = d_inode(path.dentry);
                if (!S_ISBLK(journal_inode->i_mode)) {
                        ext4_msg(sb, KERN_ERR, "error: journal path %s "
                                "is not a block device", journal_path);
                        path_put(&path);
                        kfree(journal_path);
                        return -1;
                }

                parsed_opts->journal_devnum = new_encode_dev(journal_inode->i_rdev);
                path_put(&path);
                kfree(journal_path);
        } else if (token == Opt_journal_ioprio) {
                if (arg > 7) {
                        ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
                                 " (must be 0-7)");
                        return -1;
                }
                parsed_opts->journal_ioprio =
                        IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
        } else if (token == Opt_test_dummy_encryption) {
                return ext4_set_test_dummy_encryption(sb, opt, &args[0],
                                                      is_remount);
        } else if (m->flags & MOPT_DATAJ) {
                if (is_remount) {
                        /* data= mode cannot change once the journal is live. */
                        if (!sbi->s_journal)
                                ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
                        else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
                                ext4_msg(sb, KERN_ERR,
                                         "Cannot change data mode on remount");
                                return -1;
                        }
                } else {
                        clear_opt(sb, DATA_FLAGS);
                        sbi->s_mount_opt |= m->mount_opt;
                }
#ifdef CONFIG_QUOTA
        } else if (m->flags & MOPT_QFMT) {
                if (sb_any_quota_loaded(sb) &&
                    sbi->s_jquota_fmt != m->mount_opt) {
                        ext4_msg(sb, KERN_ERR, "Cannot change journaled "
                                 "quota options when quota turned on");
                        return -1;
                }
                if (ext4_has_feature_quota(sb)) {
                        ext4_msg(sb, KERN_INFO,
                                 "Quota format mount options ignored "
                                 "when QUOTA feature is enabled");
                        return 1;
                }
                sbi->s_jquota_fmt = m->mount_opt;
#endif
        } else if (token == Opt_dax || token == Opt_dax_always ||
                   token == Opt_dax_inode || token == Opt_dax_never) {
#ifdef CONFIG_FS_DAX
                /*
                 * On remount the dax mode must stay exactly as it was; the
                 * checks below reject any always/never/inode transition.
                 */
                switch (token) {
                case Opt_dax:
                case Opt_dax_always:
                        if (is_remount &&
                            (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
                             (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
                        fail_dax_change_remount:
                                ext4_msg(sb, KERN_ERR, "can't change "
                                         "dax mount option while remounting");
                                return -1;
                        }
                        if (is_remount &&
                            (test_opt(sb, DATA_FLAGS) ==
                             EXT4_MOUNT_JOURNAL_DATA)) {
                                ext4_msg(sb, KERN_ERR, "can't mount with "
                                         "both data=journal and dax");
                                return -1;
                        }
                        ext4_msg(sb, KERN_WARNING,
                                "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
                        sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
                        sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
                        break;
                case Opt_dax_never:
                        if (is_remount &&
                            (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
                             (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS)))
                                goto fail_dax_change_remount;
                        sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
                        sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
                        break;
                case Opt_dax_inode:
                        if (is_remount &&
                            ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
                             (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
                             !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE)))
                                goto fail_dax_change_remount;
                        sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
                        sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
                        /* Strictly for printing options */
                        sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_INODE;
                        break;
                }
#else
                ext4_msg(sb, KERN_INFO, "dax option not supported");
                sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
                sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
                return -1;
#endif
        } else if (token == Opt_data_err_abort) {
                sbi->s_mount_opt |= m->mount_opt;
        } else if (token == Opt_data_err_ignore) {
                sbi->s_mount_opt &= ~m->mount_opt;
        } else if (token == Opt_mb_optimize_scan) {
                if (arg != 0 && arg != 1) {
                        ext4_msg(sb, KERN_WARNING,
                                 "mb_optimize_scan should be set to 0 or 1.");
                        return -1;
                }
                parsed_opts->mb_optimize_scan = arg;
        } else {
                /* Generic MOPT_SET / MOPT_CLEAR handling. */
                if (!args->from)
                        arg = 1;        /* bare option means "enable" */
                if (m->flags & MOPT_CLEAR)
                        arg = !arg;
                else if (unlikely(!(m->flags & MOPT_SET))) {
                        ext4_msg(sb, KERN_WARNING,
                                 "buggy handling of option %s", opt);
                        WARN_ON(1);
                        return -1;
                }
                if (m->flags & MOPT_2) {
                        if (arg != 0)
                                sbi->s_mount_opt2 |= m->mount_opt;
                        else
                                sbi->s_mount_opt2 &= ~m->mount_opt;
                } else {
                        if (arg != 0)
                                sbi->s_mount_opt |= m->mount_opt;
                        else
                                sbi->s_mount_opt &= ~m->mount_opt;
                }
        }
        return 1;
}
2422
/*
 * Parse the comma-separated mount option string and apply each option via
 * handle_mount_opt(), then run cross-option consistency checks (quota
 * option mixing, project quota feature, dioread_nolock blocksize warning).
 *
 * Returns 1 on success, 0 on failure (note: a different convention from
 * handle_mount_opt(), which returns -1 on error).
 */
static int parse_options(char *options, struct super_block *sb,
                         struct ext4_parsed_options *ret_opts,
                         int is_remount)
{
        struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb);
        char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name;
        substring_t args[MAX_OPT_ARGS];
        int token;

        if (!options)
                return 1;

        while ((p = strsep(&options, ",")) != NULL) {
                if (!*p)
                        continue;
                /*
                 * Initialize args struct so we know whether arg was
                 * found; some options take optional arguments.
                 */
                args[0].to = args[0].from = NULL;
                token = match_token(p, tokens, args);
                if (handle_mount_opt(sb, p, token, args, ret_opts,
                                     is_remount) < 0)
                        return 0;
        }
#ifdef CONFIG_QUOTA
        /*
         * We do the test below only for project quotas. 'usrquota' and
         * 'grpquota' mount options are allowed even without quota feature
         * to support legacy quotas in quota files.
         */
        if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
                ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
                         "Cannot enable project quota enforcement.");
                return 0;
        }
        usr_qf_name = get_qf_name(sb, sbi, USRQUOTA);
        grp_qf_name = get_qf_name(sb, sbi, GRPQUOTA);
        if (usr_qf_name || grp_qf_name) {
                /*
                 * Journaled ("new") quota file names and the old-style
                 * usrquota/grpquota options must not be mixed per type.
                 */
                if (test_opt(sb, USRQUOTA) && usr_qf_name)
                        clear_opt(sb, USRQUOTA);

                if (test_opt(sb, GRPQUOTA) && grp_qf_name)
                        clear_opt(sb, GRPQUOTA);

                if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
                        ext4_msg(sb, KERN_ERR, "old and new quota "
                                        "format mixing");
                        return 0;
                }

                if (!sbi->s_jquota_fmt) {
                        ext4_msg(sb, KERN_ERR, "journaled quota format "
                                        "not specified");
                        return 0;
                }
        }
#endif
        if (test_opt(sb, DIOREAD_NOLOCK)) {
                int blocksize =
                        BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
                if (blocksize < PAGE_SIZE)
                        ext4_msg(sb, KERN_WARNING, "Warning: mounting with an "
                                 "experimental mount option 'dioread_nolock' "
                                 "for blocksize < PAGE_SIZE");
        }
        return 1;
}
2491
/*
 * Emit the quota-related mount options (jqfmt=, usrjquota=, grpjquota=)
 * into @seq.  The quota file names in s_qf_names[] are published via
 * rcu_assign_pointer() (see set_qf_name()/clear_qf_name()), so they are
 * read here under rcu_read_lock().
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
                                           struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        char *usr_qf_name, *grp_qf_name;

        if (sbi->s_jquota_fmt) {
                char *fmtname = "";

                switch (sbi->s_jquota_fmt) {
                case QFMT_VFS_OLD:
                        fmtname = "vfsold";
                        break;
                case QFMT_VFS_V0:
                        fmtname = "vfsv0";
                        break;
                case QFMT_VFS_V1:
                        fmtname = "vfsv1";
                        break;
                }
                seq_printf(seq, ",jqfmt=%s", fmtname);
        }

        rcu_read_lock();
        usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
        grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
        if (usr_qf_name)
                seq_show_option(seq, "usrjquota", usr_qf_name);
        if (grp_qf_name)
                seq_show_option(seq, "grpjquota", grp_qf_name);
        rcu_read_unlock();
#endif
}
2526
5a916be1
TT
2527static const char *token2str(int token)
2528{
50df9fd5 2529 const struct match_token *t;
5a916be1
TT
2530
2531 for (t = tokens; t->token != Opt_err; t++)
2532 if (t->token == token && !strchr(t->pattern, '='))
2533 break;
2534 return t->pattern;
2535}
2536
/*
 * Show an option if
 *  - it's set to a non-default value OR
 *  - if the per-sb default is different from the global default
 *
 * With @nodefs != 0 every tracked option is printed regardless of the
 * defaults, one per line; otherwise options are comma-separated.  @sep
 * carries that separator into the SEQ_OPTS_* helper macros below.
 */
static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
                              int nodefs)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
        int def_errors, def_mount_opt = sbi->s_def_mount_opt;
        const struct mount_opts *m;
        char sep = nodefs ? '\n' : ',';

#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)

        if (sbi->s_sb_block != 1)
                SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);

        /* Walk the option table for simple set/clear bit options. */
        for (m = ext4_mount_opts; m->token != Opt_err; m++) {
                int want_set = m->flags & MOPT_SET;
                if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
                    (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP)
                        continue;
                if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
                        continue; /* skip if same as the default */
                if ((want_set &&
                     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
                    (!want_set && (sbi->s_mount_opt & m->mount_opt)))
                        continue; /* select Opt_noFoo vs Opt_Foo */
                SEQ_OPTS_PRINT("%s", token2str(m->token));
        }

        if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
            le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
                SEQ_OPTS_PRINT("resuid=%u",
                                from_kuid_munged(&init_user_ns, sbi->s_resuid));
        if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
            le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
                SEQ_OPTS_PRINT("resgid=%u",
                                from_kgid_munged(&init_user_ns, sbi->s_resgid));
        /* -1 never equals any EXT4_ERRORS_* value, forcing output below. */
        def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
        if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
                SEQ_OPTS_PUTS("errors=remount-ro");
        if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
                SEQ_OPTS_PUTS("errors=continue");
        if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
                SEQ_OPTS_PUTS("errors=panic");
        if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
                SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
        if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
                SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
        if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
                SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
        if (sb->s_flags & SB_I_VERSION)
                SEQ_OPTS_PUTS("i_version");
        if (nodefs || sbi->s_stripe)
                SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
        if (nodefs || EXT4_MOUNT_DATA_FLAGS &
                        (sbi->s_mount_opt ^ def_mount_opt)) {
                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
                        SEQ_OPTS_PUTS("data=journal");
                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
                        SEQ_OPTS_PUTS("data=ordered");
                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
                        SEQ_OPTS_PUTS("data=writeback");
        }
        if (nodefs ||
            sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
                SEQ_OPTS_PRINT("inode_readahead_blks=%u",
                               sbi->s_inode_readahead_blks);

        if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
                       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
                SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
        if (nodefs || sbi->s_max_dir_size_kb)
                SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
        if (test_opt(sb, DATA_ERR_ABORT))
                SEQ_OPTS_PUTS("data_err=abort");

        fscrypt_show_test_dummy_encryption(seq, sep, sb);

        if (sb->s_flags & SB_INLINECRYPT)
                SEQ_OPTS_PUTS("inlinecrypt");

        /* dax options carry MOPT_SKIP in the table and are printed here. */
        if (test_opt(sb, DAX_ALWAYS)) {
                if (IS_EXT2_SB(sb))
                        SEQ_OPTS_PUTS("dax");
                else
                        SEQ_OPTS_PUTS("dax=always");
        } else if (test_opt2(sb, DAX_NEVER)) {
                SEQ_OPTS_PUTS("dax=never");
        } else if (test_opt2(sb, DAX_INODE)) {
                SEQ_OPTS_PUTS("dax=inode");
        }
        ext4_show_quota_options(seq, sb);
        return 0;
}
2636
/* ->show_options callback: print only options that differ from defaults. */
static int ext4_show_options(struct seq_file *seq, struct dentry *root)
{
        return _ext4_show_options(seq, root->d_sb, 0);
}
2641
ebd173be 2642int ext4_seq_options_show(struct seq_file *seq, void *offset)
66acdcf4
TT
2643{
2644 struct super_block *sb = seq->private;
2645 int rc;
2646
bc98a42c 2647 seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
66acdcf4
TT
2648 rc = _ext4_show_options(seq, sb, 1);
2649 seq_puts(seq, "\n");
2650 return rc;
2651}
2652
/*
 * Finish superblock setup at mount time: sanity-check the revision level,
 * emit "run e2fsck" advisories, bump the mount count/time stamps, and mark
 * the journal/orphan-file features as in-use before committing the
 * superblock back to disk.
 *
 * Returns 0 on success, -EROFS if the revision level is unsupported, or the
 * error from ext4_commit_super().  For a read-only mount all on-disk state
 * updates are skipped.
 */
static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
			    int read_only)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int err = 0;

	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
		ext4_msg(sb, KERN_ERR, "revision level too high, "
			 "forcing read-only mode");
		err = -EROFS;
		goto done;
	}
	if (read_only)
		goto done;
	if (!(sbi->s_mount_state & EXT4_VALID_FS))
		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
			 "running e2fsck is recommended");
	else if (sbi->s_mount_state & EXT4_ERROR_FS)
		ext4_msg(sb, KERN_WARNING,
			 "warning: mounting fs with errors, "
			 "running e2fsck is recommended");
	/* s_max_mnt_count <= 0 means "never force a check" */
	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
		 le16_to_cpu(es->s_mnt_count) >=
		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
		ext4_msg(sb, KERN_WARNING,
			 "warning: maximal mount count reached, "
			 "running e2fsck is recommended");
	else if (le32_to_cpu(es->s_checkinterval) &&
		 (ext4_get_tstamp(es, s_lastcheck) +
		  le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
		ext4_msg(sb, KERN_WARNING,
			 "warning: checktime reached, "
			 "running e2fsck is recommended");
	/*
	 * Without a journal, clear VALID_FS now; it is restored on clean
	 * unmount, so a crash leaves the fs marked as needing fsck.
	 */
	if (!sbi->s_journal)
		es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
	le16_add_cpu(&es->s_mnt_count, 1);
	ext4_update_tstamp(es, s_mtime);
	if (sbi->s_journal) {
		/* Flag recovery as needed until a clean unmount clears it. */
		ext4_set_feature_journal_needs_recovery(sb);
		if (ext4_has_feature_orphan_file(sb))
			ext4_set_feature_orphan_present(sb);
	}

	err = ext4_commit_super(sb);
done:
	if (test_opt(sb, DEBUG))
		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
				"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
			sb->s_blocksize,
			sbi->s_groups_count,
			EXT4_BLOCKS_PER_GROUP(sb),
			EXT4_INODES_PER_GROUP(sb),
			sbi->s_mount_opt, sbi->s_mount_opt2);

	cleancache_init_fs(sb);
	return err;
}
2712
117fff10
TT
/*
 * Grow the per-flex-group counters array (sbi->s_flex_groups) so it can
 * cover at least @ngroup block groups.  The array of pointers is replaced
 * via RCU; readers use sbi_array_rcu_deref() and never see a partially
 * initialized table.
 *
 * Returns 0 on success (or if no growth is needed), -ENOMEM on allocation
 * failure (in which case the old array is left untouched).
 */
int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct flex_groups **old_groups, **new_groups;
	int size, i, j;

	if (!sbi->s_log_groups_per_flex)
		return 0;

	/* Index of the last needed flex group, +1 for the element count. */
	size = ext4_flex_group(sbi, ngroup - 1) + 1;
	if (size <= sbi->s_flex_groups_allocated)
		return 0;

	new_groups = kvzalloc(roundup_pow_of_two(size *
			      sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
	if (!new_groups) {
		ext4_msg(sb, KERN_ERR,
			 "not enough memory for %d flex group pointers", size);
		return -ENOMEM;
	}
	/* Allocate only the new tail; old entries are copied below. */
	for (i = sbi->s_flex_groups_allocated; i < size; i++) {
		new_groups[i] = kvzalloc(roundup_pow_of_two(
					  sizeof(struct flex_groups)),
					  GFP_KERNEL);
		if (!new_groups[i]) {
			/* Unwind the elements allocated in this call. */
			for (j = sbi->s_flex_groups_allocated; j < i; j++)
				kvfree(new_groups[j]);
			kvfree(new_groups);
			ext4_msg(sb, KERN_ERR,
				 "not enough memory for %d flex groups", size);
			return -ENOMEM;
		}
	}
	rcu_read_lock();
	old_groups = rcu_dereference(sbi->s_flex_groups);
	if (old_groups)
		memcpy(new_groups, old_groups,
		       (sbi->s_flex_groups_allocated *
			sizeof(struct flex_groups *)));
	rcu_read_unlock();
	/* Publish the new table before freeing the old one via RCU. */
	rcu_assign_pointer(sbi->s_flex_groups, new_groups);
	sbi->s_flex_groups_allocated = size;
	if (old_groups)
		ext4_kvfree_array_rcu(old_groups);
	return 0;
}
2759
772cb7c8
JS
/*
 * Populate the in-memory flex-group counters (free inodes/clusters, used
 * dirs) from the on-disk group descriptors at mount time.
 *
 * Returns 1 on success or when flex_bg is effectively disabled (invalid
 * s_log_groups_per_flex), 0 if the counter array could not be allocated.
 */
static int ext4_fill_flex_info(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_desc *gdp = NULL;
	struct flex_groups *fg;
	ext4_group_t flex_group;
	int i, err;

	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
	/* Out-of-range values turn flex_bg accounting off entirely. */
	if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
		sbi->s_log_groups_per_flex = 0;
		return 1;
	}

	err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
	if (err)
		goto failed;

	for (i = 0; i < sbi->s_groups_count; i++) {
		/*
		 * NOTE(review): gdp is used without a NULL check below;
		 * presumably descriptors were validated earlier in mount —
		 * confirm against ext4_check_descriptors() callers.
		 */
		gdp = ext4_get_group_desc(sb, i, NULL);

		flex_group = ext4_flex_group(sbi, i);
		fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
		atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
		atomic64_add(ext4_free_group_clusters(sb, gdp),
			     &fg->free_clusters);
		atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
	}

	return 1;
failed:
	return 0;
}
2793
/*
 * Compute the on-disk checksum for group descriptor @gdp of @block_group.
 *
 * Two algorithms exist: with the metadata_csum feature the checksum is the
 * low 16 bits of a crc32c over (csum seed, group number, descriptor with
 * its bg_checksum field treated as zero); otherwise, with gdt_csum only,
 * it is a crc16 over (fs UUID, group number, descriptor minus the checksum
 * field).  Returns 0 when neither checksum feature is enabled.
 */
static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
				   struct ext4_group_desc *gdp)
{
	int offset = offsetof(struct ext4_group_desc, bg_checksum);
	__u16 crc = 0;
	__le32 le_group = cpu_to_le32(block_group);
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (ext4_has_metadata_csum(sbi->s_sb)) {
		/* Use new metadata_csum algorithm */
		__u32 csum32;
		__u16 dummy_csum = 0;

		csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
				     sizeof(le_group));
		csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
		/* Feed zeros in place of the checksum field itself. */
		csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
				     sizeof(dummy_csum));
		offset += sizeof(dummy_csum);
		/* 64-bit descriptors extend past the checksum field. */
		if (offset < sbi->s_desc_size)
			csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
					     sbi->s_desc_size - offset);

		crc = csum32 & 0xFFFF;
		goto out;
	}

	/* old crc16 code */
	if (!ext4_has_feature_gdt_csum(sb))
		return 0;

	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
	crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
	crc = crc16(crc, (__u8 *)gdp, offset);
	offset += sizeof(gdp->bg_checksum); /* skip checksum */
	/* for checksum of struct ext4_group_desc do the rest...*/
	if (ext4_has_feature_64bit(sb) &&
	    offset < le16_to_cpu(sbi->s_es->s_desc_size))
		crc = crc16(crc, (__u8 *)gdp + offset,
			    le16_to_cpu(sbi->s_es->s_desc_size) -
				offset);

out:
	return cpu_to_le16(crc);
}
2839
feb0ab32 2840int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
717d50e4
AD
2841 struct ext4_group_desc *gdp)
2842{
feb0ab32 2843 if (ext4_has_group_desc_csum(sb) &&
e2b911c5 2844 (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
717d50e4
AD
2845 return 0;
2846
2847 return 1;
2848}
2849
feb0ab32
DW
2850void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
2851 struct ext4_group_desc *gdp)
2852{
2853 if (!ext4_has_group_desc_csum(sb))
2854 return;
e2b911c5 2855 gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
feb0ab32
DW
2856}
2857
/* Called at mount-time, super-block is locked */
/*
 * Validate every group descriptor: the block bitmap, inode bitmap and inode
 * table of each group must lie inside the group's block range and must not
 * overlap the superblock or the group descriptor blocks; each descriptor's
 * checksum must verify.  Overlap/checksum problems are tolerated (logged
 * only) on a read-only mount so a damaged fs can still be inspected.
 *
 * On success returns 1 and stores in *first_not_zeroed (if non-NULL) the
 * first group whose inode table is not yet zeroed — or s_groups_count when
 * all are zeroed.  Returns 0 when the descriptors are unusable.
 */
static int ext4_check_descriptors(struct super_block *sb,
				  ext4_fsblk_t sb_block,
				  ext4_group_t *first_not_zeroed)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
	ext4_fsblk_t last_block;
	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
	ext4_fsblk_t block_bitmap;
	ext4_fsblk_t inode_bitmap;
	ext4_fsblk_t inode_table;
	int flexbg_flag = 0;
	ext4_group_t i, grp = sbi->s_groups_count;

	if (ext4_has_feature_flex_bg(sb))
		flexbg_flag = 1;

	ext4_debug("Checking group descriptors");

	for (i = 0; i < sbi->s_groups_count; i++) {
		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);

		/*
		 * With flex_bg, a group's metadata may live anywhere in the
		 * fs, so the containment check uses the whole device range.
		 */
		if (i == sbi->s_groups_count - 1 || flexbg_flag)
			last_block = ext4_blocks_count(sbi->s_es) - 1;
		else
			last_block = first_block +
				(EXT4_BLOCKS_PER_GROUP(sb) - 1);

		/* Remember the first group with an un-zeroed inode table. */
		if ((grp == sbi->s_groups_count) &&
		    !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
			grp = i;

		block_bitmap = ext4_block_bitmap(sb, gdp);
		if (block_bitmap == sb_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Block bitmap for group %u overlaps "
				 "superblock", i);
			if (!sb_rdonly(sb))
				return 0;
		}
		if (block_bitmap >= sb_block + 1 &&
		    block_bitmap <= last_bg_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Block bitmap for group %u overlaps "
				 "block group descriptors", i);
			if (!sb_rdonly(sb))
				return 0;
		}
		if (block_bitmap < first_block || block_bitmap > last_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
			       "Block bitmap for group %u not in group "
			       "(block %llu)!", i, block_bitmap);
			return 0;
		}
		inode_bitmap = ext4_inode_bitmap(sb, gdp);
		if (inode_bitmap == sb_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Inode bitmap for group %u overlaps "
				 "superblock", i);
			if (!sb_rdonly(sb))
				return 0;
		}
		if (inode_bitmap >= sb_block + 1 &&
		    inode_bitmap <= last_bg_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Inode bitmap for group %u overlaps "
				 "block group descriptors", i);
			if (!sb_rdonly(sb))
				return 0;
		}
		if (inode_bitmap < first_block || inode_bitmap > last_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
			       "Inode bitmap for group %u not in group "
			       "(block %llu)!", i, inode_bitmap);
			return 0;
		}
		inode_table = ext4_inode_table(sb, gdp);
		if (inode_table == sb_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Inode table for group %u overlaps "
				 "superblock", i);
			if (!sb_rdonly(sb))
				return 0;
		}
		if (inode_table >= sb_block + 1 &&
		    inode_table <= last_bg_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Inode table for group %u overlaps "
				 "block group descriptors", i);
			if (!sb_rdonly(sb))
				return 0;
		}
		if (inode_table < first_block ||
		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
			       "Inode table for group %u not in group "
			       "(block %llu)!", i, inode_table);
			return 0;
		}
		/* Checksum must be sampled under the group lock. */
		ext4_lock_group(sb, i);
		if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Checksum for group %u failed (%u!=%u)",
				 i, le16_to_cpu(ext4_group_desc_csum(sb, i,
				     gdp)), le16_to_cpu(gdp->bg_checksum));
			if (!sb_rdonly(sb)) {
				ext4_unlock_group(sb, i);
				return 0;
			}
		}
		ext4_unlock_group(sb, i);
		if (!flexbg_flag)
			first_block += EXT4_BLOCKS_PER_GROUP(sb);
	}
	if (NULL != first_not_zeroed)
		*first_not_zeroed = grp;
	return 1;
}
2977
cd2291a4
ES
/*
 * Maximal extent format file size.
 * Resulting logical blkno at s_maxbytes must fit in our on-disk
 * extent format containers, within a sector_t, and within i_blocks
 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
 * so that won't be a limiting factor.
 *
 * However there is other limiting factor. We do store extents in the form
 * of starting block and length, hence the resulting length of the extent
 * covering maximum file size must fit into on-disk format containers as
 * well. Given that length is always by 1 unit bigger than max unit (because
 * we count 0 as well) we have to lower the s_maxbytes by one fs block.
 *
 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
 */
static loff_t ext4_max_size(int blkbits, int has_huge_files)
{
	loff_t res;
	loff_t upper_limit = MAX_LFS_FILESIZE;

	BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));

	if (!has_huge_files) {
		/* Without huge_file, i_blocks caps at 2^32 512-byte sectors. */
		upper_limit = (1LL << 32) - 1;

		/* total blocks in file system block size */
		upper_limit >>= (blkbits - 9);
		upper_limit <<= blkbits;
	}

	/*
	 * 32-bit extent-start container, ee_block. We lower the maxbytes
	 * by one fs block, so ee_len can cover the extent of maximum file
	 * size
	 */
	res = (1LL << 32) - 1;
	res <<= blkbits;

	/* Sanity check against vm- & vfs- imposed limits */
	if (res > upper_limit)
		res = upper_limit;

	return res;
}
ac27a0ec 3022
/*
 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
 * We need to be 1 filesystem block less than the 2^48 sector limit.
 *
 * @bits is the filesystem block size in bits (log2 of the block size);
 * @has_huge_files selects the 48-bit i_blocks interpretation.  Returns the
 * largest file size (bytes) representable by the indirect-block scheme.
 */
static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
{
	unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS;
	int meta_blocks;

	/*
	 * This is calculated to be the largest file size for a dense, block
	 * mapped file such that the file's total number of 512-byte sectors,
	 * including data and all indirect blocks, does not exceed (2^48 - 1).
	 *
	 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
	 * number of 512-byte sectors of the file.
	 */
	if (!has_huge_files) {
		/*
		 * !has_huge_files or implies that the inode i_block field
		 * represents total file blocks in 2^32 512-byte sectors ==
		 * size of vfs inode i_blocks * 8
		 */
		upper_limit = (1LL << 32) - 1;

		/* total blocks in file system block size */
		upper_limit >>= (bits - 9);

	} else {
		/*
		 * We use 48 bit ext4_inode i_blocks
		 * With EXT4_HUGE_FILE_FL set the i_blocks
		 * represent total number of blocks in
		 * file system block size
		 */
		upper_limit = (1LL << 48) - 1;

	}

	/* indirect blocks */
	meta_blocks = 1;
	/* double indirect blocks */
	meta_blocks += 1 + (1LL << (bits-2));
	/* tripple indirect blocks */
	meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));

	upper_limit -= meta_blocks;
	upper_limit <<= bits;

	/* Direct blocks plus single/double/triple indirect coverage. */
	res += 1LL << (bits-2);
	res += 1LL << (2*(bits-2));
	res += 1LL << (3*(bits-2));
	res <<= bits;
	if (res > upper_limit)
		res = upper_limit;

	if (res > MAX_LFS_FILESIZE)
		res = MAX_LFS_FILESIZE;

	return (loff_t)res;
}
3085
617ba13b 3086static ext4_fsblk_t descriptor_loc(struct super_block *sb,
0b8e58a1 3087 ext4_fsblk_t logical_sb_block, int nr)
ac27a0ec 3088{
617ba13b 3089 struct ext4_sb_info *sbi = EXT4_SB(sb);
fd2d4291 3090 ext4_group_t bg, first_meta_bg;
ac27a0ec
DK
3091 int has_super = 0;
3092
3093 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
3094
e2b911c5 3095 if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
70bbb3e0 3096 return logical_sb_block + nr + 1;
ac27a0ec 3097 bg = sbi->s_desc_per_block * nr;
617ba13b 3098 if (ext4_bg_has_super(sb, bg))
ac27a0ec 3099 has_super = 1;
0b8e58a1 3100
bd63f6b0
DW
3101 /*
3102 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
3103 * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled
3104 * on modern mke2fs or blksize > 1k on older mke2fs) then we must
3105 * compensate.
3106 */
3107 if (sb->s_blocksize == 1024 && nr == 0 &&
49598e04 3108 le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
bd63f6b0
DW
3109 has_super++;
3110
617ba13b 3111 return (has_super + ext4_group_first_block_no(sb, bg));
ac27a0ec
DK
3112}
3113
c9de560d
AT
3114/**
3115 * ext4_get_stripe_size: Get the stripe size.
3116 * @sbi: In memory super block info
3117 *
3118 * If we have specified it via mount option, then
3119 * use the mount option value. If the value specified at mount time is
3120 * greater than the blocks per group use the super block value.
3121 * If the super block value is greater than blocks per group return 0.
3122 * Allocator needs it be less than blocks per group.
3123 *
3124 */
3125static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
3126{
3127 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
3128 unsigned long stripe_width =
3129 le32_to_cpu(sbi->s_es->s_raid_stripe_width);
3eb08658 3130 int ret;
c9de560d
AT
3131
3132 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
3eb08658 3133 ret = sbi->s_stripe;
5469d7c3 3134 else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
3eb08658 3135 ret = stripe_width;
5469d7c3 3136 else if (stride && stride <= sbi->s_blocks_per_group)
3eb08658
DE
3137 ret = stride;
3138 else
3139 ret = 0;
c9de560d 3140
3eb08658
DE
3141 /*
3142 * If the stripe width is 1, this makes no sense and
3143 * we set it to 0 to turn off stripe handling code.
3144 */
3145 if (ret <= 1)
3146 ret = 0;
c9de560d 3147
3eb08658 3148 return ret;
c9de560d 3149}
ac27a0ec 3150
a13fb1a4
ES
/*
 * Check whether this filesystem can be mounted based on
 * the features present and the RDONLY/RDWR mount requested.
 * Returns 1 if this filesystem can be mounted as requested,
 * 0 if it cannot be.
 *
 * Side effect: a filesystem carrying the "readonly" feature forces
 * SB_RDONLY on the superblock and is then accepted.
 */
int ext4_feature_set_ok(struct super_block *sb, int readonly)
{
	if (ext4_has_unknown_ext4_incompat_features(sb)) {
		ext4_msg(sb, KERN_ERR,
			"Couldn't mount because of "
			"unsupported optional features (%x)",
			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
			~EXT4_FEATURE_INCOMPAT_SUPP));
		return 0;
	}

#ifndef CONFIG_UNICODE
	if (ext4_has_feature_casefold(sb)) {
		ext4_msg(sb, KERN_ERR,
			 "Filesystem with casefold feature cannot be "
			 "mounted without CONFIG_UNICODE");
		return 0;
	}
#endif

	/* Everything past this point only matters for a read-write mount. */
	if (readonly)
		return 1;

	if (ext4_has_feature_readonly(sb)) {
		ext4_msg(sb, KERN_INFO, "filesystem is read-only");
		sb->s_flags |= SB_RDONLY;
		return 1;
	}

	/* Check that feature set is OK for a read-write mount */
	if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
		ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
			 "unsupported optional features (%x)",
			 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
			  ~EXT4_FEATURE_RO_COMPAT_SUPP));
		return 0;
	}
	if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
		ext4_msg(sb, KERN_ERR,
			 "Can't support bigalloc feature without "
			 "extents feature\n");
		return 0;
	}

#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
	if (!readonly && (ext4_has_feature_quota(sb) ||
			  ext4_has_feature_project(sb))) {
		ext4_msg(sb, KERN_ERR,
			 "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
		return 0;
	}
#endif  /* CONFIG_QUOTA */
	return 1;
}
3211
66e61a9e
TT
/*
 * This function is called once a day if we have errors logged
 * on the file system
 */
/*
 * Timer callback (sbi->s_err_report): dump the persistent error counters
 * from the superblock — total count plus the first and last recorded error
 * (function, line, inode, block, timestamp) — then re-arm itself for 24h.
 */
static void print_daily_error_info(struct timer_list *t)
{
	struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
	struct super_block *sb = sbi->s_sb;
	struct ext4_super_block *es = sbi->s_es;

	if (es->s_error_count)
		/* fsck newer than v1.41.13 is needed to clean this condition. */
		ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
			 le32_to_cpu(es->s_error_count));
	if (es->s_first_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
		       sb->s_id,
		       ext4_get_tstamp(es, s_first_error_time),
		       (int) sizeof(es->s_first_error_func),
		       es->s_first_error_func,
		       le32_to_cpu(es->s_first_error_line));
		if (es->s_first_error_ino)
			printk(KERN_CONT ": inode %u",
			       le32_to_cpu(es->s_first_error_ino));
		if (es->s_first_error_block)
			printk(KERN_CONT ": block %llu", (unsigned long long)
			       le64_to_cpu(es->s_first_error_block));
		printk(KERN_CONT "\n");
	}
	if (es->s_last_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
		       sb->s_id,
		       ext4_get_tstamp(es, s_last_error_time),
		       (int) sizeof(es->s_last_error_func),
		       es->s_last_error_func,
		       le32_to_cpu(es->s_last_error_line));
		if (es->s_last_error_ino)
			printk(KERN_CONT ": inode %u",
			       le32_to_cpu(es->s_last_error_ino));
		if (es->s_last_error_block)
			printk(KERN_CONT ": block %llu", (unsigned long long)
			       le64_to_cpu(es->s_last_error_block));
		printk(KERN_CONT "\n");
	}
	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
}
3258
bfff6873
LC
/* Find next suitable group and run ext4_init_inode_table */
/*
 * Run one slice of a lazy-init request.  In PREFETCH_BBITMAP mode it
 * prefetches a batch of block bitmaps and, when the group counter wraps,
 * either finishes (ret 1) or switches the request to ITABLE mode.  In
 * ITABLE mode it zeroes the next uninitialized inode table and schedules
 * the following slice (lr_timeout is derived from the measured duration of
 * the first slice times s_li_wait_mult).
 *
 * Returns 0 to keep the request alive, non-zero when it is complete (or
 * the group descriptor could not be read).
 */
static int ext4_run_li_request(struct ext4_li_request *elr)
{
	struct ext4_group_desc *gdp = NULL;
	struct super_block *sb = elr->lr_super;
	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
	ext4_group_t group = elr->lr_next_group;
	unsigned int prefetch_ios = 0;
	int ret = 0;
	u64 start_time;

	if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
		elr->lr_next_group = ext4_mb_prefetch(sb, group,
				EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
		if (prefetch_ios)
			ext4_mb_prefetch_fini(sb, elr->lr_next_group,
					      prefetch_ios);
		trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
					    prefetch_ios);
		/* Wrapped past the last group: prefetch pass is done. */
		if (group >= elr->lr_next_group) {
			ret = 1;
			if (elr->lr_first_not_zeroed != ngroups &&
			    !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
				elr->lr_next_group = elr->lr_first_not_zeroed;
				elr->lr_mode = EXT4_LI_MODE_ITABLE;
				ret = 0;
			}
		}
		return ret;
	}

	/* ITABLE mode: find the next group whose inode table isn't zeroed. */
	for (; group < ngroups; group++) {
		gdp = ext4_get_group_desc(sb, group, NULL);
		if (!gdp) {
			ret = 1;
			break;
		}

		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
			break;
	}

	if (group >= ngroups)
		ret = 1;

	if (!ret) {
		start_time = ktime_get_real_ns();
		ret = ext4_init_inode_table(sb, group,
					    elr->lr_timeout ? 0 : 1);
		trace_ext4_lazy_itable_init(sb, group);
		if (elr->lr_timeout == 0) {
			elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
				  EXT4_SB(elr->lr_super)->s_li_wait_mult);
		}
		elr->lr_next_sched = jiffies + elr->lr_timeout;
		elr->lr_next_group = group + 1;
	}
	return ret;
}
3318
/*
 * Remove lr_request from the list_request and free the
 * request structure. Should be called with li_list_mtx held
 */
static void ext4_remove_li_request(struct ext4_li_request *elr)
{
	/* Tolerate a NULL request (e.g. fs never registered one). */
	if (!elr)
		return;

	list_del(&elr->lr_request);
	/* Clear the owning superblock's back-pointer before freeing. */
	EXT4_SB(elr->lr_super)->s_li_request = NULL;
	kfree(elr);
}
3332
/*
 * Detach and free this superblock's lazy-init request (if any) at
 * unmount/remount time.  Takes ext4_li_mtx first so ext4_li_info cannot be
 * freed underneath us, then li_list_mtx as ext4_remove_li_request requires.
 */
static void ext4_unregister_li_request(struct super_block *sb)
{
	mutex_lock(&ext4_li_mtx);
	if (!ext4_li_info) {
		/* Lazy-init machinery was never started or already torn down. */
		mutex_unlock(&ext4_li_mtx);
		return;
	}

	mutex_lock(&ext4_li_info->li_list_mtx);
	ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
	mutex_unlock(&ext4_li_info->li_list_mtx);
	mutex_unlock(&ext4_li_mtx);
}
3346
8f1f7453
ES
3347static struct task_struct *ext4_lazyinit_task;
3348
bfff6873
LC
/*
 * This is the function where ext4lazyinit thread lives. It walks
 * through the request list searching for next scheduled filesystem.
 * When such a fs is found, run the lazy initialization request
 * (ext4_rn_li_request) and keep track of the time spend in this
 * function. Based on that time we compute next schedule time of
 * the request. When walking through the list is complete, compute
 * next waking time and put itself into sleep.
 */
static int ext4_lazyinit_thread(void *arg)
{
	struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
	struct list_head *pos, *n;
	struct ext4_li_request *elr;
	unsigned long next_wakeup, cur;

	BUG_ON(NULL == eli);

cont_thread:
	while (true) {
		next_wakeup = MAX_JIFFY_OFFSET;

		mutex_lock(&eli->li_list_mtx);
		if (list_empty(&eli->li_request_list)) {
			mutex_unlock(&eli->li_list_mtx);
			goto exit_thread;
		}
		list_for_each_safe(pos, n, &eli->li_request_list) {
			int err = 0;
			int progress = 0;
			elr = list_entry(pos, struct ext4_li_request,
					 lr_request);

			/* Not due yet — just fold it into the wakeup time. */
			if (time_before(jiffies, elr->lr_next_sched)) {
				if (time_before(elr->lr_next_sched, next_wakeup))
					next_wakeup = elr->lr_next_sched;
				continue;
			}
			/* Only run if the sb is neither unmounting nor frozen. */
			if (down_read_trylock(&elr->lr_super->s_umount)) {
				if (sb_start_write_trylock(elr->lr_super)) {
					progress = 1;
					/*
					 * We hold sb->s_umount, sb can not
					 * be removed from the list, it is
					 * now safe to drop li_list_mtx
					 */
					mutex_unlock(&eli->li_list_mtx);
					err = ext4_run_li_request(elr);
					sb_end_write(elr->lr_super);
					mutex_lock(&eli->li_list_mtx);
					/* List may have changed; re-fetch next. */
					n = pos->next;
				}
				up_read((&elr->lr_super->s_umount));
			}
			/* error, remove the lazy_init job */
			if (err) {
				ext4_remove_li_request(elr);
				continue;
			}
			if (!progress) {
				/* Couldn't run: retry after a random delay. */
				elr->lr_next_sched = jiffies +
					(prandom_u32()
					 % (EXT4_DEF_LI_MAX_START_DELAY * HZ));
			}
			if (time_before(elr->lr_next_sched, next_wakeup))
				next_wakeup = elr->lr_next_sched;
		}
		mutex_unlock(&eli->li_list_mtx);

		try_to_freeze();

		cur = jiffies;
		if ((time_after_eq(cur, next_wakeup)) ||
		    (MAX_JIFFY_OFFSET == next_wakeup)) {
			cond_resched();
			continue;
		}

		schedule_timeout_interruptible(next_wakeup - cur);

		if (kthread_should_stop()) {
			ext4_clear_request_list();
			goto exit_thread;
		}
	}

exit_thread:
	/*
	 * It looks like the request list is empty, but we need
	 * to check it under the li_list_mtx lock, to prevent any
	 * additions into it, and of course we should lock ext4_li_mtx
	 * to atomically free the list and ext4_li_info, because at
	 * this point another ext4 filesystem could be registering
	 * new one.
	 */
	mutex_lock(&ext4_li_mtx);
	mutex_lock(&eli->li_list_mtx);
	if (!list_empty(&eli->li_request_list)) {
		mutex_unlock(&eli->li_list_mtx);
		mutex_unlock(&ext4_li_mtx);
		goto cont_thread;
	}
	mutex_unlock(&eli->li_list_mtx);
	kfree(ext4_li_info);
	ext4_li_info = NULL;
	mutex_unlock(&ext4_li_mtx);

	return 0;
}
3458
/*
 * Drop and free every pending lazy-init request.  Called when the
 * ext4lazyinit thread is stopping or failed to start; takes li_list_mtx
 * itself (callers must not hold it).
 */
static void ext4_clear_request_list(void)
{
	struct list_head *pos, *n;
	struct ext4_li_request *elr;

	mutex_lock(&ext4_li_info->li_list_mtx);
	list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
		elr = list_entry(pos, struct ext4_li_request,
				 lr_request);
		ext4_remove_li_request(elr);
	}
	mutex_unlock(&ext4_li_info->li_list_mtx);
}
3472
/*
 * Start the global "ext4lazyinit" kernel thread.  On failure the pending
 * request list and ext4_li_info are torn down (caller holds ext4_li_mtx,
 * so this is safe) and the kthread_run() error is returned; on success the
 * RUNNING flag is set and 0 is returned.
 */
static int ext4_run_lazyinit_thread(void)
{
	ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
					 ext4_li_info, "ext4lazyinit");
	if (IS_ERR(ext4_lazyinit_task)) {
		int err = PTR_ERR(ext4_lazyinit_task);
		ext4_clear_request_list();
		kfree(ext4_li_info);
		ext4_li_info = NULL;
		printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
				 "initialization thread\n",
				 err);
		return err;
	}
	ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
	return 0;
}
3490
3491/*
3492 * Check whether it make sense to run itable init. thread or not.
3493 * If there is at least one uninitialized inode table, return
3494 * corresponding group number, else the loop goes through all
3495 * groups and return total number of groups.
3496 */
3497static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3498{
3499 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3500 struct ext4_group_desc *gdp = NULL;
3501
8844618d
TT
3502 if (!ext4_has_group_desc_csum(sb))
3503 return ngroups;
3504
bfff6873
LC
3505 for (group = 0; group < ngroups; group++) {
3506 gdp = ext4_get_group_desc(sb, group, NULL);
3507 if (!gdp)
3508 continue;
3509
50122847 3510 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
bfff6873
LC
3511 break;
3512 }
3513
3514 return group;
3515}
3516
3517static int ext4_li_info_new(void)
3518{
3519 struct ext4_lazy_init *eli = NULL;
3520
3521 eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3522 if (!eli)
3523 return -ENOMEM;
3524
bfff6873
LC
3525 INIT_LIST_HEAD(&eli->li_request_list);
3526 mutex_init(&eli->li_list_mtx);
3527
bfff6873
LC
3528 eli->li_state |= EXT4_LAZYINIT_QUIT;
3529
3530 ext4_li_info = eli;
3531
3532 return 0;
3533}
3534
/*
 * Allocate a lazy-init request for @sb.  @start is the first group whose
 * inode table still needs zeroing.  The request starts in bitmap-prefetch
 * mode unless no_prefetch_block_bitmaps was given, in which case it goes
 * straight to inode-table mode.  Returns NULL on allocation failure; the
 * caller owns the returned request until it is queued.
 */
static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
					    ext4_group_t start)
{
	struct ext4_li_request *elr;

	elr = kzalloc(sizeof(*elr), GFP_KERNEL);
	if (!elr)
		return NULL;

	elr->lr_super = sb;
	elr->lr_first_not_zeroed = start;
	if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
		elr->lr_mode = EXT4_LI_MODE_ITABLE;
		elr->lr_next_group = start;
	} else {
		elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
	}

	/*
	 * Randomize first schedule time of the request to
	 * spread the inode table initialization requests
	 * better.
	 */
	elr->lr_next_sched = jiffies + (prandom_u32() %
				(EXT4_DEF_LI_MAX_START_DELAY * HZ));
	return elr;
}
3562
7f511862
TT
/*
 * Register a lazy-init request for @sb with the global lazyinit thread,
 * starting the thread if it is not already running.
 *
 * @first_not_zeroed: first group with an uninitialized inode table, or
 *		      the total group count if there is none.
 *
 * Serialized by ext4_li_mtx.  If the filesystem already has a pending
 * request, only its timeout is reset.  Returns 0 on success or a
 * negative errno.
 */
int ext4_register_li_request(struct super_block *sb,
			     ext4_group_t first_not_zeroed)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_li_request *elr = NULL;
	ext4_group_t ngroups = sbi->s_groups_count;
	int ret = 0;

	mutex_lock(&ext4_li_mtx);
	if (sbi->s_li_request != NULL) {
		/*
		 * Reset timeout so it can be computed again, because
		 * s_li_wait_mult might have changed.
		 */
		sbi->s_li_request->lr_timeout = 0;
		goto out;
	}

	/*
	 * If bitmap prefetching is disabled, a request is only needed when
	 * there is actual inode-table zeroing work to do on a writable fs.
	 */
	if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
	    (first_not_zeroed == ngroups || sb_rdonly(sb) ||
	     !test_opt(sb, INIT_INODE_TABLE)))
		goto out;

	elr = ext4_li_request_new(sb, first_not_zeroed);
	if (!elr) {
		ret = -ENOMEM;
		goto out;
	}

	if (NULL == ext4_li_info) {
		ret = ext4_li_info_new();
		if (ret)
			goto out;
	}

	mutex_lock(&ext4_li_info->li_list_mtx);
	list_add(&elr->lr_request, &ext4_li_info->li_request_list);
	mutex_unlock(&ext4_li_info->li_list_mtx);

	sbi->s_li_request = elr;
	/*
	 * set elr to NULL here since it has been inserted to
	 * the request_list and the removal and free of it is
	 * handled by ext4_clear_request_list from now on.
	 */
	elr = NULL;

	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
		ret = ext4_run_lazyinit_thread();
		if (ret)
			goto out;
	}
out:
	mutex_unlock(&ext4_li_mtx);
	/* On error, free the request only if it was never queued (elr != NULL). */
	if (ret)
		kfree(elr);
	return ret;
}
3621
3622/*
3623 * We do not need to lock anything since this is called on
3624 * module unload.
3625 */
3626static void ext4_destroy_lazyinit_thread(void)
3627{
3628 /*
3629 * If thread exited earlier
3630 * there's nothing to be done.
3631 */
8f1f7453 3632 if (!ext4_li_info || !ext4_lazyinit_task)
bfff6873
LC
3633 return;
3634
8f1f7453 3635 kthread_stop(ext4_lazyinit_task);
bfff6873
LC
3636}
3637
25ed6e8a
DW
/*
 * Configure the jbd2 journal's checksum feature flags to match this
 * filesystem's checksum setup and mount options.
 *
 * NOTE(review): returns the jbd2_journal_set_features() result (1 when
 * no set_features call was needed or it succeeded) — confirm the caller
 * treats 0 as failure.
 */
static int set_journal_csum_feature_set(struct super_block *sb)
{
	int ret = 1;
	int compat, incompat;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (ext4_has_metadata_csum(sb)) {
		/* journal checksum v3 */
		compat = 0;
		incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
	} else {
		/* journal checksum v1 */
		compat = JBD2_FEATURE_COMPAT_CHECKSUM;
		incompat = 0;
	}

	/* Drop any stale checksum flags before re-adding the desired set. */
	jbd2_journal_clear_features(sbi->s_journal,
			JBD2_FEATURE_COMPAT_CHECKSUM, 0,
			JBD2_FEATURE_INCOMPAT_CSUM_V3 |
			JBD2_FEATURE_INCOMPAT_CSUM_V2);
	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
		ret = jbd2_journal_set_features(sbi->s_journal,
				compat, 0,
				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
				incompat);
	} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
		ret = jbd2_journal_set_features(sbi->s_journal,
				compat, 0,
				incompat);
		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
	} else {
		/* Neither checksum option requested: clear async commit too. */
		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
	}

	return ret;
}
3676
952fc18e
TT
/*
 * Note: calculating the overhead so we can be compatible with
 * historical BSD practice is quite difficult in the face of
 * clusters/bigalloc.  This is because multiple metadata blocks from
 * different block group can end up in the same allocation cluster.
 * Calculating the exact overhead in the face of clustered allocation
 * requires either O(all block bitmaps) in memory or O(number of block
 * groups**2) in time.  We will still calculate the superblock for
 * older file systems --- and if we come across with a bigalloc file
 * system with zero in s_overhead_clusters the estimate will be close to
 * correct especially for very large cluster sizes --- but for newer
 * file systems, it's better to calculate this figure once at mkfs
 * time, and store it in the superblock.  If the superblock value is
 * present (even for non-bigalloc file systems), we will use it.
 */
/*
 * Count the metadata overhead (in clusters) attributable to group @grp.
 * @buf is a caller-supplied, zeroed, page-sized scratch bitmap used to
 * de-duplicate metadata blocks that share an allocation cluster.
 */
static int count_overhead(struct super_block *sb, ext4_group_t grp,
			  char *buf)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_desc *gdp;
	ext4_fsblk_t first_block, last_block, b;
	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
	int s, j, count = 0;

	/*
	 * Without bigalloc every metadata block is its own cluster, so the
	 * per-group overhead is a simple closed-form sum (sb copy + group
	 * descriptors + inode table + block and inode bitmaps).
	 */
	if (!ext4_has_feature_bigalloc(sb))
		return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
			sbi->s_itb_per_group + 2);

	first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
		(grp * EXT4_BLOCKS_PER_GROUP(sb));
	last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
	/*
	 * Scan every group: any group's bitmaps or inode table may land
	 * inside this group's block range.
	 */
	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		b = ext4_block_bitmap(sb, gdp);
		if (b >= first_block && b <= last_block) {
			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
			count++;
		}
		b = ext4_inode_bitmap(sb, gdp);
		if (b >= first_block && b <= last_block) {
			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
			count++;
		}
		b = ext4_inode_table(sb, gdp);
		if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
			for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
				int c = EXT4_B2C(sbi, b - first_block);
				ext4_set_bit(c, buf);
				count++;
			}
		if (i != grp)
			continue;
		/* Only @grp itself holds a superblock copy and GDT blocks. */
		s = 0;
		if (ext4_bg_has_super(sb, grp)) {
			ext4_set_bit(s++, buf);
			count++;
		}
		j = ext4_bg_num_gdb(sb, grp);
		if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
			ext4_error(sb, "Invalid number of block group "
				   "descriptor blocks: %d", j);
			j = EXT4_BLOCKS_PER_GROUP(sb) - s;
		}
		count += j;
		for (; j > 0; j--)
			ext4_set_bit(EXT4_B2C(sbi, s++), buf);
	}
	if (!count)
		return 0;
	/* Overhead = clusters in the group minus clusters still free in @buf. */
	return EXT4_CLUSTERS_PER_GROUP(sb) -
		ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
}
3749
/*
 * Compute the overhead and stash it in sbi->s_overhead
 */
/*
 * Sum the filesystem's metadata overhead (pre-data blocks, per-group
 * metadata, internal journal) in clusters and cache it in
 * sbi->s_overhead.  Returns 0 on success or -ENOMEM.
 */
int ext4_calculate_overhead(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	struct inode *j_inode;
	unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
	ext4_fsblk_t overhead = 0;
	/* Scratch bitmap page shared across all count_overhead() calls. */
	char *buf = (char *) get_zeroed_page(GFP_NOFS);

	if (!buf)
		return -ENOMEM;

	/*
	 * Compute the overhead (FS structures).  This is constant
	 * for a given filesystem unless the number of block groups
	 * changes so we cache the previous value until it does.
	 */

	/*
	 * All of the blocks before first_data_block are overhead
	 */
	overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));

	/*
	 * Add the overhead found in each block group
	 */
	for (i = 0; i < ngroups; i++) {
		int blks;

		blks = count_overhead(sb, i, buf);
		overhead += blks;
		/* count_overhead() needs a zeroed scratch bitmap each time. */
		if (blks)
			memset(buf, 0, PAGE_SIZE);
		cond_resched();
	}

	/*
	 * Add the internal journal blocks whether the journal has been
	 * loaded or not
	 */
	if (sbi->s_journal && !sbi->s_journal_bdev)
		overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
	else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
		/* j_inum for internal journal is non-zero */
		j_inode = ext4_get_journal_inode(sb, j_inum);
		if (j_inode) {
			j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
			overhead += EXT4_NUM_B2C(sbi, j_blocks);
			iput(j_inode);
		} else {
			ext4_msg(sb, KERN_ERR, "can't get journal size");
		}
	}
	sbi->s_overhead = overhead;
	/* Publish s_overhead before any lockless readers observe it. */
	smp_wmb();
	free_page((unsigned long) buf);
	return 0;
}
3812
b5799018 3813static void ext4_set_resv_clusters(struct super_block *sb)
27dd4385
LC
3814{
3815 ext4_fsblk_t resv_clusters;
b5799018 3816 struct ext4_sb_info *sbi = EXT4_SB(sb);
27dd4385 3817
30fac0f7
JK
3818 /*
3819 * There's no need to reserve anything when we aren't using extents.
3820 * The space estimates are exact, there are no unwritten extents,
3821 * hole punching doesn't need new metadata... This is needed especially
3822 * to keep ext2/3 backward compatibility.
3823 */
e2b911c5 3824 if (!ext4_has_feature_extents(sb))
b5799018 3825 return;
27dd4385
LC
3826 /*
3827 * By default we reserve 2% or 4096 clusters, whichever is smaller.
3828 * This should cover the situations where we can not afford to run
3829 * out of space like for example punch hole, or converting
556615dc 3830 * unwritten extents in delalloc path. In most cases such
27dd4385
LC
3831 * allocation would require 1, or 2 blocks, higher numbers are
3832 * very rare.
3833 */
b5799018
TT
3834 resv_clusters = (ext4_blocks_count(sbi->s_es) >>
3835 sbi->s_cluster_bits);
27dd4385
LC
3836
3837 do_div(resv_clusters, 50);
3838 resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
3839
b5799018 3840 atomic64_set(&sbi->s_resv_clusters, resv_clusters);
27dd4385
LC
3841}
3842
ca9b404f
RA
/*
 * Report the effective quota mode for @sb as a human-readable string:
 * "none", "journalled", "writeback", or "disabled" (no CONFIG_QUOTA).
 */
static const char *ext4_quota_mode(struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	if (!ext4_quota_capable(sb))
		return "none";

	return (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb)) ?
		"journalled" : "writeback";
#else
	return "disabled";
#endif
}
3857
188c299e
JK
3858static void ext4_setup_csum_trigger(struct super_block *sb,
3859 enum ext4_journal_trigger_type type,
3860 void (*trigger)(
3861 struct jbd2_buffer_trigger_type *type,
3862 struct buffer_head *bh,
3863 void *mapped_data,
3864 size_t size))
3865{
3866 struct ext4_sb_info *sbi = EXT4_SB(sb);
3867
3868 sbi->s_journal_triggers[type].sb = sb;
3869 sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
3870}
3871
2b2d6d01 3872static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ac27a0ec 3873{
5e405595 3874 struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
d4c402d9 3875 char *orig_data = kstrdup(data, GFP_KERNEL);
1d0c3924 3876 struct buffer_head *bh, **group_desc;
617ba13b 3877 struct ext4_super_block *es = NULL;
5aee0f8a 3878 struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
7c990728 3879 struct flex_groups **flex_groups;
617ba13b
MC
3880 ext4_fsblk_t block;
3881 ext4_fsblk_t sb_block = get_sb_block(&data);
70bbb3e0 3882 ext4_fsblk_t logical_sb_block;
ac27a0ec 3883 unsigned long offset = 0;
ac27a0ec
DK
3884 unsigned long def_mount_opts;
3885 struct inode *root;
0390131b 3886 const char *descr;
dcc7dae3 3887 int ret = -ENOMEM;
281b5995 3888 int blocksize, clustersize;
4ec11028
TT
3889 unsigned int db_count;
3890 unsigned int i;
ef5fd681 3891 int needs_recovery, has_huge_files;
bd81d8ee 3892 __u64 blocks_count;
07aa2ea1 3893 int err = 0;
bfff6873 3894 ext4_group_t first_not_zeroed;
b237e304
HS
3895 struct ext4_parsed_options parsed_opts;
3896
3897 /* Set defaults for the variables that will be set during parsing */
3898 parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3899 parsed_opts.journal_devnum = 0;
196e402a 3900 parsed_opts.mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN;
ac27a0ec 3901
5aee0f8a
TT
3902 if ((data && !orig_data) || !sbi)
3903 goto out_free_base;
705895b6 3904
aed9eb1b 3905 sbi->s_daxdev = dax_dev;
705895b6
PE
3906 sbi->s_blockgroup_lock =
3907 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
5aee0f8a
TT
3908 if (!sbi->s_blockgroup_lock)
3909 goto out_free_base;
3910
ac27a0ec 3911 sb->s_fs_info = sbi;
2c0544b2 3912 sbi->s_sb = sb;
240799cd 3913 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
d9c9bef1 3914 sbi->s_sb_block = sb_block;
8446fe92
CH
3915 sbi->s_sectors_written_start =
3916 part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
ac27a0ec 3917
9f6200bb 3918 /* Cleanup superblock name */
ec3904dc 3919 strreplace(sb->s_id, '/', '!');
9f6200bb 3920
07aa2ea1 3921 /* -EINVAL is default */
dcc7dae3 3922 ret = -EINVAL;
617ba13b 3923 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
ac27a0ec 3924 if (!blocksize) {
b31e1552 3925 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
ac27a0ec
DK
3926 goto out_fail;
3927 }
3928
3929 /*
617ba13b 3930 * The ext4 superblock will not be buffer aligned for other than 1kB
ac27a0ec
DK
3931 * block sizes. We need to calculate the offset from buffer start.
3932 */
617ba13b 3933 if (blocksize != EXT4_MIN_BLOCK_SIZE) {
70bbb3e0
AM
3934 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3935 offset = do_div(logical_sb_block, blocksize);
ac27a0ec 3936 } else {
70bbb3e0 3937 logical_sb_block = sb_block;
ac27a0ec
DK
3938 }
3939
8394a6ab 3940 bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
3941 if (IS_ERR(bh)) {
b31e1552 3942 ext4_msg(sb, KERN_ERR, "unable to read superblock");
8394a6ab 3943 ret = PTR_ERR(bh);
ac27a0ec
DK
3944 goto out_fail;
3945 }
3946 /*
3947 * Note: s_es must be initialized as soon as possible because
617ba13b 3948 * some ext4 macro-instructions depend on its value
ac27a0ec 3949 */
2716b802 3950 es = (struct ext4_super_block *) (bh->b_data + offset);
ac27a0ec
DK
3951 sbi->s_es = es;
3952 sb->s_magic = le16_to_cpu(es->s_magic);
617ba13b
MC
3953 if (sb->s_magic != EXT4_SUPER_MAGIC)
3954 goto cantfind_ext4;
afc32f7e 3955 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
ac27a0ec 3956
feb0ab32 3957 /* Warn if metadata_csum and gdt_csum are both set. */
e2b911c5
DW
3958 if (ext4_has_feature_metadata_csum(sb) &&
3959 ext4_has_feature_gdt_csum(sb))
363307e6 3960 ext4_warning(sb, "metadata_csum and uninit_bg are "
feb0ab32
DW
3961 "redundant flags; please run fsck.");
3962
d25425f8
DW
3963 /* Check for a known checksum algorithm */
3964 if (!ext4_verify_csum_type(sb, es)) {
3965 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3966 "unknown checksum algorithm.");
3967 silent = 1;
3968 goto cantfind_ext4;
3969 }
02f310fc
JK
3970 ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
3971 ext4_orphan_file_block_trigger);
d25425f8 3972
0441984a 3973 /* Load the checksum driver */
a45403b5
TT
3974 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3975 if (IS_ERR(sbi->s_chksum_driver)) {
3976 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
3977 ret = PTR_ERR(sbi->s_chksum_driver);
3978 sbi->s_chksum_driver = NULL;
3979 goto failed_mount;
0441984a
DW
3980 }
3981
a9c47317
DW
3982 /* Check superblock checksum */
3983 if (!ext4_superblock_csum_verify(sb, es)) {
3984 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3985 "invalid superblock checksum. Run e2fsck?");
3986 silent = 1;
6a797d27 3987 ret = -EFSBADCRC;
a9c47317
DW
3988 goto cantfind_ext4;
3989 }
3990
3991 /* Precompute checksum seed for all metadata */
e2b911c5 3992 if (ext4_has_feature_csum_seed(sb))
8c81bd8f 3993 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
dec214d0 3994 else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
a9c47317
DW
3995 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3996 sizeof(es->s_uuid));
3997
ac27a0ec
DK
3998 /* Set defaults before we parse the mount options */
3999 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
fd8c37ec 4000 set_opt(sb, INIT_INODE_TABLE);
617ba13b 4001 if (def_mount_opts & EXT4_DEFM_DEBUG)
fd8c37ec 4002 set_opt(sb, DEBUG);
87f26807 4003 if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
fd8c37ec 4004 set_opt(sb, GRPID);
617ba13b 4005 if (def_mount_opts & EXT4_DEFM_UID16)
fd8c37ec 4006 set_opt(sb, NO_UID32);
ea663336 4007 /* xattr user namespace & acls are now defaulted on */
ea663336 4008 set_opt(sb, XATTR_USER);
03010a33 4009#ifdef CONFIG_EXT4_FS_POSIX_ACL
ea663336 4010 set_opt(sb, POSIX_ACL);
2e7842b8 4011#endif
995a3ed6
HS
4012 if (ext4_has_feature_fast_commit(sb))
4013 set_opt2(sb, JOURNAL_FAST_COMMIT);
98c1a759
DW
4014 /* don't forget to enable journal_csum when metadata_csum is enabled. */
4015 if (ext4_has_metadata_csum(sb))
4016 set_opt(sb, JOURNAL_CHECKSUM);
4017
617ba13b 4018 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
fd8c37ec 4019 set_opt(sb, JOURNAL_DATA);
617ba13b 4020 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
fd8c37ec 4021 set_opt(sb, ORDERED_DATA);
617ba13b 4022 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
fd8c37ec 4023 set_opt(sb, WRITEBACK_DATA);
617ba13b
MC
4024
4025 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
fd8c37ec 4026 set_opt(sb, ERRORS_PANIC);
bb4f397a 4027 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
fd8c37ec 4028 set_opt(sb, ERRORS_CONT);
bb4f397a 4029 else
fd8c37ec 4030 set_opt(sb, ERRORS_RO);
45f1a9c3
DW
4031 /* block_validity enabled by default; disable with noblock_validity */
4032 set_opt(sb, BLOCK_VALIDITY);
8b67f04a 4033 if (def_mount_opts & EXT4_DEFM_DISCARD)
fd8c37ec 4034 set_opt(sb, DISCARD);
ac27a0ec 4035
08cefc7a
EB
4036 sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
4037 sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
30773840
TT
4038 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
4039 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
4040 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
ac27a0ec 4041
8b67f04a 4042 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
fd8c37ec 4043 set_opt(sb, BARRIER);
ac27a0ec 4044
dd919b98
AK
4045 /*
4046 * enable delayed allocation by default
4047 * Use -o nodelalloc to turn it off
4048 */
bc0b75f7 4049 if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
8b67f04a 4050 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
fd8c37ec 4051 set_opt(sb, DELALLOC);
dd919b98 4052
51ce6511
LC
4053 /*
4054 * set default s_li_wait_mult for lazyinit, for the case there is
4055 * no mount option specified.
4056 */
4057 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
4058
c9200760
TT
4059 if (le32_to_cpu(es->s_log_block_size) >
4060 (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4f97a681 4061 ext4_msg(sb, KERN_ERR,
c9200760
TT
4062 "Invalid log block size: %u",
4063 le32_to_cpu(es->s_log_block_size));
4f97a681
TT
4064 goto failed_mount;
4065 }
c9200760
TT
4066 if (le32_to_cpu(es->s_log_cluster_size) >
4067 (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4f97a681 4068 ext4_msg(sb, KERN_ERR,
c9200760
TT
4069 "Invalid log cluster size: %u",
4070 le32_to_cpu(es->s_log_cluster_size));
4f97a681
TT
4071 goto failed_mount;
4072 }
4073
c9200760
TT
4074 blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
4075
4076 if (blocksize == PAGE_SIZE)
4077 set_opt(sb, DIOREAD_NOLOCK);
4f97a681 4078
9803387c
TT
4079 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
4080 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
4081 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
4082 } else {
4083 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
4084 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
4085 if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
4086 ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
4087 sbi->s_first_ino);
4088 goto failed_mount;
4089 }
4090 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
4091 (!is_power_of_2(sbi->s_inode_size)) ||
4092 (sbi->s_inode_size > blocksize)) {
4093 ext4_msg(sb, KERN_ERR,
4094 "unsupported inode size: %d",
4095 sbi->s_inode_size);
4f97a681 4096 ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize);
9803387c
TT
4097 goto failed_mount;
4098 }
4099 /*
4100 * i_atime_extra is the last extra field available for
4101 * [acm]times in struct ext4_inode. Checking for that
4102 * field should suffice to ensure we have extra space
4103 * for all three.
4104 */
4105 if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
4106 sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
4107 sb->s_time_gran = 1;
4108 sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
4109 } else {
4110 sb->s_time_gran = NSEC_PER_SEC;
4111 sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
4112 }
4113 sb->s_time_min = EXT4_TIMESTAMP_MIN;
4114 }
4115 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4116 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4117 EXT4_GOOD_OLD_INODE_SIZE;
4118 if (ext4_has_feature_extra_isize(sb)) {
4119 unsigned v, max = (sbi->s_inode_size -
4120 EXT4_GOOD_OLD_INODE_SIZE);
4121
4122 v = le16_to_cpu(es->s_want_extra_isize);
4123 if (v > max) {
4124 ext4_msg(sb, KERN_ERR,
4125 "bad s_want_extra_isize: %d", v);
4126 goto failed_mount;
4127 }
4128 if (sbi->s_want_extra_isize < v)
4129 sbi->s_want_extra_isize = v;
4130
4131 v = le16_to_cpu(es->s_min_extra_isize);
4132 if (v > max) {
4133 ext4_msg(sb, KERN_ERR,
4134 "bad s_min_extra_isize: %d", v);
4135 goto failed_mount;
4136 }
4137 if (sbi->s_want_extra_isize < v)
4138 sbi->s_want_extra_isize = v;
4139 }
4140 }
4141
5aee0f8a
TT
4142 if (sbi->s_es->s_mount_opts[0]) {
4143 char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
4144 sizeof(sbi->s_es->s_mount_opts),
4145 GFP_KERNEL);
4146 if (!s_mount_opts)
4147 goto failed_mount;
b237e304 4148 if (!parse_options(s_mount_opts, sb, &parsed_opts, 0)) {
5aee0f8a
TT
4149 ext4_msg(sb, KERN_WARNING,
4150 "failed to parse options in superblock: %s",
4151 s_mount_opts);
4152 }
4153 kfree(s_mount_opts);
8b67f04a 4154 }
5a916be1 4155 sbi->s_def_mount_opt = sbi->s_mount_opt;
b237e304 4156 if (!parse_options((char *) data, sb, &parsed_opts, 0))
ac27a0ec
DK
4157 goto failed_mount;
4158
c83ad55e 4159#ifdef CONFIG_UNICODE
f8f4acb6 4160 if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
c83ad55e
GKB
4161 const struct ext4_sb_encodings *encoding_info;
4162 struct unicode_map *encoding;
4163 __u16 encoding_flags;
4164
c83ad55e
GKB
4165 if (ext4_sb_read_encoding(es, &encoding_info,
4166 &encoding_flags)) {
4167 ext4_msg(sb, KERN_ERR,
4168 "Encoding requested by superblock is unknown");
4169 goto failed_mount;
4170 }
4171
4172 encoding = utf8_load(encoding_info->version);
4173 if (IS_ERR(encoding)) {
4174 ext4_msg(sb, KERN_ERR,
4175 "can't mount with superblock charset: %s-%s "
4176 "not supported by the kernel. flags: 0x%x.",
4177 encoding_info->name, encoding_info->version,
4178 encoding_flags);
4179 goto failed_mount;
4180 }
4181 ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
4182 "%s-%s with flags 0x%hx", encoding_info->name,
4183 encoding_info->version?:"\b", encoding_flags);
4184
f8f4acb6
DR
4185 sb->s_encoding = encoding;
4186 sb->s_encoding_flags = encoding_flags;
c83ad55e
GKB
4187 }
4188#endif
4189
56889787 4190 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
556e0319 4191 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n");
781c036b 4192 /* can't mount with both data=journal and dioread_nolock. */
244adf64 4193 clear_opt(sb, DIOREAD_NOLOCK);
556e0319 4194 clear_opt2(sb, JOURNAL_FAST_COMMIT);
56889787
TT
4195 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4196 ext4_msg(sb, KERN_ERR, "can't mount with "
4197 "both data=journal and delalloc");
4198 goto failed_mount;
4199 }
fc626fe3 4200 if (test_opt(sb, DAX_ALWAYS)) {
923ae0ff
RZ
4201 ext4_msg(sb, KERN_ERR, "can't mount with "
4202 "both data=journal and dax");
4203 goto failed_mount;
4204 }
73b92a2a
SK
4205 if (ext4_has_feature_encrypt(sb)) {
4206 ext4_msg(sb, KERN_WARNING,
4207 "encrypted files will use data=ordered "
4208 "instead of data journaling mode");
4209 }
56889787
TT
4210 if (test_opt(sb, DELALLOC))
4211 clear_opt(sb, DELALLOC);
001e4a87
TH
4212 } else {
4213 sb->s_iflags |= SB_I_CGROUPWB;
56889787
TT
4214 }
4215
1751e8a6
LT
4216 sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
4217 (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
ac27a0ec 4218
617ba13b 4219 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
e2b911c5
DW
4220 (ext4_has_compat_features(sb) ||
4221 ext4_has_ro_compat_features(sb) ||
4222 ext4_has_incompat_features(sb)))
b31e1552
ES
4223 ext4_msg(sb, KERN_WARNING,
4224 "feature flags set on rev 0 fs, "
4225 "running e2fsck is recommended");
469108ff 4226
ed3654eb
TT
4227 if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
4228 set_opt2(sb, HURD_COMPAT);
e2b911c5 4229 if (ext4_has_feature_64bit(sb)) {
ed3654eb
TT
4230 ext4_msg(sb, KERN_ERR,
4231 "The Hurd can't support 64-bit file systems");
4232 goto failed_mount;
4233 }
dec214d0
TE
4234
4235 /*
4236 * ea_inode feature uses l_i_version field which is not
4237 * available in HURD_COMPAT mode.
4238 */
4239 if (ext4_has_feature_ea_inode(sb)) {
4240 ext4_msg(sb, KERN_ERR,
4241 "ea_inode feature is not supported for Hurd");
4242 goto failed_mount;
4243 }
ed3654eb
TT
4244 }
4245
2035e776
TT
4246 if (IS_EXT2_SB(sb)) {
4247 if (ext2_feature_set_ok(sb))
4248 ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
4249 "using the ext4 subsystem");
4250 else {
0d9366d6
ES
4251 /*
4252 * If we're probing be silent, if this looks like
4253 * it's actually an ext[34] filesystem.
4254 */
4255 if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4256 goto failed_mount;
2035e776
TT
4257 ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
4258 "to feature incompatibilities");
4259 goto failed_mount;
4260 }
4261 }
4262
4263 if (IS_EXT3_SB(sb)) {
4264 if (ext3_feature_set_ok(sb))
4265 ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
4266 "using the ext4 subsystem");
4267 else {
0d9366d6
ES
4268 /*
4269 * If we're probing be silent, if this looks like
4270 * it's actually an ext4 filesystem.
4271 */
4272 if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4273 goto failed_mount;
2035e776
TT
4274 ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
4275 "to feature incompatibilities");
4276 goto failed_mount;
4277 }
4278 }
4279
ac27a0ec
DK
4280 /*
4281 * Check feature flags regardless of the revision level, since we
4282 * previously didn't change the revision level when setting the flags,
4283 * so there is a chance incompat flags are set on a rev 0 filesystem.
4284 */
bc98a42c 4285 if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
ac27a0ec 4286 goto failed_mount;
a13fb1a4 4287
5b9554dc
TT
4288 if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
4289 ext4_msg(sb, KERN_ERR,
4290 "Number of reserved GDT blocks insanely large: %d",
4291 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
4292 goto failed_mount;
4293 }
4294
bdd3c50d
CH
4295 if (dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
4296 bdev_nr_sectors(sb->s_bdev)))
a8ab6d38
IW
4297 set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
4298
fc626fe3 4299 if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
559db4c6
RZ
4300 if (ext4_has_feature_inline_data(sb)) {
4301 ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
4302 " that may contain inline data");
361d24d4 4303 goto failed_mount;
559db4c6 4304 }
a8ab6d38 4305 if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
24f3478d 4306 ext4_msg(sb, KERN_ERR,
361d24d4
ES
4307 "DAX unsupported by block device.");
4308 goto failed_mount;
24f3478d 4309 }
923ae0ff
RZ
4310 }
4311
e2b911c5 4312 if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
6ddb2447
TT
4313 ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
4314 es->s_encryption_level);
4315 goto failed_mount;
4316 }
4317
ac27a0ec 4318 if (sb->s_blocksize != blocksize) {
afd09b61
AM
4319 /*
4320 * bh must be released before kill_bdev(), otherwise
4321 * it won't be freed and its page also. kill_bdev()
4322 * is called by sb_set_blocksize().
4323 */
4324 brelse(bh);
ce40733c
AK
4325 /* Validate the filesystem blocksize */
4326 if (!sb_set_blocksize(sb, blocksize)) {
b31e1552 4327 ext4_msg(sb, KERN_ERR, "bad block size %d",
ce40733c 4328 blocksize);
afd09b61 4329 bh = NULL;
ac27a0ec
DK
4330 goto failed_mount;
4331 }
4332
70bbb3e0
AM
4333 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
4334 offset = do_div(logical_sb_block, blocksize);
8394a6ab 4335 bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
4336 if (IS_ERR(bh)) {
b31e1552
ES
4337 ext4_msg(sb, KERN_ERR,
4338 "Can't read superblock on 2nd try");
8394a6ab 4339 ret = PTR_ERR(bh);
4340 bh = NULL;
ac27a0ec
DK
4341 goto failed_mount;
4342 }
2716b802 4343 es = (struct ext4_super_block *)(bh->b_data + offset);
ac27a0ec 4344 sbi->s_es = es;
617ba13b 4345 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
b31e1552
ES
4346 ext4_msg(sb, KERN_ERR,
4347 "Magic mismatch, very weird!");
ac27a0ec
DK
4348 goto failed_mount;
4349 }
4350 }
4351
e2b911c5 4352 has_huge_files = ext4_has_feature_huge_file(sb);
f287a1a5
TT
4353 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
4354 has_huge_files);
4355 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
ac27a0ec 4356
0d1ee42f 4357 sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
e2b911c5 4358 if (ext4_has_feature_64bit(sb)) {
8fadc143 4359 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
0d1ee42f 4360 sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
d8ea6cf8 4361 !is_power_of_2(sbi->s_desc_size)) {
b31e1552
ES
4362 ext4_msg(sb, KERN_ERR,
4363 "unsupported descriptor size %lu",
0d1ee42f
AR
4364 sbi->s_desc_size);
4365 goto failed_mount;
4366 }
4367 } else
4368 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
0b8e58a1 4369
ac27a0ec 4370 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
ac27a0ec 4371 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
0b8e58a1 4372
617ba13b 4373 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
ac27a0ec 4374 if (sbi->s_inodes_per_block == 0)
617ba13b 4375 goto cantfind_ext4;
cd6bb35b
TT
4376 if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
4377 sbi->s_inodes_per_group > blocksize * 8) {
4378 ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
b9c538da 4379 sbi->s_inodes_per_group);
cd6bb35b
TT
4380 goto failed_mount;
4381 }
ac27a0ec
DK
4382 sbi->s_itb_per_group = sbi->s_inodes_per_group /
4383 sbi->s_inodes_per_block;
0d1ee42f 4384 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
ac27a0ec
DK
4385 sbi->s_sbh = bh;
4386 sbi->s_mount_state = le16_to_cpu(es->s_state);
e57aa839
FW
4387 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
4388 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
0b8e58a1 4389
2b2d6d01 4390 for (i = 0; i < 4; i++)
ac27a0ec
DK
4391 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
4392 sbi->s_def_hash_version = es->s_def_hash_version;
e2b911c5 4393 if (ext4_has_feature_dir_index(sb)) {
23301410
TT
4394 i = le32_to_cpu(es->s_flags);
4395 if (i & EXT2_FLAGS_UNSIGNED_HASH)
4396 sbi->s_hash_unsigned = 3;
4397 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
f99b2589 4398#ifdef __CHAR_UNSIGNED__
bc98a42c 4399 if (!sb_rdonly(sb))
23301410
TT
4400 es->s_flags |=
4401 cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
4402 sbi->s_hash_unsigned = 3;
f99b2589 4403#else
bc98a42c 4404 if (!sb_rdonly(sb))
23301410
TT
4405 es->s_flags |=
4406 cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
f99b2589 4407#endif
23301410 4408 }
f99b2589 4409 }
ac27a0ec 4410
281b5995
TT
4411 /* Handle clustersize */
4412 clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
ef5fd681 4413 if (ext4_has_feature_bigalloc(sb)) {
281b5995
TT
4414 if (clustersize < blocksize) {
4415 ext4_msg(sb, KERN_ERR,
4416 "cluster size (%d) smaller than "
4417 "block size (%d)", clustersize, blocksize);
4418 goto failed_mount;
4419 }
4420 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4421 le32_to_cpu(es->s_log_block_size);
4422 sbi->s_clusters_per_group =
4423 le32_to_cpu(es->s_clusters_per_group);
4424 if (sbi->s_clusters_per_group > blocksize * 8) {
4425 ext4_msg(sb, KERN_ERR,
4426 "#clusters per group too big: %lu",
4427 sbi->s_clusters_per_group);
4428 goto failed_mount;
4429 }
4430 if (sbi->s_blocks_per_group !=
4431 (sbi->s_clusters_per_group * (clustersize / blocksize))) {
4432 ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
4433 "clusters per group (%lu) inconsistent",
4434 sbi->s_blocks_per_group,
4435 sbi->s_clusters_per_group);
4436 goto failed_mount;
4437 }
4438 } else {
4439 if (clustersize != blocksize) {
bfe0a5f4
TT
4440 ext4_msg(sb, KERN_ERR,
4441 "fragment/cluster size (%d) != "
4442 "block size (%d)", clustersize, blocksize);
4443 goto failed_mount;
281b5995
TT
4444 }
4445 if (sbi->s_blocks_per_group > blocksize * 8) {
4446 ext4_msg(sb, KERN_ERR,
4447 "#blocks per group too big: %lu",
4448 sbi->s_blocks_per_group);
4449 goto failed_mount;
4450 }
4451 sbi->s_clusters_per_group = sbi->s_blocks_per_group;
4452 sbi->s_cluster_bits = 0;
ac27a0ec 4453 }
281b5995
TT
4454 sbi->s_cluster_ratio = clustersize / blocksize;
4455
960fd856
TT
4456 /* Do we have standard group size of clustersize * 8 blocks ? */
4457 if (sbi->s_blocks_per_group == clustersize << 3)
4458 set_opt2(sb, STD_GROUP_SIZE);
4459
bf43d84b
ES
4460 /*
4461 * Test whether we have more sectors than will fit in sector_t,
4462 * and whether the max offset is addressable by the page cache.
4463 */
5a9ae68a 4464 err = generic_check_addressable(sb->s_blocksize_bits,
30ca22c7 4465 ext4_blocks_count(es));
5a9ae68a 4466 if (err) {
b31e1552 4467 ext4_msg(sb, KERN_ERR, "filesystem"
bf43d84b 4468 " too large to mount safely on this system");
ac27a0ec
DK
4469 goto failed_mount;
4470 }
4471
617ba13b
MC
4472 if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
4473 goto cantfind_ext4;
e7c95593 4474
0f2ddca6
FTN
4475 /* check blocks count against device size */
4476 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
4477 if (blocks_count && ext4_blocks_count(es) > blocks_count) {
b31e1552
ES
4478 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4479 "exceeds size of device (%llu blocks)",
0f2ddca6
FTN
4480 ext4_blocks_count(es), blocks_count);
4481 goto failed_mount;
4482 }
4483
0b8e58a1
AD
4484 /*
4485 * It makes no sense for the first data block to be beyond the end
4486 * of the filesystem.
4487 */
4488 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
5635a62b 4489 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
b31e1552
ES
4490 "block %u is beyond end of filesystem (%llu)",
4491 le32_to_cpu(es->s_first_data_block),
4492 ext4_blocks_count(es));
e7c95593
ES
4493 goto failed_mount;
4494 }
bfe0a5f4
TT
4495 if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4496 (sbi->s_cluster_ratio == 1)) {
4497 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4498 "block is 0 with a 1k block and cluster size");
4499 goto failed_mount;
4500 }
4501
bd81d8ee
LV
4502 blocks_count = (ext4_blocks_count(es) -
4503 le32_to_cpu(es->s_first_data_block) +
4504 EXT4_BLOCKS_PER_GROUP(sb) - 1);
4505 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
4ec11028 4506 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
df41460a 4507 ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
4ec11028 4508 "(block count %llu, first data block %u, "
df41460a 4509 "blocks per group %lu)", blocks_count,
4ec11028
TT
4510 ext4_blocks_count(es),
4511 le32_to_cpu(es->s_first_data_block),
4512 EXT4_BLOCKS_PER_GROUP(sb));
4513 goto failed_mount;
4514 }
bd81d8ee 4515 sbi->s_groups_count = blocks_count;
fb0a387d
ES
4516 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
4517 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
9e463084
TT
4518 if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
4519 le32_to_cpu(es->s_inodes_count)) {
4520 ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
4521 le32_to_cpu(es->s_inodes_count),
4522 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
4523 ret = -EINVAL;
4524 goto failed_mount;
4525 }
617ba13b
MC
4526 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
4527 EXT4_DESC_PER_BLOCK(sb);
3a4b77cd 4528 if (ext4_has_feature_meta_bg(sb)) {
2ba3e6e8 4529 if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
3a4b77cd
EG
4530 ext4_msg(sb, KERN_WARNING,
4531 "first meta block group too large: %u "
4532 "(group descriptor block count %u)",
4533 le32_to_cpu(es->s_first_meta_bg), db_count);
4534 goto failed_mount;
4535 }
4536 }
1d0c3924
TT
4537 rcu_assign_pointer(sbi->s_group_desc,
4538 kvmalloc_array(db_count,
4539 sizeof(struct buffer_head *),
4540 GFP_KERNEL));
ac27a0ec 4541 if (sbi->s_group_desc == NULL) {
b31e1552 4542 ext4_msg(sb, KERN_ERR, "not enough memory");
2cde417d 4543 ret = -ENOMEM;
ac27a0ec
DK
4544 goto failed_mount;
4545 }
4546
705895b6 4547 bgl_lock_init(sbi->s_blockgroup_lock);
ac27a0ec 4548
85c8f176
AP
4549 /* Pre-read the descriptors into the buffer cache */
4550 for (i = 0; i < db_count; i++) {
4551 block = descriptor_loc(sb, logical_sb_block, i);
5df1d412 4552 ext4_sb_breadahead_unmovable(sb, block);
85c8f176
AP
4553 }
4554
ac27a0ec 4555 for (i = 0; i < db_count; i++) {
1d0c3924
TT
4556 struct buffer_head *bh;
4557
70bbb3e0 4558 block = descriptor_loc(sb, logical_sb_block, i);
8394a6ab 4559 bh = ext4_sb_bread_unmovable(sb, block);
4560 if (IS_ERR(bh)) {
b31e1552
ES
4561 ext4_msg(sb, KERN_ERR,
4562 "can't read group descriptor %d", i);
ac27a0ec 4563 db_count = i;
8394a6ab 4564 ret = PTR_ERR(bh);
ac27a0ec
DK
4565 goto failed_mount2;
4566 }
1d0c3924
TT
4567 rcu_read_lock();
4568 rcu_dereference(sbi->s_group_desc)[i] = bh;
4569 rcu_read_unlock();
ac27a0ec 4570 }
44de022c 4571 sbi->s_gdb_count = db_count;
829fa70d 4572 if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
b31e1552 4573 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
6a797d27 4574 ret = -EFSCORRUPTED;
f9ae9cf5 4575 goto failed_mount2;
ac27a0ec 4576 }
772cb7c8 4577
235699a8 4578 timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
c92dc856
JK
4579 spin_lock_init(&sbi->s_error_lock);
4580 INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
04496411 4581
a75ae78f 4582 /* Register extent status tree shrinker */
eb68d0e2 4583 if (ext4_es_register_shrinker(sbi))
ce7e010a 4584 goto failed_mount3;
ce7e010a 4585
c9de560d 4586 sbi->s_stripe = ext4_get_stripe_size(sbi);
67a5da56 4587 sbi->s_extent_max_zeroout_kb = 32;
c9de560d 4588
f9ae9cf5
TT
4589 /*
4590 * set up enough so that it can read an inode
4591 */
f6e63f90 4592 sb->s_op = &ext4_sops;
617ba13b
MC
4593 sb->s_export_op = &ext4_export_ops;
4594 sb->s_xattr = ext4_xattr_handlers;
643fa961 4595#ifdef CONFIG_FS_ENCRYPTION
a7550b30 4596 sb->s_cop = &ext4_cryptops;
ffcc4182 4597#endif
c93d8f88
EB
4598#ifdef CONFIG_FS_VERITY
4599 sb->s_vop = &ext4_verityops;
4600#endif
ac27a0ec 4601#ifdef CONFIG_QUOTA
617ba13b 4602 sb->dq_op = &ext4_quota_operations;
e2b911c5 4603 if (ext4_has_feature_quota(sb))
1fa5efe3 4604 sb->s_qcop = &dquot_quotactl_sysfile_ops;
262b4662
JK
4605 else
4606 sb->s_qcop = &ext4_qctl_operations;
689c958c 4607 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
ac27a0ec 4608#endif
85787090 4609 memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
f2fa2ffc 4610
ac27a0ec 4611 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3b9d4ed2 4612 mutex_init(&sbi->s_orphan_lock);
ac27a0ec 4613
aa75f4d3
HS
4614 /* Initialize fast commit stuff */
4615 atomic_set(&sbi->s_fc_subtid, 0);
4616 atomic_set(&sbi->s_fc_ineligible_updates, 0);
4617 INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
4618 INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
4619 INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
4620 INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
4621 sbi->s_fc_bytes = 0;
9b5f6c9b
HS
4622 ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
4623 ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
aa75f4d3
HS
4624 spin_lock_init(&sbi->s_fc_lock);
4625 memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
8016e29f
HS
4626 sbi->s_fc_replay_state.fc_regions = NULL;
4627 sbi->s_fc_replay_state.fc_regions_size = 0;
4628 sbi->s_fc_replay_state.fc_regions_used = 0;
4629 sbi->s_fc_replay_state.fc_regions_valid = 0;
4630 sbi->s_fc_replay_state.fc_modified_inodes = NULL;
4631 sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
4632 sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
aa75f4d3 4633
ac27a0ec
DK
4634 sb->s_root = NULL;
4635
4636 needs_recovery = (es->s_last_orphan != 0 ||
02f310fc 4637 ext4_has_feature_orphan_present(sb) ||
e2b911c5 4638 ext4_has_feature_journal_needs_recovery(sb));
ac27a0ec 4639
bc98a42c 4640 if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
c5e06d10 4641 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
50460fe8 4642 goto failed_mount3a;
c5e06d10 4643
ac27a0ec
DK
4644 /*
4645 * The first inode we look at is the journal inode. Don't try
4646 * root first: it may be modified in the journal!
4647 */
e2b911c5 4648 if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
b237e304 4649 err = ext4_load_journal(sb, es, parsed_opts.journal_devnum);
4753d8a2 4650 if (err)
50460fe8 4651 goto failed_mount3a;
bc98a42c 4652 } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
e2b911c5 4653 ext4_has_feature_journal_needs_recovery(sb)) {
b31e1552
ES
4654 ext4_msg(sb, KERN_ERR, "required journal recovery "
4655 "suppressed and not mounted read-only");
744692dc 4656 goto failed_mount_wq;
ac27a0ec 4657 } else {
1e381f60
DM
4658 /* Nojournal mode, all journal mount options are illegal */
4659 if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
4660 ext4_msg(sb, KERN_ERR, "can't mount with "
4661 "journal_checksum, fs mounted w/o journal");
4662 goto failed_mount_wq;
4663 }
4664 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4665 ext4_msg(sb, KERN_ERR, "can't mount with "
4666 "journal_async_commit, fs mounted w/o journal");
4667 goto failed_mount_wq;
4668 }
4669 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
4670 ext4_msg(sb, KERN_ERR, "can't mount with "
4671 "commit=%lu, fs mounted w/o journal",
4672 sbi->s_commit_interval / HZ);
4673 goto failed_mount_wq;
4674 }
4675 if (EXT4_MOUNT_DATA_FLAGS &
4676 (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
4677 ext4_msg(sb, KERN_ERR, "can't mount with "
4678 "data=, fs mounted w/o journal");
4679 goto failed_mount_wq;
4680 }
50b29d8f 4681 sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
1e381f60 4682 clear_opt(sb, JOURNAL_CHECKSUM);
fd8c37ec 4683 clear_opt(sb, DATA_FLAGS);
995a3ed6 4684 clear_opt2(sb, JOURNAL_FAST_COMMIT);
0390131b
FM
4685 sbi->s_journal = NULL;
4686 needs_recovery = 0;
4687 goto no_journal;
ac27a0ec
DK
4688 }
4689
e2b911c5 4690 if (ext4_has_feature_64bit(sb) &&
eb40a09c
JS
4691 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4692 JBD2_FEATURE_INCOMPAT_64BIT)) {
b31e1552 4693 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
744692dc 4694 goto failed_mount_wq;
eb40a09c
JS
4695 }
4696
25ed6e8a
DW
4697 if (!set_journal_csum_feature_set(sb)) {
4698 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4699 "feature set");
4700 goto failed_mount_wq;
d4da6c9c 4701 }
818d276c 4702
a1e5e465
HS
4703 if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
4704 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4705 JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
4706 ext4_msg(sb, KERN_ERR,
4707 "Failed to set fast commit journal feature");
4708 goto failed_mount_wq;
4709 }
4710
ac27a0ec
DK
4711 /* We have now updated the journal if required, so we can
4712 * validate the data journaling mode. */
4713 switch (test_opt(sb, DATA_FLAGS)) {
4714 case 0:
4715 /* No mode set, assume a default based on the journal
63f57933
AM
4716 * capabilities: ORDERED_DATA if the journal can
4717 * cope, else JOURNAL_DATA
4718 */
dab291af 4719 if (jbd2_journal_check_available_features
27f394a7 4720 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
fd8c37ec 4721 set_opt(sb, ORDERED_DATA);
27f394a7
TN
4722 sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
4723 } else {
fd8c37ec 4724 set_opt(sb, JOURNAL_DATA);
27f394a7
TN
4725 sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
4726 }
ac27a0ec
DK
4727 break;
4728
617ba13b
MC
4729 case EXT4_MOUNT_ORDERED_DATA:
4730 case EXT4_MOUNT_WRITEBACK_DATA:
dab291af
MC
4731 if (!jbd2_journal_check_available_features
4732 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
b31e1552
ES
4733 ext4_msg(sb, KERN_ERR, "Journal does not support "
4734 "requested data journaling mode");
744692dc 4735 goto failed_mount_wq;
ac27a0ec 4736 }
5a150bde 4737 break;
ac27a0ec
DK
4738 default:
4739 break;
4740 }
ab04df78
JK
4741
4742 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
4743 test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4744 ext4_msg(sb, KERN_ERR, "can't mount with "
4745 "journal_async_commit in data=ordered mode");
4746 goto failed_mount_wq;
4747 }
4748
b237e304 4749 set_task_ioprio(sbi->s_journal->j_task, parsed_opts.journal_ioprio);
ac27a0ec 4750
342af94e 4751 sbi->s_journal->j_submit_inode_data_buffers =
afb585a9 4752 ext4_journal_submit_inode_data_buffers;
342af94e 4753 sbi->s_journal->j_finish_inode_data_buffers =
afb585a9 4754 ext4_journal_finish_inode_data_buffers;
18aadd47 4755
ce7e010a 4756no_journal:
cdb7ee4c
TE
4757 if (!test_opt(sb, NO_MBCACHE)) {
4758 sbi->s_ea_block_cache = ext4_xattr_create_cache();
4759 if (!sbi->s_ea_block_cache) {
dec214d0 4760 ext4_msg(sb, KERN_ERR,
cdb7ee4c 4761 "Failed to create ea_block_cache");
dec214d0
TE
4762 goto failed_mount_wq;
4763 }
cdb7ee4c
TE
4764
4765 if (ext4_has_feature_ea_inode(sb)) {
4766 sbi->s_ea_inode_cache = ext4_xattr_create_cache();
4767 if (!sbi->s_ea_inode_cache) {
4768 ext4_msg(sb, KERN_ERR,
4769 "Failed to create ea_inode_cache");
4770 goto failed_mount_wq;
4771 }
4772 }
9c191f70
M
4773 }
4774
c93d8f88
EB
4775 if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
4776 ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
4777 goto failed_mount_wq;
4778 }
4779
bc98a42c 4780 if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
e2b911c5
DW
4781 !ext4_has_feature_encrypt(sb)) {
4782 ext4_set_feature_encrypt(sb);
4392fbc4 4783 ext4_commit_super(sb);
6ddb2447
TT
4784 }
4785
952fc18e
TT
4786 /*
4787 * Get the # of file system overhead blocks from the
4788 * superblock if present.
4789 */
4790 if (es->s_overhead_clusters)
4791 sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
4792 else {
07aa2ea1
LC
4793 err = ext4_calculate_overhead(sb);
4794 if (err)
952fc18e
TT
4795 goto failed_mount_wq;
4796 }
4797
fd89d5f2
TH
4798 /*
4799 * The maximum number of concurrent works can be high and
4800 * concurrency isn't really necessary. Limit it to 1.
4801 */
2e8fa54e
JK
4802 EXT4_SB(sb)->rsv_conversion_wq =
4803 alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
4804 if (!EXT4_SB(sb)->rsv_conversion_wq) {
4805 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
07aa2ea1 4806 ret = -ENOMEM;
2e8fa54e
JK
4807 goto failed_mount4;
4808 }
4809
ac27a0ec 4810 /*
dab291af 4811 * The jbd2_journal_load will have done any necessary log recovery,
ac27a0ec
DK
4812 * so we can safely mount the rest of the filesystem now.
4813 */
4814
8a363970 4815 root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
1d1fe1ee 4816 if (IS_ERR(root)) {
b31e1552 4817 ext4_msg(sb, KERN_ERR, "get root inode failed");
1d1fe1ee 4818 ret = PTR_ERR(root);
32a9bb57 4819 root = NULL;
ac27a0ec
DK
4820 goto failed_mount4;
4821 }
4822 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
b31e1552 4823 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
94bf608a 4824 iput(root);
ac27a0ec
DK
4825 goto failed_mount4;
4826 }
b886ee3e 4827
48fde701 4828 sb->s_root = d_make_root(root);
1d1fe1ee 4829 if (!sb->s_root) {
b31e1552 4830 ext4_msg(sb, KERN_ERR, "get root dentry failed");
1d1fe1ee
DH
4831 ret = -ENOMEM;
4832 goto failed_mount4;
4833 }
ac27a0ec 4834
c89128a0
JK
4835 ret = ext4_setup_super(sb, es, sb_rdonly(sb));
4836 if (ret == -EROFS) {
1751e8a6 4837 sb->s_flags |= SB_RDONLY;
c89128a0
JK
4838 ret = 0;
4839 } else if (ret)
4840 goto failed_mount4a;
ef7f3835 4841
b5799018 4842 ext4_set_resv_clusters(sb);
27dd4385 4843
0f5bde1d
JK
4844 if (test_opt(sb, BLOCK_VALIDITY)) {
4845 err = ext4_setup_system_zone(sb);
4846 if (err) {
4847 ext4_msg(sb, KERN_ERR, "failed to initialize system "
4848 "zone (%d)", err);
4849 goto failed_mount4a;
4850 }
f9ae9cf5 4851 }
8016e29f 4852 ext4_fc_replay_cleanup(sb);
f9ae9cf5
TT
4853
4854 ext4_ext_init(sb);
196e402a
HS
4855
4856 /*
4857 * Enable optimize_scan if number of groups is > threshold. This can be
4858 * turned off by passing "mb_optimize_scan=0". This can also be
4859 * turned on forcefully by passing "mb_optimize_scan=1".
4860 */
4861 if (parsed_opts.mb_optimize_scan == 1)
4862 set_opt2(sb, MB_OPTIMIZE_SCAN);
4863 else if (parsed_opts.mb_optimize_scan == 0)
4864 clear_opt2(sb, MB_OPTIMIZE_SCAN);
4865 else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
4866 set_opt2(sb, MB_OPTIMIZE_SCAN);
4867
f9ae9cf5
TT
4868 err = ext4_mb_init(sb);
4869 if (err) {
4870 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4871 err);
dcf2d804 4872 goto failed_mount5;
c2774d84
AK
4873 }
4874
027f14f5
TT
4875 /*
4876 * We can only set up the journal commit callback once
4877 * mballoc is initialized
4878 */
4879 if (sbi->s_journal)
4880 sbi->s_journal->j_commit_callback =
4881 ext4_journal_commit_callback;
4882
d5e03cbb 4883 block = ext4_count_free_clusters(sb);
666245d9 4884 ext4_free_blocks_count_set(sbi->s_es,
d5e03cbb 4885 EXT4_C2B(sbi, block));
908c7f19
TH
4886 err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
4887 GFP_KERNEL);
d5e03cbb
TT
4888 if (!err) {
4889 unsigned long freei = ext4_count_free_inodes(sb);
4890 sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
908c7f19
TH
4891 err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
4892 GFP_KERNEL);
d5e03cbb 4893 }
b2bbb92f
JK
4894 /*
4895 * Update the checksum after updating free space/inode
4896 * counters. Otherwise the superblock can have an incorrect
4897 * checksum in the buffer cache until it is written out and
4898 * e2fsprogs programs trying to open a file system immediately
4899 * after it is mounted can fail.
4900 */
4901 ext4_superblock_csum_set(sb);
d5e03cbb
TT
4902 if (!err)
4903 err = percpu_counter_init(&sbi->s_dirs_counter,
908c7f19 4904 ext4_count_dirs(sb), GFP_KERNEL);
d5e03cbb 4905 if (!err)
908c7f19
TH
4906 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
4907 GFP_KERNEL);
efc61345
EW
4908 if (!err)
4909 err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
4910 GFP_KERNEL);
c8585c6f 4911 if (!err)
bbd55937 4912 err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
c8585c6f 4913
d5e03cbb
TT
4914 if (err) {
4915 ext4_msg(sb, KERN_ERR, "insufficient memory");
4916 goto failed_mount6;
4917 }
4918
e2b911c5 4919 if (ext4_has_feature_flex_bg(sb))
d5e03cbb
TT
4920 if (!ext4_fill_flex_info(sb)) {
4921 ext4_msg(sb, KERN_ERR,
4922 "unable to initialize "
4923 "flex_bg meta info!");
8f6840c4 4924 ret = -ENOMEM;
d5e03cbb
TT
4925 goto failed_mount6;
4926 }
4927
bfff6873
LC
4928 err = ext4_register_li_request(sb, first_not_zeroed);
4929 if (err)
dcf2d804 4930 goto failed_mount6;
bfff6873 4931
b5799018 4932 err = ext4_register_sysfs(sb);
dcf2d804
TM
4933 if (err)
4934 goto failed_mount7;
3197ebdb 4935
02f310fc
JK
4936 err = ext4_init_orphan_info(sb);
4937 if (err)
4938 goto failed_mount8;
9b2ff357
JK
4939#ifdef CONFIG_QUOTA
4940 /* Enable quota usage during mount. */
bc98a42c 4941 if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
9b2ff357
JK
4942 err = ext4_enable_quotas(sb);
4943 if (err)
02f310fc 4944 goto failed_mount9;
9b2ff357
JK
4945 }
4946#endif /* CONFIG_QUOTA */
4947
bc71726c 4948 /*
4949 * Save the original bdev mapping's wb_err value which could be
4950 * used to detect the metadata async write error.
4951 */
4952 spin_lock_init(&sbi->s_bdev_wb_lock);
9704a322
ZX
4953 errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
4954 &sbi->s_bdev_wb_err);
bc71726c 4955 sb->s_bdev->bd_super = sb;
617ba13b
MC
4956 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
4957 ext4_orphan_cleanup(sb, es);
4958 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
0390131b 4959 if (needs_recovery) {
b31e1552 4960 ext4_msg(sb, KERN_INFO, "recovery complete");
11215630
JK
4961 err = ext4_mark_recovery_complete(sb, es);
4962 if (err)
02f310fc 4963 goto failed_mount9;
0390131b
FM
4964 }
4965 if (EXT4_SB(sb)->s_journal) {
4966 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
4967 descr = " journalled data mode";
4968 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
4969 descr = " ordered data mode";
4970 else
4971 descr = " writeback data mode";
4972 } else
4973 descr = "out journal";
4974
79add3a3
LC
4975 if (test_opt(sb, DISCARD)) {
4976 struct request_queue *q = bdev_get_queue(sb->s_bdev);
4977 if (!blk_queue_discard(q))
4978 ext4_msg(sb, KERN_WARNING,
4979 "mounting with \"discard\" option, but "
4980 "the device does not support discard");
4981 }
4982
e294a537
TT
4983 if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
4984 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
ca9b404f 4985 "Opts: %.*s%s%s. Quota mode: %s.", descr,
5aee0f8a
TT
4986 (int) sizeof(sbi->s_es->s_mount_opts),
4987 sbi->s_es->s_mount_opts,
ca9b404f
RA
4988 *sbi->s_es->s_mount_opts ? "; " : "", orig_data,
4989 ext4_quota_mode(sb));
ac27a0ec 4990
66e61a9e
TT
4991 if (es->s_error_count)
4992 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
ac27a0ec 4993
efbed4dc
TT
4994 /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
4995 ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
4996 ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
4997 ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
1cf006ed
DM
4998 atomic_set(&sbi->s_warning_count, 0);
4999 atomic_set(&sbi->s_msg_count, 0);
efbed4dc 5000
d4c402d9 5001 kfree(orig_data);
ac27a0ec
DK
5002 return 0;
5003
617ba13b 5004cantfind_ext4:
ac27a0ec 5005 if (!silent)
b31e1552 5006 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
ac27a0ec
DK
5007 goto failed_mount;
5008
02f310fc
JK
5009failed_mount9:
5010 ext4_release_orphan_info(sb);
72ba7450 5011failed_mount8:
ebd173be 5012 ext4_unregister_sysfs(sb);
cb8d53d2 5013 kobject_put(&sbi->s_kobj);
dcf2d804
TM
5014failed_mount7:
5015 ext4_unregister_li_request(sb);
5016failed_mount6:
f9ae9cf5 5017 ext4_mb_release(sb);
7c990728
SJS
5018 rcu_read_lock();
5019 flex_groups = rcu_dereference(sbi->s_flex_groups);
5020 if (flex_groups) {
5021 for (i = 0; i < sbi->s_flex_groups_allocated; i++)
5022 kvfree(flex_groups[i]);
5023 kvfree(flex_groups);
5024 }
5025 rcu_read_unlock();
d5e03cbb
TT
5026 percpu_counter_destroy(&sbi->s_freeclusters_counter);
5027 percpu_counter_destroy(&sbi->s_freeinodes_counter);
5028 percpu_counter_destroy(&sbi->s_dirs_counter);
5029 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
efc61345 5030 percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
bbd55937 5031 percpu_free_rwsem(&sbi->s_writepages_rwsem);
00764937 5032failed_mount5:
f9ae9cf5
TT
5033 ext4_ext_release(sb);
5034 ext4_release_system_zone(sb);
5035failed_mount4a:
94bf608a 5036 dput(sb->s_root);
32a9bb57 5037 sb->s_root = NULL;
94bf608a 5038failed_mount4:
b31e1552 5039 ext4_msg(sb, KERN_ERR, "mount failed");
2e8fa54e
JK
5040 if (EXT4_SB(sb)->rsv_conversion_wq)
5041 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4c0425ff 5042failed_mount_wq:
50c15df6
CX
5043 ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
5044 sbi->s_ea_inode_cache = NULL;
5045
5046 ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
5047 sbi->s_ea_block_cache = NULL;
5048
0390131b 5049 if (sbi->s_journal) {
bb9464e0 5050 /* flush s_error_work before journal destroy. */
5051 flush_work(&sbi->s_error_work);
0390131b
FM
5052 jbd2_journal_destroy(sbi->s_journal);
5053 sbi->s_journal = NULL;
5054 }
50460fe8 5055failed_mount3a:
d3922a77 5056 ext4_es_unregister_shrinker(sbi);
eb68d0e2 5057failed_mount3:
bb9464e0 5058 /* flush s_error_work before sbi destroy */
c92dc856 5059 flush_work(&sbi->s_error_work);
2a4ae3bc 5060 del_timer_sync(&sbi->s_err_report);
618f0031 5061 ext4_stop_mmpd(sbi);
ac27a0ec 5062failed_mount2:
1d0c3924
TT
5063 rcu_read_lock();
5064 group_desc = rcu_dereference(sbi->s_group_desc);
ac27a0ec 5065 for (i = 0; i < db_count; i++)
1d0c3924
TT
5066 brelse(group_desc[i]);
5067 kvfree(group_desc);
5068 rcu_read_unlock();
ac27a0ec 5069failed_mount:
0441984a
DW
5070 if (sbi->s_chksum_driver)
5071 crypto_free_shash(sbi->s_chksum_driver);
c83ad55e
GKB
5072
5073#ifdef CONFIG_UNICODE
f8f4acb6 5074 utf8_unload(sb->s_encoding);
c83ad55e
GKB
5075#endif
5076
ac27a0ec 5077#ifdef CONFIG_QUOTA
a2d4a646 5078 for (i = 0; i < EXT4_MAXQUOTAS; i++)
0ba33fac 5079 kfree(get_qf_name(sb, sbi, i));
ac27a0ec 5080#endif
ac4acb1f 5081 fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
afd09b61 5082 /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
ac27a0ec 5083 brelse(bh);
afd09b61 5084 ext4_blkdev_remove(sbi);
ac27a0ec
DK
5085out_fail:
5086 sb->s_fs_info = NULL;
f6830165 5087 kfree(sbi->s_blockgroup_lock);
5aee0f8a 5088out_free_base:
ac27a0ec 5089 kfree(sbi);
d4c402d9 5090 kfree(orig_data);
5e405595 5091 fs_put_dax(dax_dev);
07aa2ea1 5092 return err ? err : ret;
ac27a0ec
DK
5093}
5094
5095/*
5096 * Setup any per-fs journal parameters now. We'll do this both on
5097 * initial mount, once the journal has been initialised but before we've
5098 * done any recovery; and again on any subsequent remount.
5099 */
617ba13b 5100static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
ac27a0ec 5101{
617ba13b 5102 struct ext4_sb_info *sbi = EXT4_SB(sb);
ac27a0ec 5103
30773840
TT
5104 journal->j_commit_interval = sbi->s_commit_interval;
5105 journal->j_min_batch_time = sbi->s_min_batch_time;
5106 journal->j_max_batch_time = sbi->s_max_batch_time;
6866d7b3 5107 ext4_fc_init(sb, journal);
ac27a0ec 5108
a931da6a 5109 write_lock(&journal->j_state_lock);
ac27a0ec 5110 if (test_opt(sb, BARRIER))
dab291af 5111 journal->j_flags |= JBD2_BARRIER;
ac27a0ec 5112 else
dab291af 5113 journal->j_flags &= ~JBD2_BARRIER;
5bf5683a
HK
5114 if (test_opt(sb, DATA_ERR_ABORT))
5115 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
5116 else
5117 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
a931da6a 5118 write_unlock(&journal->j_state_lock);
ac27a0ec
DK
5119}
5120
c6cb7e77
EW
5121static struct inode *ext4_get_journal_inode(struct super_block *sb,
5122 unsigned int journal_inum)
ac27a0ec
DK
5123{
5124 struct inode *journal_inode;
ac27a0ec 5125
c6cb7e77
EW
5126 /*
5127 * Test for the existence of a valid inode on disk. Bad things
5128 * happen if we iget() an unused inode, as the subsequent iput()
5129 * will try to delete it.
5130 */
8a363970 5131 journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
1d1fe1ee 5132 if (IS_ERR(journal_inode)) {
b31e1552 5133 ext4_msg(sb, KERN_ERR, "no journal found");
ac27a0ec
DK
5134 return NULL;
5135 }
5136 if (!journal_inode->i_nlink) {
5137 make_bad_inode(journal_inode);
5138 iput(journal_inode);
b31e1552 5139 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
ac27a0ec
DK
5140 return NULL;
5141 }
5142
e5f8eab8 5143 jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
ac27a0ec 5144 journal_inode, journal_inode->i_size);
1d1fe1ee 5145 if (!S_ISREG(journal_inode->i_mode)) {
b31e1552 5146 ext4_msg(sb, KERN_ERR, "invalid journal inode");
ac27a0ec
DK
5147 iput(journal_inode);
5148 return NULL;
5149 }
c6cb7e77
EW
5150 return journal_inode;
5151}
5152
5153static journal_t *ext4_get_journal(struct super_block *sb,
5154 unsigned int journal_inum)
5155{
5156 struct inode *journal_inode;
5157 journal_t *journal;
5158
11215630
JK
5159 if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5160 return NULL;
c6cb7e77
EW
5161
5162 journal_inode = ext4_get_journal_inode(sb, journal_inum);
5163 if (!journal_inode)
5164 return NULL;
ac27a0ec 5165
dab291af 5166 journal = jbd2_journal_init_inode(journal_inode);
ac27a0ec 5167 if (!journal) {
b31e1552 5168 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
ac27a0ec
DK
5169 iput(journal_inode);
5170 return NULL;
5171 }
5172 journal->j_private = sb;
617ba13b 5173 ext4_init_journal_params(sb, journal);
ac27a0ec
DK
5174 return journal;
5175}
5176
/*
 * Open an external journal device @j_dev, validate its on-disk
 * superblock (magic, JOURNAL_DEV feature, checksum, UUID match against
 * this filesystem), and hand the device region to jbd2.
 *
 * On success the bdev's ownership is recorded in s_journal_bdev and the
 * initialised journal_t is returned; on any failure NULL is returned
 * and the bdev reference is dropped via ext4_blkdev_put().
 */
static journal_t *ext4_get_dev_journal(struct super_block *sb,
				       dev_t j_dev)
{
	struct buffer_head *bh;
	journal_t *journal;
	ext4_fsblk_t start;
	ext4_fsblk_t len;
	int hblock, blocksize;
	ext4_fsblk_t sb_block;
	unsigned long offset;
	struct ext4_super_block *es;
	struct block_device *bdev;

	/* Callers must only get here on a journalled filesystem. */
	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
		return NULL;

	bdev = ext4_blkdev_get(j_dev, sb);
	if (bdev == NULL)
		return NULL;

	/*
	 * The journal device's hardware sector size must not exceed the
	 * filesystem block size, or we cannot do block-granular I/O.
	 */
	blocksize = sb->s_blocksize;
	hblock = bdev_logical_block_size(bdev);
	if (blocksize < hblock) {
		ext4_msg(sb, KERN_ERR,
			"blocksize too small for journal device");
		goto out_bdev;
	}

	/*
	 * The external journal superblock lives at byte offset 1024
	 * (EXT4_MIN_BLOCK_SIZE); locate which block it falls in and the
	 * offset within that block.
	 */
	sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
	set_blocksize(bdev, blocksize);
	if (!(bh = __bread(bdev, sb_block, blocksize))) {
		ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
		       "external journal");
		goto out_bdev;
	}

	/* Validate magic and the JOURNAL_DEV incompat feature. */
	es = (struct ext4_super_block *) (bh->b_data + offset);
	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
	    !(le32_to_cpu(es->s_feature_incompat) &
	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
		ext4_msg(sb, KERN_ERR, "external journal has "
					"bad superblock");
		brelse(bh);
		goto out_bdev;
	}

	/* If metadata checksums are enabled, verify the superblock csum. */
	if ((le32_to_cpu(es->s_feature_ro_compat) &
	     EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
	    es->s_checksum != ext4_superblock_csum(sb, es)) {
		ext4_msg(sb, KERN_ERR, "external journal has "
				       "corrupt superblock");
		brelse(bh);
		goto out_bdev;
	}

	/* The journal must have been created for *this* filesystem. */
	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
		ext4_msg(sb, KERN_ERR, "journal UUID does not match");
		brelse(bh);
		goto out_bdev;
	}

	/* Journal data starts in the block after its superblock. */
	len = ext4_blocks_count(es);
	start = sb_block + 1;
	brelse(bh);	/* we're done with the superblock */

	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
					start, len, blocksize);
	if (!journal) {
		ext4_msg(sb, KERN_ERR, "failed to create device journal");
		goto out_bdev;
	}
	journal->j_private = sb;
	/* Read the jbd2 journal superblock to check usage constraints. */
	if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) {
		ext4_msg(sb, KERN_ERR, "I/O error on journal device");
		goto out_journal;
	}
	/* Sharing one external journal between filesystems is unsupported. */
	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
		ext4_msg(sb, KERN_ERR, "External journal has more than one "
					"user (unsupported) - %d",
			be32_to_cpu(journal->j_superblock->s_nr_users));
		goto out_journal;
	}
	/* Success: the sb-info now owns the bdev reference. */
	EXT4_SB(sb)->s_journal_bdev = bdev;
	ext4_init_journal_params(sb, journal);
	return journal;

out_journal:
	jbd2_journal_destroy(journal);
out_bdev:
	ext4_blkdev_put(bdev);
	return NULL;
}
5270
617ba13b
MC
/*
 * ext4_load_journal - locate, validate and attach the filesystem journal.
 *
 * @sb:             filesystem superblock
 * @es:             in-memory copy of the on-disk ext4 superblock
 * @journal_devnum: journal device number passed via the "journal_dev="
 *                  mount option (0 if not given)
 *
 * Loads either the inode-based journal (es->s_journal_inum) or an
 * external device journal, wipes it if no recovery is needed, replays
 * it otherwise, and stores the result in EXT4_SB(sb)->s_journal.
 *
 * Returns 0 on success or a negative errno; on failure the journal is
 * destroyed and is NOT left attached to the superblock.
 */
static int ext4_load_journal(struct super_block *sb,
			     struct ext4_super_block *es,
			     unsigned long journal_devnum)
{
	journal_t *journal;
	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
	dev_t journal_dev;
	int err = 0;
	int really_read_only;
	int journal_dev_ro;

	/* Caller must only invoke us on a has_journal filesystem. */
	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
		return -EFSCORRUPTED;

	/* Mount option overrides the device number stored on disk. */
	if (journal_devnum &&
	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
		ext4_msg(sb, KERN_INFO, "external journal device major/minor "
			"numbers have changed");
		journal_dev = new_decode_dev(journal_devnum);
	} else
		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));

	/* Exactly one of inode journal / device journal may be present. */
	if (journal_inum && journal_dev) {
		ext4_msg(sb, KERN_ERR,
			 "filesystem has both journal inode and journal device!");
		return -EINVAL;
	}

	if (journal_inum) {
		journal = ext4_get_journal(sb, journal_inum);
		if (!journal)
			return -EINVAL;
	} else {
		journal = ext4_get_dev_journal(sb, journal_dev);
		if (!journal)
			return -EINVAL;
	}

	journal_dev_ro = bdev_read_only(journal->j_dev);
	really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;

	/* A writable mount cannot sit on top of a read-only journal. */
	if (journal_dev_ro && !sb_rdonly(sb)) {
		ext4_msg(sb, KERN_ERR,
			 "journal device read-only, try mounting with '-o ro'");
		err = -EROFS;
		goto err_out;
	}

	/*
	 * Are we loading a blank journal or performing recovery after a
	 * crash? For recovery, we need to check in advance whether we
	 * can get read-write access to the device.
	 */
	if (ext4_has_feature_journal_needs_recovery(sb)) {
		if (sb_rdonly(sb)) {
			ext4_msg(sb, KERN_INFO, "INFO: recovery "
					"required on readonly filesystem");
			if (really_read_only) {
				ext4_msg(sb, KERN_ERR, "write access "
					"unavailable, cannot proceed "
					"(try mounting with noload)");
				err = -EROFS;
				goto err_out;
			}
			ext4_msg(sb, KERN_INFO, "write access will "
					"be enabled during recovery");
		}
	}

	if (!(journal->j_flags & JBD2_BARRIER))
		ext4_msg(sb, KERN_INFO, "barriers disabled");

	/* No recovery needed: discard any stale journal contents. */
	if (!ext4_has_feature_journal_needs_recovery(sb))
		err = jbd2_journal_wipe(journal, !really_read_only);
	if (!err) {
		/*
		 * Preserve the superblock's error-info area across the
		 * journal replay, since replay may overwrite it with an
		 * older on-disk copy.  Best-effort: skipped if kmalloc fails.
		 */
		char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
		if (save)
			memcpy(save, ((char *) es) +
			       EXT4_S_ERR_START, EXT4_S_ERR_LEN);
		err = jbd2_journal_load(journal);
		if (save)
			memcpy(((char *) es) + EXT4_S_ERR_START,
			       save, EXT4_S_ERR_LEN);
		kfree(save);
	}

	if (err) {
		ext4_msg(sb, KERN_ERR, "error loading journal");
		goto err_out;
	}

	EXT4_SB(sb)->s_journal = journal;
	/* Propagate any error recorded in the journal to the superblock. */
	err = ext4_clear_journal_err(sb, es);
	if (err) {
		/* Detach before destroy so nobody sees a dead journal. */
		EXT4_SB(sb)->s_journal = NULL;
		jbd2_journal_destroy(journal);
		return err;
	}

	if (!really_read_only && journal_devnum &&
	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
		es->s_journal_dev = cpu_to_le32(journal_devnum);

		/* Make sure we flush the recovery flag to disk. */
		ext4_commit_super(sb);
	}

	return 0;

err_out:
	jbd2_journal_destroy(journal);
	return err;
}
5384
2d01ddc8
JK
/* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
static void ext4_update_super(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	struct buffer_head *sbh = sbi->s_sbh;

	/* Buffer lock serializes us against concurrent superblock writers. */
	lock_buffer(sbh);
	/*
	 * If the file system is mounted read-only, don't update the
	 * superblock write time.  This avoids updating the superblock
	 * write time when we are mounting the root file system
	 * read/only but we need to replay the journal; at that point,
	 * for people who are east of GMT and who make their clock
	 * tick in localtime for Windows bug-for-bug compatibility,
	 * the clock is set in the future, and this will cause e2fsck
	 * to complain and force a full file system check.
	 */
	if (!(sb->s_flags & SB_RDONLY))
		ext4_update_tstamp(es, s_wtime);
	/* Lifetime write stats: sectors are 512 bytes, hence the >> 1. */
	es->s_kbytes_written =
		cpu_to_le64(sbi->s_kbytes_written +
		    ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
		      sbi->s_sectors_written_start) >> 1));
	/* Counters may not exist yet during early mount / failed mount. */
	if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
		ext4_free_blocks_count_set(es,
			EXT4_C2B(sbi, percpu_counter_sum_positive(
				&sbi->s_freeclusters_counter)));
	if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
		es->s_free_inodes_count =
			cpu_to_le32(percpu_counter_sum_positive(
				&sbi->s_freeinodes_counter));
	/* Copy error information to the on-disk superblock */
	spin_lock(&sbi->s_error_lock);
	if (sbi->s_add_error_count > 0) {
		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
		/* First-error fields are write-once: only fill when empty. */
		if (!es->s_first_error_time && !es->s_first_error_time_hi) {
			__ext4_update_tstamp(&es->s_first_error_time,
					     &es->s_first_error_time_hi,
					     sbi->s_first_error_time);
			strncpy(es->s_first_error_func, sbi->s_first_error_func,
				sizeof(es->s_first_error_func));
			es->s_first_error_line =
				cpu_to_le32(sbi->s_first_error_line);
			es->s_first_error_ino =
				cpu_to_le32(sbi->s_first_error_ino);
			es->s_first_error_block =
				cpu_to_le64(sbi->s_first_error_block);
			es->s_first_error_errcode =
				ext4_errno_to_code(sbi->s_first_error_code);
		}
		__ext4_update_tstamp(&es->s_last_error_time,
				     &es->s_last_error_time_hi,
				     sbi->s_last_error_time);
		strncpy(es->s_last_error_func, sbi->s_last_error_func,
			sizeof(es->s_last_error_func));
		es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
		es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
		es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
		es->s_last_error_errcode =
				ext4_errno_to_code(sbi->s_last_error_code);
		/*
		 * Start the daily error reporting function if it hasn't been
		 * started already
		 */
		if (!es->s_error_count)
			mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
		le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
		sbi->s_add_error_count = 0;
	}
	spin_unlock(&sbi->s_error_lock);

	/* Checksum must be recomputed after any field above changed. */
	ext4_superblock_csum_set(sb);
	unlock_buffer(sbh);
}
5460
/*
 * ext4_commit_super - sync the in-memory superblock state to disk.
 *
 * Refreshes the on-disk superblock buffer via ext4_update_super() and
 * writes it out synchronously (with FUA when barriers are enabled).
 * Returns 0 on success or a negative errno.
 */
static int ext4_commit_super(struct super_block *sb)
{
	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
	int error = 0;

	if (!sbh)
		return -EINVAL;
	if (block_device_ejected(sb))
		return -ENODEV;

	ext4_update_super(sb);

	if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
		/*
		 * Oh, dear.  A previous attempt to write the
		 * superblock failed.  This could happen because the
		 * USB device was yanked out.  Or it could happen to
		 * be a transient write error and maybe the block will
		 * be remapped.  Nothing we can do but to retry the
		 * write and hope for the best.
		 */
		ext4_msg(sb, KERN_ERR, "previous I/O error to "
		       "superblock detected");
		clear_buffer_write_io_error(sbh);
		set_buffer_uptodate(sbh);
	}
	BUFFER_TRACE(sbh, "marking dirty");
	mark_buffer_dirty(sbh);
	error = __sync_dirty_buffer(sbh,
		REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
	/* Clear a fresh write error so a later retry can succeed. */
	if (buffer_write_io_error(sbh)) {
		ext4_msg(sb, KERN_ERR, "I/O error while writing "
		       "superblock");
		clear_buffer_write_io_error(sbh);
		set_buffer_uptodate(sbh);
	}
	return error;
}
5499
ac27a0ec
DK
/*
 * Have we just finished recovery? If so, and if we are mounting (or
 * remounting) the filesystem readonly, then we will end up with a
 * consistent fs on disk. Record that fact.
 *
 * Returns 0 on success, -EFSCORRUPTED if the journal feature vanished
 * or the orphan file is unexpectedly non-empty, or a negative errno
 * from the journal flush.
 */
static int ext4_mark_recovery_complete(struct super_block *sb,
				       struct ext4_super_block *es)
{
	int err;
	journal_t *journal = EXT4_SB(sb)->s_journal;

	if (!ext4_has_feature_journal(sb)) {
		/* Feature bit gone but a journal is attached: corruption. */
		if (journal != NULL) {
			ext4_error(sb, "Journal got removed while the fs was "
				   "mounted!");
			return -EFSCORRUPTED;
		}
		return 0;
	}
	jbd2_journal_lock_updates(journal);
	err = jbd2_journal_flush(journal, 0);
	if (err < 0)
		goto out;

	if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
	    ext4_has_feature_orphan_present(sb))) {
		if (!ext4_orphan_file_empty(sb)) {
			ext4_error(sb, "Orphan file not empty on read-only fs.");
			err = -EFSCORRUPTED;
			goto out;
		}
		/* Journal is clean: persist that fact on disk. */
		ext4_clear_feature_journal_needs_recovery(sb);
		ext4_clear_feature_orphan_present(sb);
		ext4_commit_super(sb);
	}
out:
	jbd2_journal_unlock_updates(journal);
	return err;
}
5539
/*
 * If we are mounting (or read-write remounting) a filesystem whose journal
 * has recorded an error from a previous lifetime, move that error to the
 * main filesystem now.
 *
 * Returns 0 on success or -EFSCORRUPTED if the journal feature bit has
 * disappeared while mounted.
 */
static int ext4_clear_journal_err(struct super_block *sb,
				  struct ext4_super_block *es)
{
	journal_t *journal;
	int j_errno;
	const char *errstr;

	if (!ext4_has_feature_journal(sb)) {
		ext4_error(sb, "Journal got removed while the fs was mounted!");
		return -EFSCORRUPTED;
	}

	journal = EXT4_SB(sb)->s_journal;

	/*
	 * Now check for any error status which may have been recorded in the
	 * journal by a prior ext4_error() or ext4_abort()
	 */

	j_errno = jbd2_journal_errno(journal);
	if (j_errno) {
		char nbuf[16];

		errstr = ext4_decode_error(sb, j_errno, nbuf);
		ext4_warning(sb, "Filesystem error recorded "
			     "from previous mount: %s", errstr);
		ext4_warning(sb, "Marking fs in need of filesystem check.");

		/* Transfer the error state to the superblock and persist. */
		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
		ext4_commit_super(sb);

		/* Error moved; clear it from the journal superblock. */
		jbd2_journal_clear_err(journal);
		jbd2_journal_update_sb_errno(journal);
	}
	return 0;
}
5582
5583/*
5584 * Force the running and committing transactions to commit,
5585 * and wait on the commit.
5586 */
617ba13b 5587int ext4_force_commit(struct super_block *sb)
ac27a0ec
DK
5588{
5589 journal_t *journal;
ac27a0ec 5590
bc98a42c 5591 if (sb_rdonly(sb))
ac27a0ec
DK
5592 return 0;
5593
617ba13b 5594 journal = EXT4_SB(sb)->s_journal;
b1deefc9 5595 return ext4_journal_force_commit(journal);
ac27a0ec
DK
5596}
5597
/*
 * ext4_sync_fs - VFS ->sync_fs hook.
 *
 * @sb:   filesystem superblock
 * @wait: non-zero to wait for the commit / flush to complete
 *
 * Flushes pending reserved-conversion work and non-journalled quota,
 * kicks (and optionally waits for) a journal commit, and issues a
 * cache-flush barrier when the commit will not send one for us.
 * Returns 0 or the first error encountered while waiting/flushing.
 */
static int ext4_sync_fs(struct super_block *sb, int wait)
{
	int ret = 0;
	tid_t target;
	bool needs_barrier = false;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (unlikely(ext4_forced_shutdown(sbi)))
		return 0;

	trace_ext4_sync_fs(sb, wait);
	flush_workqueue(sbi->rsv_conversion_wq);
	/*
	 * Writeback quota in non-journalled quota case - journalled quota has
	 * no dirty dquots
	 */
	dquot_writeback_dquots(sb, -1);
	/*
	 * Data writeback is possible w/o journal transaction, so barrier must
	 * being sent at the end of the function. But we can skip it if
	 * transaction_commit will do it for us.
	 */
	if (sbi->s_journal) {
		target = jbd2_get_latest_transaction(sbi->s_journal);
		if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
		    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
			needs_barrier = true;

		if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
			if (wait)
				ret = jbd2_log_wait_commit(sbi->s_journal,
							   target);
		}
	} else if (wait && test_opt(sb, BARRIER))
		needs_barrier = true;
	if (needs_barrier) {
		int err;
		err = blkdev_issue_flush(sb->s_bdev);
		/* Preserve the first error; the flush error is secondary. */
		if (!ret)
			ret = err;
	}

	return ret;
}
5642
ac27a0ec
DK
/*
 * LVM calls this function before a (read-only) snapshot is created. This
 * gives us a chance to flush the journal completely and mark the fs clean.
 *
 * Note that only this function cannot bring a filesystem to be in a clean
 * state independently. It relies on upper layer to stop all data & metadata
 * modifications.
 *
 * Returns 0 on success or a negative errno from the journal flush or
 * the superblock commit.
 */
static int ext4_freeze(struct super_block *sb)
{
	int error = 0;
	journal_t *journal;

	/* Read-only fs is already "frozen" from our point of view. */
	if (sb_rdonly(sb))
		return 0;

	journal = EXT4_SB(sb)->s_journal;

	if (journal) {
		/* Now we set up the journal barrier. */
		jbd2_journal_lock_updates(journal);

		/*
		 * Don't clear the needs_recovery flag if we failed to
		 * flush the journal.
		 */
		error = jbd2_journal_flush(journal, 0);
		if (error < 0)
			goto out;

		/* Journal blocked and flushed, clear needs_recovery flag. */
		ext4_clear_feature_journal_needs_recovery(sb);
		if (ext4_orphan_file_empty(sb))
			ext4_clear_feature_orphan_present(sb);
	}

	error = ext4_commit_super(sb);
out:
	if (journal)
		/* we rely on upper layer to stop further updates */
		jbd2_journal_unlock_updates(journal);
	return error;
}
5686
5687/*
5688 * Called by LVM after the snapshot is done. We need to reset the RECOVER
5689 * flag here, even though the filesystem is not technically dirty yet.
5690 */
c4be0c1d 5691static int ext4_unfreeze(struct super_block *sb)
ac27a0ec 5692{
bc98a42c 5693 if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb)))
9ca92389
TT
5694 return 0;
5695
c642dc9e
ES
5696 if (EXT4_SB(sb)->s_journal) {
5697 /* Reset the needs_recovery flag before the fs is unlocked. */
e2b911c5 5698 ext4_set_feature_journal_needs_recovery(sb);
02f310fc
JK
5699 if (ext4_has_feature_orphan_file(sb))
5700 ext4_set_feature_orphan_present(sb);
c642dc9e
ES
5701 }
5702
4392fbc4 5703 ext4_commit_super(sb);
c4be0c1d 5704 return 0;
ac27a0ec
DK
5705}
5706
673c6100
TT
/*
 * Structure to save mount options for ext4_remount's benefit:
 * a snapshot of the tunable sbi/sb state taken before option parsing,
 * restored verbatim on the restore_opts error path.
 */
struct ext4_mount_options {
	unsigned long s_mount_opt;		/* sbi->s_mount_opt snapshot */
	unsigned long s_mount_opt2;		/* sbi->s_mount_opt2 snapshot */
	kuid_t s_resuid;			/* reserved-blocks uid */
	kgid_t s_resgid;			/* reserved-blocks gid */
	unsigned long s_commit_interval;	/* journal commit interval */
	u32 s_min_batch_time, s_max_batch_time;	/* jbd2 batching tunables */
#ifdef CONFIG_QUOTA
	int s_jquota_fmt;			/* journalled quota format */
	/* kstrdup'd copies of quota file names; freed after success */
	char *s_qf_names[EXT4_MAXQUOTAS];
#endif
};
5722
/*
 * ext4_remount - VFS ->remount_fs hook.
 *
 * @sb:    filesystem superblock
 * @flags: in/out mount flags (SB_RDONLY, SB_LAZYTIME, ...)
 * @data:  raw option string from userspace (may be NULL)
 *
 * Re-parses mount options and transitions the filesystem between
 * read-only and read-write as requested.  All tunable state is
 * snapshotted into old_opts first; any failure jumps to restore_opts,
 * which rolls every setting back so the mount is left unchanged.
 * Returns 0 on success or a negative errno.
 */
static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
	struct ext4_super_block *es;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned long old_sb_flags, vfs_flags;
	struct ext4_mount_options old_opts;
	int enable_quota = 0;
	ext4_group_t g;
	int err = 0;
#ifdef CONFIG_QUOTA
	int i, j;
	char *to_free[EXT4_MAXQUOTAS];
#endif
	char *orig_data = kstrdup(data, GFP_KERNEL);
	struct ext4_parsed_options parsed_opts;

	parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
	parsed_opts.journal_devnum = 0;

	if (data && !orig_data)
		return -ENOMEM;

	/* Store the original options */
	old_sb_flags = sb->s_flags;
	old_opts.s_mount_opt = sbi->s_mount_opt;
	old_opts.s_mount_opt2 = sbi->s_mount_opt2;
	old_opts.s_resuid = sbi->s_resuid;
	old_opts.s_resgid = sbi->s_resgid;
	old_opts.s_commit_interval = sbi->s_commit_interval;
	old_opts.s_min_batch_time = sbi->s_min_batch_time;
	old_opts.s_max_batch_time = sbi->s_max_batch_time;
#ifdef CONFIG_QUOTA
	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		if (sbi->s_qf_names[i]) {
			char *qf_name = get_qf_name(sb, sbi, i);

			old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
			if (!old_opts.s_qf_names[i]) {
				/* Unwind the copies made so far. */
				for (j = 0; j < i; j++)
					kfree(old_opts.s_qf_names[j]);
				kfree(orig_data);
				return -ENOMEM;
			}
		} else
			old_opts.s_qf_names[i] = NULL;
#endif
	if (sbi->s_journal && sbi->s_journal->j_task->io_context)
		parsed_opts.journal_ioprio =
			sbi->s_journal->j_task->io_context->ioprio;

	/*
	 * Some options can be enabled by ext4 and/or by VFS mount flag
	 * either way we need to make sure it matches in both *flags and
	 * s_flags. Copy those selected flags from *flags to s_flags
	 */
	vfs_flags = SB_LAZYTIME | SB_I_VERSION;
	sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags);

	if (!parse_options(data, sb, &parsed_opts, 1)) {
		err = -EINVAL;
		goto restore_opts;
	}

	/* journal_checksum cannot be toggled on a live journal. */
	if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
	    test_opt(sb, JOURNAL_CHECKSUM)) {
		ext4_msg(sb, KERN_ERR, "changing journal_checksum "
			 "during remount not supported; ignoring");
		sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
	}

	/* Reject option combinations that are mutually incompatible. */
	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
		if (test_opt2(sb, EXPLICIT_DELALLOC)) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "both data=journal and delalloc");
			err = -EINVAL;
			goto restore_opts;
		}
		if (test_opt(sb, DIOREAD_NOLOCK)) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "both data=journal and dioread_nolock");
			err = -EINVAL;
			goto restore_opts;
		}
	} else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
		if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				"journal_async_commit in data=ordered mode");
			err = -EINVAL;
			goto restore_opts;
		}
	}

	if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
		ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
		err = -EINVAL;
		goto restore_opts;
	}

	if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
		ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");

	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
		(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);

	es = sbi->s_es;

	if (sbi->s_journal) {
		ext4_init_journal_params(sb, sbi->s_journal);
		set_task_ioprio(sbi->s_journal->j_task, parsed_opts.journal_ioprio);
	}

	/* Flush outstanding errors before changing fs state */
	flush_work(&sbi->s_error_work);

	if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
		if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
			err = -EROFS;
			goto restore_opts;
		}

		if (*flags & SB_RDONLY) {
			err = sync_filesystem(sb);
			if (err < 0)
				goto restore_opts;
			err = dquot_suspend(sb, -1);
			if (err < 0)
				goto restore_opts;

			/*
			 * First of all, the unconditional stuff we have to do
			 * to disable replay of the journal when we next remount
			 */
			sb->s_flags |= SB_RDONLY;

			/*
			 * OK, test if we are remounting a valid rw partition
			 * readonly, and if so set the rdonly flag and then
			 * mark the partition as valid again.
			 */
			if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
			    (sbi->s_mount_state & EXT4_VALID_FS))
				es->s_state = cpu_to_le16(sbi->s_mount_state);

			if (sbi->s_journal) {
				/*
				 * We let remount-ro finish even if marking fs
				 * as clean failed...
				 */
				ext4_mark_recovery_complete(sb, es);
			}
		} else {
			/* Make sure we can mount this feature set readwrite */
			if (ext4_has_feature_readonly(sb) ||
			    !ext4_feature_set_ok(sb, 0)) {
				err = -EROFS;
				goto restore_opts;
			}
			/*
			 * Make sure the group descriptor checksums
			 * are sane.  If they aren't, refuse to remount r/w.
			 */
			for (g = 0; g < sbi->s_groups_count; g++) {
				struct ext4_group_desc *gdp =
					ext4_get_group_desc(sb, g, NULL);

				if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
					ext4_msg(sb, KERN_ERR,
		"ext4_remount: Checksum for group %u failed (%u!=%u)",
		g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
					       le16_to_cpu(gdp->bg_checksum));
					err = -EFSBADCRC;
					goto restore_opts;
				}
			}

			/*
			 * If we have an unprocessed orphan list hanging
			 * around from a previously readonly bdev mount,
			 * require a full umount/remount for now.
			 */
			if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
				ext4_msg(sb, KERN_WARNING, "Couldn't "
				       "remount RDWR because of unprocessed "
				       "orphan inode list.  Please "
				       "umount/remount instead");
				err = -EINVAL;
				goto restore_opts;
			}

			/*
			 * Mounting a RDONLY partition read-write, so reread
			 * and store the current valid flag.  (It may have
			 * been changed by e2fsck since we originally mounted
			 * the partition.)
			 */
			if (sbi->s_journal) {
				err = ext4_clear_journal_err(sb, es);
				if (err)
					goto restore_opts;
			}
			sbi->s_mount_state = le16_to_cpu(es->s_state);

			err = ext4_setup_super(sb, es, 0);
			if (err)
				goto restore_opts;

			sb->s_flags &= ~SB_RDONLY;
			if (ext4_has_feature_mmp(sb))
				if (ext4_multi_mount_protect(sb,
						le64_to_cpu(es->s_mmp_block))) {
					err = -EROFS;
					goto restore_opts;
				}
			enable_quota = 1;
		}
	}

	/*
	 * Reinitialize lazy itable initialization thread based on
	 * current settings
	 */
	if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
		ext4_unregister_li_request(sb);
	else {
		ext4_group_t first_not_zeroed;
		first_not_zeroed = ext4_has_uninit_itable(sb);
		ext4_register_li_request(sb, first_not_zeroed);
	}

	/*
	 * Handle creation of system zone data early because it can fail.
	 * Releasing of existing data is done when we are sure remount will
	 * succeed.
	 */
	if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
		err = ext4_setup_system_zone(sb);
		if (err)
			goto restore_opts;
	}

	/* No journal to record the change: write the superblock directly. */
	if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
		err = ext4_commit_super(sb);
		if (err)
			goto restore_opts;
	}

#ifdef CONFIG_QUOTA
	/* Release old quota file names */
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(old_opts.s_qf_names[i]);
	if (enable_quota) {
		if (sb_any_quota_suspended(sb))
			dquot_resume(sb, -1);
		else if (ext4_has_feature_quota(sb)) {
			err = ext4_enable_quotas(sb);
			if (err)
				goto restore_opts;
		}
	}
#endif
	if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
		ext4_release_system_zone(sb);

	if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
		ext4_stop_mmpd(sbi);

	/*
	 * Some options can be enabled by ext4 and/or by VFS mount flag
	 * either way we need to make sure it matches in both *flags and
	 * s_flags. Copy those selected flags from s_flags to *flags
	 */
	*flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags);

	ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.",
		 orig_data, ext4_quota_mode(sb));
	kfree(orig_data);
	return 0;

restore_opts:
	/* Roll every tunable back to the snapshot taken on entry. */
	sb->s_flags = old_sb_flags;
	sbi->s_mount_opt = old_opts.s_mount_opt;
	sbi->s_mount_opt2 = old_opts.s_mount_opt2;
	sbi->s_resuid = old_opts.s_resuid;
	sbi->s_resgid = old_opts.s_resgid;
	sbi->s_commit_interval = old_opts.s_commit_interval;
	sbi->s_min_batch_time = old_opts.s_min_batch_time;
	sbi->s_max_batch_time = old_opts.s_max_batch_time;
	if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
		ext4_release_system_zone(sb);
#ifdef CONFIG_QUOTA
	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
		to_free[i] = get_qf_name(sb, sbi, i);
		rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
	}
	/* Wait for RCU readers of the old names before freeing them. */
	synchronize_rcu();
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(to_free[i]);
#endif
	if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
		ext4_stop_mmpd(sbi);
	kfree(orig_data);
	return err;
}
6028
689c958c
LX
6029#ifdef CONFIG_QUOTA
/*
 * ext4_statfs_project - clamp statfs figures to project quota limits.
 *
 * @sb:     filesystem superblock
 * @projid: project id whose quota limits apply
 * @buf:    statfs result to adjust in place
 *
 * If the project's block/inode limit is tighter than the filesystem
 * totals already in @buf, report the limit as the total and recompute
 * the free counts against current usage.  Returns 0 or the errno from
 * dqget().
 */
static int ext4_statfs_project(struct super_block *sb,
			       kprojid_t projid, struct kstatfs *buf)
{
	struct kqid qid;
	struct dquot *dquot;
	u64 limit;
	u64 curblock;

	qid = make_kqid_projid(projid);
	dquot = dqget(sb, qid);
	if (IS_ERR(dquot))
		return PTR_ERR(dquot);
	spin_lock(&dquot->dq_dqb_lock);

	/* Effective block limit: the smaller non-zero of soft/hard. */
	limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
			     dquot->dq_dqb.dqb_bhardlimit);
	limit >>= sb->s_blocksize_bits;

	if (limit && buf->f_blocks > limit) {
		curblock = (dquot->dq_dqb.dqb_curspace +
			    dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
		buf->f_blocks = limit;
		buf->f_bfree = buf->f_bavail =
			(buf->f_blocks > curblock) ?
			 (buf->f_blocks - curblock) : 0;
	}

	/* Same clamping for inode counts. */
	limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
			     dquot->dq_dqb.dqb_ihardlimit);
	if (limit && buf->f_files > limit) {
		buf->f_files = limit;
		buf->f_ffree =
			(buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
			 (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
	}

	spin_unlock(&dquot->dq_dqb_lock);
	dqput(dquot);
	return 0;
}
6070#endif
6071
/*
 * ext4_statfs - VFS ->statfs hook.
 *
 * Fills @buf with block/inode totals and free counts, subtracting fs
 * overhead (unless minixdf), reserved blocks, and dirty-cluster
 * reservations; project quota limits are applied last when enabled.
 */
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	ext4_fsblk_t overhead = 0, resv_blocks;
	s64 bfree;
	resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));

	/* minixdf reports raw totals; otherwise subtract fs overhead. */
	if (!test_opt(sb, MINIX_DF))
		overhead = sbi->s_overhead;

	buf->f_type = EXT4_SUPER_MAGIC;
	buf->f_bsize = sb->s_blocksize;
	buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
	bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
		percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
	/* prevent underflow in case that few free space is available */
	buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
	buf->f_bavail = buf->f_bfree -
			(ext4_r_blocks_count(es) + resv_blocks);
	if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
		buf->f_bavail = 0;
	buf->f_files = le32_to_cpu(es->s_inodes_count);
	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
	buf->f_namelen = EXT4_NAME_LEN;
	buf->f_fsid = uuid_to_fsid(es->s_uuid);

#ifdef CONFIG_QUOTA
	if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
	    sb_has_quota_limits_enabled(sb, PRJQUOTA))
		ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
#endif
	return 0;
}
6107
ac27a0ec
DK
6108
6109#ifdef CONFIG_QUOTA
6110
bc8230ee
JK
6111/*
6112 * Helper functions so that transaction is started before we acquire dqio_sem
6113 * to keep correct lock ordering of transaction > dqio_sem
6114 */
ac27a0ec
DK
6115static inline struct inode *dquot_to_inode(struct dquot *dquot)
6116{
4c376dca 6117 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
ac27a0ec
DK
6118}
6119
617ba13b 6120static int ext4_write_dquot(struct dquot *dquot)
ac27a0ec
DK
6121{
6122 int ret, err;
6123 handle_t *handle;
6124 struct inode *inode;
6125
6126 inode = dquot_to_inode(dquot);
9924a92a 6127 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
0b8e58a1 6128 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
ac27a0ec
DK
6129 if (IS_ERR(handle))
6130 return PTR_ERR(handle);
6131 ret = dquot_commit(dquot);
617ba13b 6132 err = ext4_journal_stop(handle);
ac27a0ec
DK
6133 if (!ret)
6134 ret = err;
6135 return ret;
6136}
6137
617ba13b 6138static int ext4_acquire_dquot(struct dquot *dquot)
ac27a0ec
DK
6139{
6140 int ret, err;
6141 handle_t *handle;
6142
9924a92a 6143 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
0b8e58a1 6144 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
ac27a0ec
DK
6145 if (IS_ERR(handle))
6146 return PTR_ERR(handle);
6147 ret = dquot_acquire(dquot);
617ba13b 6148 err = ext4_journal_stop(handle);
ac27a0ec
DK
6149 if (!ret)
6150 ret = err;
6151 return ret;
6152}
6153
617ba13b 6154static int ext4_release_dquot(struct dquot *dquot)
ac27a0ec
DK
6155{
6156 int ret, err;
6157 handle_t *handle;
6158
9924a92a 6159 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
0b8e58a1 6160 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
9c3013e9
JK
6161 if (IS_ERR(handle)) {
6162 /* Release dquot anyway to avoid endless cycle in dqput() */
6163 dquot_release(dquot);
ac27a0ec 6164 return PTR_ERR(handle);
9c3013e9 6165 }
ac27a0ec 6166 ret = dquot_release(dquot);
617ba13b 6167 err = ext4_journal_stop(handle);
ac27a0ec
DK
6168 if (!ret)
6169 ret = err;
6170 return ret;
6171}
6172
617ba13b 6173static int ext4_mark_dquot_dirty(struct dquot *dquot)
ac27a0ec 6174{
262b4662 6175 struct super_block *sb = dquot->dq_sb;
262b4662 6176
f177ee08 6177 if (ext4_is_quota_journalled(sb)) {
ac27a0ec 6178 dquot_mark_dquot_dirty(dquot);
617ba13b 6179 return ext4_write_dquot(dquot);
ac27a0ec
DK
6180 } else {
6181 return dquot_mark_dquot_dirty(dquot);
6182 }
6183}
6184
617ba13b 6185static int ext4_write_info(struct super_block *sb, int type)
ac27a0ec
DK
6186{
6187 int ret, err;
6188 handle_t *handle;
6189
6190 /* Data block + inode block */
2b0143b5 6191 handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
ac27a0ec
DK
6192 if (IS_ERR(handle))
6193 return PTR_ERR(handle);
6194 ret = dquot_commit_info(sb, type);
617ba13b 6195 err = ext4_journal_stop(handle);
ac27a0ec
DK
6196 if (!ret)
6197 ret = err;
6198 return ret;
6199}
6200
/*
 * Tag a quota inode's i_data_sem with a distinct lockdep subclass so
 * quota-file locking nests cleanly with regular inode i_data_sem use.
 */
static void lockdep_set_quota_inode(struct inode *inode, int subclass)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	/* The first argument of lockdep_set_subclass has to be
	 * *exactly* the same as the argument to init_rwsem() --- in
	 * this case, in init_once() --- or lockdep gets unhappy
	 * because the name of the lock is set using the
	 * stringification of the argument to init_rwsem().
	 */
	(void) ei;	/* shut up clang warning if !CONFIG_LOCKDEP */
	lockdep_set_subclass(&ei->i_data_sem, subclass);
}
6214
/*
 * Standard function to be called on quota_on
 *
 * Enables quota of @type backed by the regular (user-visible) file at
 * @path.  On success the quota file is additionally marked
 * NOATIME+IMMUTABLE so userspace cannot tamper with it.  Returns 0 or
 * a negative errno.
 */
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
			 const struct path *path)
{
	int err;

	if (!test_opt(sb, QUOTA))
		return -EINVAL;

	/* Quotafile not on the same filesystem? */
	if (path->dentry->d_sb != sb)
		return -EXDEV;

	/* Quota already enabled for this file? */
	if (IS_NOQUOTA(d_inode(path->dentry)))
		return -EBUSY;

	/* Journaling quota? */
	if (EXT4_SB(sb)->s_qf_names[type]) {
		/* Quotafile not in fs root? */
		if (path->dentry->d_parent != sb->s_root)
			ext4_msg(sb, KERN_WARNING,
				"Quota file not on filesystem root. "
				"Journaled quota will not work");
		sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
	} else {
		/*
		 * Clear the flag just in case mount options changed since
		 * last time.
		 */
		sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
	}

	/*
	 * When we journal data on quota file, we have to flush journal to see
	 * all updates to the file when we bypass pagecache...
	 */
	if (EXT4_SB(sb)->s_journal &&
	    ext4_should_journal_data(d_inode(path->dentry))) {
		/*
		 * We don't need to lock updates but journal_flush() could
		 * otherwise be livelocked...
		 */
		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
		err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		if (err)
			return err;
	}

	/* Tag the quota file's i_data_sem before the dquot code locks it. */
	lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
	err = dquot_quota_on(sb, type, format_id, path);
	if (!err) {
		struct inode *inode = d_inode(path->dentry);
		handle_t *handle;

		/*
		 * Set inode flags to prevent userspace from messing with quota
		 * files. If this fails, we return success anyway since quotas
		 * are already enabled and this is not a hard failure.
		 */
		inode_lock(inode);
		handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
		if (IS_ERR(handle))
			goto unlock_inode;
		EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
		inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
				S_NOATIME | S_IMMUTABLE);
		err = ext4_mark_inode_dirty(handle, inode);
		ext4_journal_stop(handle);
	unlock_inode:
		inode_unlock(inode);
		/* Marking the inode dirty failed: back quota out again. */
		if (err)
			dquot_quota_off(sb, type);
	}
	/* On any failure, restore the normal lockdep subclass. */
	if (err)
		lockdep_set_quota_inode(path->dentry->d_inode,
					I_DATA_SEM_NORMAL);
	return err;
}
6297
7c319d32
AK
6298static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
6299 unsigned int flags)
6300{
6301 int err;
6302 struct inode *qf_inode;
a2d4a646 6303 unsigned long qf_inums[EXT4_MAXQUOTAS] = {
7c319d32 6304 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
689c958c
LX
6305 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
6306 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
7c319d32
AK
6307 };
6308
e2b911c5 6309 BUG_ON(!ext4_has_feature_quota(sb));
7c319d32
AK
6310
6311 if (!qf_inums[type])
6312 return -EPERM;
6313
8a363970 6314 qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
7c319d32
AK
6315 if (IS_ERR(qf_inode)) {
6316 ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
6317 return PTR_ERR(qf_inode);
6318 }
6319
bcb13850
JK
6320 /* Don't account quota for quota files to avoid recursion */
6321 qf_inode->i_flags |= S_NOQUOTA;
daf647d2 6322 lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
7212b95e 6323 err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
daf647d2
TT
6324 if (err)
6325 lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
61157b24 6326 iput(qf_inode);
7c319d32
AK
6327
6328 return err;
6329}
6330
/* Enable usage tracking for all quota types. */
int ext4_enable_quotas(struct super_block *sb)
{
	int type, err = 0;
	/* Hidden quota inode numbers, indexed by quota type. */
	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
	};
	/* Whether limit enforcement was also requested via mount options. */
	bool quota_mopt[EXT4_MAXQUOTAS] = {
		test_opt(sb, USRQUOTA),
		test_opt(sb, GRPQUOTA),
		test_opt(sb, PRJQUOTA),
	};

	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
	for (type = 0; type < EXT4_MAXQUOTAS; type++) {
		if (qf_inums[type]) {
			err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
				DQUOT_USAGE_ENABLED |
				(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
			if (err) {
				ext4_warning(sb,
					"Failed to enable quota tracking "
					"(type=%d, err=%d). Please run "
					"e2fsck to fix.", type, err);
				/*
				 * Roll back types already enabled.  Pin each
				 * quota inode with igrab() first so its
				 * lockdep subclass can be reset after
				 * dquot_quota_off() drops the quota state.
				 */
				for (type--; type >= 0; type--) {
					struct inode *inode;

					inode = sb_dqopt(sb)->files[type];
					if (inode)
						inode = igrab(inode);
					dquot_quota_off(sb, type);
					if (inode) {
						lockdep_set_quota_inode(inode,
							I_DATA_SEM_NORMAL);
						iput(inode);
					}
				}

				return err;
			}
		}
	}
	return 0;
}
6377
/*
 * Turn quota of @type off.  For quota files visible in the namespace
 * this also clears the NOATIME/IMMUTABLE protection flags set by
 * ext4_quota_on() and refreshes the file's timestamps.
 */
static int ext4_quota_off(struct super_block *sb, int type)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	handle_t *handle;
	int err;

	/* Force all delayed allocation blocks to be allocated.
	 * Caller already holds s_umount sem */
	if (test_opt(sb, DELALLOC))
		sync_filesystem(sb);

	/* No quota file, or it is already going away: plain turn-off. */
	if (!inode || !igrab(inode))
		goto out;

	err = dquot_quota_off(sb, type);
	/* Hidden (feature-quota) files keep their flags untouched. */
	if (err || ext4_has_feature_quota(sb))
		goto out_put;

	inode_lock(inode);
	/*
	 * Update modification times of quota files when userspace can
	 * start looking at them. If we fail, we return success anyway since
	 * this is not a hard failure and quotas are already disabled.
	 */
	handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto out_unlock;
	}
	/* Drop the tamper-protection flags set in ext4_quota_on(). */
	EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
	inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
	inode->i_mtime = inode->i_ctime = current_time(inode);
	err = ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);
out_unlock:
	inode_unlock(inode);
out_put:
	lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
	iput(inode);
	return err;
out:
	return dquot_quota_off(sb, type);
}
6421
ac27a0ec
DK
6422/* Read data from quotafile - avoid pagecache and such because we cannot afford
6423 * acquiring the locks... As quota files are never truncated and quota code
25985edc 6424 * itself serializes the operations (and no one else should touch the files)
ac27a0ec 6425 * we don't have to be afraid of races */
617ba13b 6426static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
ac27a0ec
DK
6427 size_t len, loff_t off)
6428{
6429 struct inode *inode = sb_dqopt(sb)->files[type];
725d26d3 6430 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
ac27a0ec
DK
6431 int offset = off & (sb->s_blocksize - 1);
6432 int tocopy;
6433 size_t toread;
6434 struct buffer_head *bh;
6435 loff_t i_size = i_size_read(inode);
6436
6437 if (off > i_size)
6438 return 0;
6439 if (off+len > i_size)
6440 len = i_size-off;
6441 toread = len;
6442 while (toread > 0) {
6443 tocopy = sb->s_blocksize - offset < toread ?
6444 sb->s_blocksize - offset : toread;
1c215028
TT
6445 bh = ext4_bread(NULL, inode, blk, 0);
6446 if (IS_ERR(bh))
6447 return PTR_ERR(bh);
ac27a0ec
DK
6448 if (!bh) /* A hole? */
6449 memset(data, 0, tocopy);
6450 else
6451 memcpy(data, bh->b_data+offset, tocopy);
6452 brelse(bh);
6453 offset = 0;
6454 toread -= tocopy;
6455 data += tocopy;
6456 blk++;
6457 }
6458 return len;
6459}
6460
/* Write to quotafile (we know the transaction is already started and has
 * enough credits) */
static ssize_t ext4_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
	int retries = 0;
	struct buffer_head *bh;
	handle_t *handle = journal_current_handle();

	/* The caller must already run inside a transaction (see above). */
	if (!handle) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because transaction is not started",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}
	/*
	 * Since we account only one data block in transaction credits,
	 * then it is impossible to cross a block boundary.
	 */
	if (sb->s_blocksize - offset < len) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because not block aligned",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}

	/* Map the block, allocating if needed; retry transient ENOSPC. */
	do {
		bh = ext4_bread(handle, inode, blk,
				EXT4_GET_BLOCKS_CREATE |
				EXT4_GET_BLOCKS_METADATA_NOFAIL);
	} while (PTR_ERR(bh) == -ENOSPC &&
		 ext4_should_retry_alloc(inode->i_sb, &retries));
	if (IS_ERR(bh))
		return PTR_ERR(bh);
	if (!bh)
		goto out;
	BUFFER_TRACE(bh, "get write access");
	err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
	if (err) {
		brelse(bh);
		return err;
	}
	lock_buffer(bh);
	memcpy(bh->b_data+offset, data, len);
	flush_dcache_page(bh->b_page);
	unlock_buffer(bh);
	err = ext4_handle_dirty_metadata(handle, NULL, bh);
	brelse(bh);
out:
	/* Grow i_size/i_disksize if the write extended the file. */
	if (inode->i_size < off + len) {
		i_size_write(inode, off + len);
		EXT4_I(inode)->i_disksize = inode->i_size;
		err2 = ext4_mark_inode_dirty(handle, inode);
		if (unlikely(err2 && !err))
			err = err2;
	}
	return err ? err : len;
}
ac27a0ec
DK
6522#endif
6523
/*
 * Mount entry point: ext4 lives on a block device, so defer to
 * mount_bdev() with ext4_fill_super() doing the real work.
 */
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
		       const char *dev_name, void *data)
{
	return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
}
6529
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
/* Register the ext4 module as the handler for the legacy "ext2" type. */
static inline void register_as_ext2(void)
{
	int err = register_filesystem(&ext2_fs_type);
	if (err)
		printk(KERN_WARNING
		       "EXT4-fs: Unable to register as ext2 (%d)\n", err);
}

static inline void unregister_as_ext2(void)
{
	unregister_filesystem(&ext2_fs_type);
}

/*
 * Can this superblock be safely handled as ext2?  Unknown incompat
 * features always disqualify; unknown ro-compat features only matter
 * when the mount is read-write.
 */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext2_incompat_features(sb))
		return 0;
	if (sb_rdonly(sb))
		return 1;
	if (ext4_has_unknown_ext2_ro_compat_features(sb))
		return 0;
	return 1;
}
#else
/* A real ext2 driver is built; ext4 stays out of its way. */
static inline void register_as_ext2(void) { }
static inline void unregister_as_ext2(void) { }
static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
#endif
6559
/* Register the ext4 module as the handler for the legacy "ext3" type. */
static inline void register_as_ext3(void)
{
	int err = register_filesystem(&ext3_fs_type);
	if (err)
		printk(KERN_WARNING
		       "EXT4-fs: Unable to register as ext3 (%d)\n", err);
}

static inline void unregister_as_ext3(void)
{
	unregister_filesystem(&ext3_fs_type);
}

/*
 * Can this superblock be safely handled as ext3?  It must have no
 * incompat features beyond ext3's, must have a journal, and (for
 * read-write mounts) no unknown ro-compat features.
 */
static inline int ext3_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext3_incompat_features(sb))
		return 0;
	if (!ext4_has_feature_journal(sb))
		return 0;
	if (sb_rdonly(sb))
		return 1;
	if (ext4_has_unknown_ext3_ro_compat_features(sb))
		return 0;
	return 1;
}
24b58424 6585
/*
 * The "ext4" filesystem type proper: requires a block device, allows
 * idmapped mounts, and is torn down with the generic block-super killer.
 */
static struct file_system_type ext4_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ext4",
	.mount		= ext4_mount,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("ext4");
/* Shared across all ext4 file systems */
/* NOTE(review): hashed wait-queue heads, initialized in ext4_init_fs();
 * presumably keyed by inode for I/O-end waiters — confirm at the users. */
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
/*
 * Module init: bring up every global ext4 subsystem in order, then
 * register the filesystem types.  On any failure, undo the steps
 * already completed by falling through the labels below in exact
 * reverse order of initialization.
 */
static int __init ext4_init_fs(void)
{
	int i, err;

	ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
	ext4_li_info = NULL;

	/* Build-time check for flags consistency */
	ext4_check_flag_values();

	for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
		init_waitqueue_head(&ext4__ioend_wq[i]);

	err = ext4_init_es();
	if (err)
		return err;

	err = ext4_init_pending();
	if (err)
		goto out7;

	err = ext4_init_post_read_processing();
	if (err)
		goto out6;

	err = ext4_init_pageio();
	if (err)
		goto out5;

	err = ext4_init_system_zone();
	if (err)
		goto out4;

	err = ext4_init_sysfs();
	if (err)
		goto out3;

	err = ext4_init_mballoc();
	if (err)
		goto out2;
	err = init_inodecache();
	if (err)
		goto out1;

	err = ext4_fc_init_dentry_cache();
	if (err)
		goto out05;

	/* Optionally serve the legacy ext2/ext3 types as well. */
	register_as_ext3();
	register_as_ext2();
	err = register_filesystem(&ext4_fs_type);
	if (err)
		goto out;

	return 0;
out:
	unregister_as_ext2();
	unregister_as_ext3();
	ext4_fc_destroy_dentry_cache();
out05:
	destroy_inodecache();
out1:
	ext4_exit_mballoc();
out2:
	ext4_exit_sysfs();
out3:
	ext4_exit_system_zone();
out4:
	ext4_exit_pageio();
out5:
	ext4_exit_post_read_processing();
out6:
	ext4_exit_pending();
out7:
	ext4_exit_es();

	return err;
}
6676
/*
 * Module exit: stop the lazy-init thread, unregister every filesystem
 * type, then dismantle the global subsystems brought up by
 * ext4_init_fs(), roughly in reverse order of initialization.
 */
static void __exit ext4_exit_fs(void)
{
	ext4_destroy_lazyinit_thread();
	unregister_as_ext2();
	unregister_as_ext3();
	unregister_filesystem(&ext4_fs_type);
	ext4_fc_destroy_dentry_cache();
	destroy_inodecache();
	ext4_exit_mballoc();
	ext4_exit_sysfs();
	ext4_exit_system_zone();
	ext4_exit_pageio();
	ext4_exit_post_read_processing();
	ext4_exit_es();
	ext4_exit_pending();
}
6693
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
/* Soft dependency: ask module tools to load crc32c before ext4. */
MODULE_SOFTDEP("pre: crc32c");
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)