fs/ext4/super.c

   1 /*
   2  *  linux/fs/ext4/super.c
   3  *
   4  * Copyright (C) 1992, 1993, 1994, 1995
   5  * Remy Card (card@masi.ibp.fr)
   6  * Laboratoire MASI - Institut Blaise Pascal
   7  * Universite Pierre et Marie Curie (Paris VI)
   8  *
   9  *  from
  10  *
  11  *  linux/fs/minix/inode.c
  12  *
  13  *  Copyright (C) 1991, 1992  Linus Torvalds
  14  *
  15  *  Big-endian to little-endian byte-swapping/bitmaps by
  16  *        David S. Miller (davem@caip.rutgers.edu), 1995
  17  */
  18
  19 #include <linux/module.h>
  20 #include <linux/string.h>
  21 #include <linux/fs.h>
  22 #include <linux/time.h>
  23 #include <linux/vmalloc.h>
  24 #include <linux/slab.h>
  25 #include <linux/init.h>
  26 #include <linux/blkdev.h>
  27 #include <linux/backing-dev.h>
  28 #include <linux/parser.h>
  29 #include <linux/buffer_head.h>
  30 #include <linux/exportfs.h>
  31 #include <linux/vfs.h>
  32 #include <linux/random.h>
  33 #include <linux/mount.h>
  34 #include <linux/namei.h>
  35 #include <linux/quotaops.h>
  36 #include <linux/seq_file.h>
  37 #include <linux/ctype.h>
  38 #include <linux/log2.h>
  39 #include <linux/crc16.h>
  40 #include <linux/cleancache.h>
  41 #include <linux/uaccess.h>
  42 #include <linux/user_namespace.h>
  43
  44 #include <linux/kthread.h>
  45 #include <linux/freezer.h>
  46
  47 #include "ext4.h"
  48 #include "ext4_extents.h"       /* Needed for trace points definition */
  49 #include "ext4_jbd2.h"
  50 #include "xattr.h"
  51 #include "acl.h"
  52 #include "mballoc.h"
  53
  54 #define CREATE_TRACE_POINTS
  55 #include <trace/events/ext4.h>
  56
/*
 * File-scope state shared by the functions in this file.
 * NOTE(review): judging by the names, these are the lazy-init bookkeeping
 * structure, the mutex that guards it, and a rate limiter for mount-time
 * messages; their users are outside the part of the file reviewed here,
 * so confirm before relying on this description.
 */
static struct ext4_lazy_init *ext4_li_info;
static struct mutex ext4_li_mtx;
static struct ratelimit_state ext4_mount_msg_ratelimit;
  60
/*
 * Forward declarations of static functions that are referenced before
 * their definitions (for example ext4_commit_super() from the error
 * helpers, and ext4_mount() from the file_system_type initializers).
 */
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
static int ext4_commit_super(struct super_block *sb, int sync);
static void ext4_mark_recovery_complete(struct super_block *sb,
                                        struct ext4_super_block *es);
static void ext4_clear_journal_err(struct super_block *sb,
                                   struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
                       const char *dev_name, void *data);
static inline int ext2_feature_set_ok(struct super_block *sb);
static inline int ext3_feature_set_ok(struct super_block *sb);
static int ext4_feature_set_ok(struct super_block *sb, int readonly);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
static struct inode *ext4_get_journal_inode(struct super_block *sb,
                                            unsigned int journal_inum);
  84
  85 /*
  86  * Lock ordering
  87  *
  88  * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
  89  * i_mmap_rwsem (inode->i_mmap_rwsem)!
  90  *
  91  * page fault path:
  92  * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
  93  *   page lock -> i_data_sem (rw)
  94  *
  95  * buffered write path:
  96  * sb_start_write -> i_mutex -> mmap_sem
  97  * sb_start_write -> i_mutex -> transaction start -> page lock ->
  98  *   i_data_sem (rw)
  99  *
 100  * truncate:
 101  * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
 102  *   i_mmap_rwsem (w) -> page lock
 103  * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
 104  *   transaction start -> i_data_sem (rw)
 105  *
 106  * direct IO:
 107  * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
 108  * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
 109  *   transaction start -> i_data_sem (rw)
 110  *
 111  * writepages:
 112  * transaction start -> page lock(s) -> i_data_sem (rw)
 113  */
 114
/*
 * Boolean module parameter "userns_mounts" (default false, permissions
 * 0644).  Its description says it allows mounts from unprivileged user
 * namespaces.
 * NOTE(review): the code that consults this flag is not visible in this
 * part of the file -- confirm where it is enforced.
 */
static bool userns_mounts = false;
module_param(userns_mounts, bool, 0644);
MODULE_PARM_DESC(userns_mounts, "Allow mounts from unprivileged user namespaces");
 118
/*
 * When no native ext2 driver is configured (neither built in nor as a
 * module) and CONFIG_EXT4_USE_FOR_EXT2 is set, define an "ext2" filesystem
 * type whose mount callback is ext4_mount().
 *
 * IS_EXT2_SB(sb) is then true when the holder of the superblock's block
 * device is that type; in every other configuration it is the constant 0.
 */
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static struct file_system_type ext2_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext2",
        .mount          = ext4_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV | FS_USERNS_MOUNT,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif
 133
 134
/*
 * "ext3" filesystem type: mounts through ext4_mount() and is torn down
 * with kill_block_super().
 *
 * IS_EXT3_SB(sb) is true when the holder of the superblock's block device
 * is this type.
 */
static struct file_system_type ext3_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext3",
        .mount          = ext4_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV | FS_USERNS_MOUNT,
};
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
 145
 146 static int ext4_verify_csum_type(struct super_block *sb,
 147                                  struct ext4_super_block *es)
 148 {
 149         if (!ext4_has_feature_metadata_csum(sb))
 150                 return 1;
 151
 152         return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
 153 }
 154
 155 static __le32 ext4_superblock_csum(struct super_block *sb,
 156                                    struct ext4_super_block *es)
 157 {
 158         struct ext4_sb_info *sbi = EXT4_SB(sb);
 159         int offset = offsetof(struct ext4_super_block, s_checksum);
 160         __u32 csum;
 161
 162         csum = ext4_chksum(sbi, ~0, (char *)es, offset);
 163
 164         return cpu_to_le32(csum);
 165 }
 166
 167 static int ext4_superblock_csum_verify(struct super_block *sb,
 168                                        struct ext4_super_block *es)
 169 {
 170         if (!ext4_has_metadata_csum(sb))
 171                 return 1;
 172
 173         return es->s_checksum == ext4_superblock_csum(sb, es);
 174 }
 175
 176 void ext4_superblock_csum_set(struct super_block *sb)
 177 {
 178         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 179
 180         if (!ext4_has_metadata_csum(sb))
 181                 return;
 182
 183         es->s_checksum = ext4_superblock_csum(sb, es);
 184 }
 185
 186 void *ext4_kvmalloc(size_t size, gfp_t flags)
 187 {
 188         void *ret;
 189
 190         ret = kmalloc(size, flags | __GFP_NOWARN);
 191         if (!ret)
 192                 ret = __vmalloc(size, flags, PAGE_KERNEL);
 193         return ret;
 194 }
 195
 196 void *ext4_kvzalloc(size_t size, gfp_t flags)
 197 {
 198         void *ret;
 199
 200         ret = kzalloc(size, flags | __GFP_NOWARN);
 201         if (!ret)
 202                 ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
 203         return ret;
 204 }
 205
 206 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 207                                struct ext4_group_desc *bg)
 208 {
 209         return le32_to_cpu(bg->bg_block_bitmap_lo) |
 210                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 211                  (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 212 }
 213
 214 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 215                                struct ext4_group_desc *bg)
 216 {
 217         return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 218                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 219                  (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 220 }
 221
 222 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 223                               struct ext4_group_desc *bg)
 224 {
 225         return le32_to_cpu(bg->bg_inode_table_lo) |
 226                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 227                  (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 228 }
 229
 230 __u32 ext4_free_group_clusters(struct super_block *sb,
 231                                struct ext4_group_desc *bg)
 232 {
 233         return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 234                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 235                  (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 236 }
 237
 238 __u32 ext4_free_inodes_count(struct super_block *sb,
 239                               struct ext4_group_desc *bg)
 240 {
 241         return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 242                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 243                  (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 244 }
 245
 246 __u32 ext4_used_dirs_count(struct super_block *sb,
 247                               struct ext4_group_desc *bg)
 248 {
 249         return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 250                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 251                  (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 252 }
 253
 254 __u32 ext4_itable_unused_count(struct super_block *sb,
 255                               struct ext4_group_desc *bg)
 256 {
 257         return le16_to_cpu(bg->bg_itable_unused_lo) |
 258                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 259                  (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 260 }
 261
 262 void ext4_block_bitmap_set(struct super_block *sb,
 263                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 264 {
 265         bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 266         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 267                 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 268 }
 269
 270 void ext4_inode_bitmap_set(struct super_block *sb,
 271                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 272 {
 273         bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 274         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 275                 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 276 }
 277
 278 void ext4_inode_table_set(struct super_block *sb,
 279                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 280 {
 281         bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 282         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 283                 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 284 }
 285
 286 void ext4_free_group_clusters_set(struct super_block *sb,
 287                                   struct ext4_group_desc *bg, __u32 count)
 288 {
 289         bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 290         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 291                 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 292 }
 293
 294 void ext4_free_inodes_set(struct super_block *sb,
 295                           struct ext4_group_desc *bg, __u32 count)
 296 {
 297         bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 298         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 299                 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 300 }
 301
 302 void ext4_used_dirs_set(struct super_block *sb,
 303                           struct ext4_group_desc *bg, __u32 count)
 304 {
 305         bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 306         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 307                 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 308 }
 309
 310 void ext4_itable_unused_set(struct super_block *sb,
 311                           struct ext4_group_desc *bg, __u32 count)
 312 {
 313         bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 314         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 315                 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 316 }
 317
 318
 319 static void __save_error_info(struct super_block *sb, const char *func,
 320                             unsigned int line)
 321 {
 322         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 323
 324         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 325         if (bdev_read_only(sb->s_bdev))
 326                 return;
 327         es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 328         es->s_last_error_time = cpu_to_le32(get_seconds());
 329         strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
 330         es->s_last_error_line = cpu_to_le32(line);
 331         if (!es->s_first_error_time) {
 332                 es->s_first_error_time = es->s_last_error_time;
 333                 strncpy(es->s_first_error_func, func,
 334                         sizeof(es->s_first_error_func));
 335                 es->s_first_error_line = cpu_to_le32(line);
 336                 es->s_first_error_ino = es->s_last_error_ino;
 337                 es->s_first_error_block = es->s_last_error_block;
 338         }
 339         /*
 340          * Start the daily error reporting function if it hasn't been
 341          * started already
 342          */
 343         if (!es->s_error_count)
 344                 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
 345         le32_add_cpu(&es->s_error_count, 1);
 346 }
 347
 348 static void save_error_info(struct super_block *sb, const char *func,
 349                             unsigned int line)
 350 {
 351         __save_error_info(sb, func, line);
 352         ext4_commit_super(sb, 1);
 353 }
 354
 355 /*
 356  * The del_gendisk() function uninitializes the disk-specific data
 357  * structures, including the bdi structure, without telling anyone
 358  * else.  Once this happens, any attempt to call mark_buffer_dirty()
 359  * (for example, by ext4_commit_super), will cause a kernel OOPS.
 360  * This is a kludge to prevent these oops until we can put in a proper
 361  * hook in del_gendisk() to inform the VFS and file system layers.
 362  */
 363 static int block_device_ejected(struct super_block *sb)
 364 {
 365         struct inode *bd_inode = sb->s_bdev->bd_inode;
 366         struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
 367
 368         return bdi->dev == NULL;
 369 }
 370
 371 static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 372 {
 373         struct super_block              *sb = journal->j_private;
 374         struct ext4_sb_info             *sbi = EXT4_SB(sb);
 375         int                             error = is_journal_aborted(journal);
 376         struct ext4_journal_cb_entry    *jce;
 377
 378         BUG_ON(txn->t_state == T_FINISHED);
 379         spin_lock(&sbi->s_md_lock);
 380         while (!list_empty(&txn->t_private_list)) {
 381                 jce = list_entry(txn->t_private_list.next,
 382                                  struct ext4_journal_cb_entry, jce_list);
 383                 list_del_init(&jce->jce_list);
 384                 spin_unlock(&sbi->s_md_lock);
 385                 jce->jce_func(sb, jce, error);
 386                 spin_lock(&sbi->s_md_lock);
 387         }
 388         spin_unlock(&sbi->s_md_lock);
 389 }
 390
 391 /* Deal with the reporting of failure conditions on a filesystem such as
 392  * inconsistencies detected or read IO failures.
 393  *
 394  * On ext2, we can store the error state of the filesystem in the
 395  * superblock.  That is not possible on ext4, because we may have other
 396  * write ordering constraints on the superblock which prevent us from
 397  * writing it out straight away; and given that the journal is about to
 398  * be aborted, we can't rely on the current, or future, transactions to
 399  * write out the superblock safely.
 400  *
 401  * We'll just use the jbd2_journal_abort() error code to record an error in
 402  * the journal instead.  On recovery, the journal will complain about
 403  * that error until we've noted it down and cleared it.
 404  */
 405
/*
 * Apply the mount's error policy after an error has been recorded.
 *
 * Nothing is done when the superblock is already flagged MS_RDONLY.
 * Otherwise, in order:
 *  - unless ERRORS_CONT is set, flag the filesystem as aborted and abort
 *    the journal (if there is one) with -EIO;
 *  - if ERRORS_RO is set, log a message and set MS_RDONLY;
 *  - if ERRORS_PANIC is set, panic, except when a journal exists and does
 *    not have JBD2_REC_ERR set, in which case return without panicking.
 */
static void ext4_handle_error(struct super_block *sb)
{
        if (sb->s_flags & MS_RDONLY)
                return;

        if (!test_opt(sb, ERRORS_CONT)) {
                journal_t *journal = EXT4_SB(sb)->s_journal;

                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
                if (journal)
                        jbd2_journal_abort(journal, -EIO);
        }
        if (test_opt(sb, ERRORS_RO)) {
                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
                /*
                 * Make sure updated value of ->s_mount_flags will be visible
                 * before ->s_flags update
                 */
                smp_wmb();
                sb->s_flags |= MS_RDONLY;
        }
        if (test_opt(sb, ERRORS_PANIC)) {
                /*
                 * With a journal present, only panic once JBD2_REC_ERR is
                 * set in its flags.
                 */
                if (EXT4_SB(sb)->s_journal &&
                  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
                        return;
                panic("EXT4-fs (device %s): panic forced after error\n",
                        sb->s_id);
        }
}
 435
/*
 * Rate-limit check for error messages: evaluates ___ratelimit() on the
 * per-superblock s_err_ratelimit_state, tagged "EXT4-fs error".  Callers
 * in this file print only when the result is non-zero.
 */
#define ext4_error_ratelimit(sb)                                        \
                ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),     \
                             "EXT4-fs error")
 439
 440 void __ext4_error(struct super_block *sb, const char *function,
 441                   unsigned int line, const char *fmt, ...)
 442 {
 443         struct va_format vaf;
 444         va_list args;
 445
 446         if (ext4_error_ratelimit(sb)) {
 447                 va_start(args, fmt);
 448                 vaf.fmt = fmt;
 449                 vaf.va = &args;
 450                 printk(KERN_CRIT
 451                        "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
 452                        sb->s_id, function, line, current->comm, &vaf);
 453                 va_end(args);
 454         }
 455         save_error_info(sb, function, line);
 456         ext4_handle_error(sb);
 457 }
 458
 459 void __ext4_error_inode(struct inode *inode, const char *function,
 460                         unsigned int line, ext4_fsblk_t block,
 461                         const char *fmt, ...)
 462 {
 463         va_list args;
 464         struct va_format vaf;
 465         struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 466
 467         es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 468         es->s_last_error_block = cpu_to_le64(block);
 469         if (ext4_error_ratelimit(inode->i_sb)) {
 470                 va_start(args, fmt);
 471                 vaf.fmt = fmt;
 472                 vaf.va = &args;
 473                 if (block)
 474                         printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 475                                "inode #%lu: block %llu: comm %s: %pV\n",
 476                                inode->i_sb->s_id, function, line, inode->i_ino,
 477                                block, current->comm, &vaf);
 478                 else
 479                         printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 480                                "inode #%lu: comm %s: %pV\n",
 481                                inode->i_sb->s_id, function, line, inode->i_ino,
 482                                current->comm, &vaf);
 483                 va_end(args);
 484         }
 485         save_error_info(inode->i_sb, function, line);
 486         ext4_handle_error(inode->i_sb);
 487 }
 488
 489 void __ext4_error_file(struct file *file, const char *function,
 490                        unsigned int line, ext4_fsblk_t block,
 491                        const char *fmt, ...)
 492 {
 493         va_list args;
 494         struct va_format vaf;
 495         struct ext4_super_block *es;
 496         struct inode *inode = file_inode(file);
 497         char pathname[80], *path;
 498
 499         es = EXT4_SB(inode->i_sb)->s_es;
 500         es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 501         if (ext4_error_ratelimit(inode->i_sb)) {
 502                 path = file_path(file, pathname, sizeof(pathname));
 503                 if (IS_ERR(path))
 504                         path = "(unknown)";
 505                 va_start(args, fmt);
 506                 vaf.fmt = fmt;
 507                 vaf.va = &args;
 508                 if (block)
 509                         printk(KERN_CRIT
 510                                "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 511                                "block %llu: comm %s: path %s: %pV\n",
 512                                inode->i_sb->s_id, function, line, inode->i_ino,
 513                                block, current->comm, path, &vaf);
 514                 else
 515                         printk(KERN_CRIT
 516                                "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 517                                "comm %s: path %s: %pV\n",
 518                                inode->i_sb->s_id, function, line, inode->i_ino,
 519                                current->comm, path, &vaf);
 520                 va_end(args);
 521         }
 522         save_error_info(inode->i_sb, function, line);
 523         ext4_handle_error(inode->i_sb);
 524 }
 525
 526 const char *ext4_decode_error(struct super_block *sb, int errno,
 527                               char nbuf[16])
 528 {
 529         char *errstr = NULL;
 530
 531         switch (errno) {
 532         case -EFSCORRUPTED:
 533                 errstr = "Corrupt filesystem";
 534                 break;
 535         case -EFSBADCRC:
 536                 errstr = "Filesystem failed CRC";
 537                 break;
 538         case -EIO:
 539                 errstr = "IO failure";
 540                 break;
 541         case -ENOMEM:
 542                 errstr = "Out of memory";
 543                 break;
 544         case -EROFS:
 545                 if (!sb || (EXT4_SB(sb)->s_journal &&
 546                             EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
 547                         errstr = "Journal has aborted";
 548                 else
 549                         errstr = "Readonly filesystem";
 550                 break;
 551         default:
 552                 /* If the caller passed in an extra buffer for unknown
 553                  * errors, textualise them now.  Else we just return
 554                  * NULL. */
 555                 if (nbuf) {
 556                         /* Check for truncated error codes... */
 557                         if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 558                                 errstr = nbuf;
 559                 }
 560                 break;
 561         }
 562
 563         return errstr;
 564 }
 565
 566 /* __ext4_std_error decodes expected errors from journaling functions
 567  * automatically and invokes the appropriate error response.  */
 568
 569 void __ext4_std_error(struct super_block *sb, const char *function,
 570                       unsigned int line, int errno)
 571 {
 572         char nbuf[16];
 573         const char *errstr;
 574
 575         /* Special case: if the error is EROFS, and we're not already
 576          * inside a transaction, then there's really no point in logging
 577          * an error. */
 578         if (errno == -EROFS && journal_current_handle() == NULL &&
 579             (sb->s_flags & MS_RDONLY))
 580                 return;
 581
 582         if (ext4_error_ratelimit(sb)) {
 583                 errstr = ext4_decode_error(sb, errno, nbuf);
 584                 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
 585                        sb->s_id, function, line, errstr);
 586         }
 587
 588         save_error_info(sb, function, line);
 589         ext4_handle_error(sb);
 590 }
 591
 592 /*
 593  * ext4_abort is a much stronger failure handler than ext4_error.  The
 594  * abort function may be used to deal with unrecoverable failures such
 595  * as journal IO errors or ENOMEM at a critical moment in log management.
 596  *
 597  * We unconditionally force the filesystem into an ABORT|READONLY state,
 598  * unless the error response on the fs has been set to panic in which
 599  * case we take the easy way out and panic immediately.
 600  */
 601
/*
 * Abort the filesystem: save the error, print the message (not rate
 * limited), and, if the superblock is not already MS_RDONLY, flag the
 * filesystem as aborted, set MS_RDONLY, abort the journal with -EIO and
 * save the error again.  With ERRORS_PANIC set this then panics, except
 * when a journal exists and does not have JBD2_REC_ERR set.
 *
 * @function/@line identify the reporting site; @fmt and the variadic
 * arguments form the message.
 */
void __ext4_abort(struct super_block *sb, const char *function,
                unsigned int line, const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;

        save_error_info(sb, function, line);
        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
               sb->s_id, function, line, &vaf);
        va_end(args);

        if ((sb->s_flags & MS_RDONLY) == 0) {
                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
                /*
                 * Make sure updated value of ->s_mount_flags will be visible
                 * before ->s_flags update
                 */
                smp_wmb();
                sb->s_flags |= MS_RDONLY;
                if (EXT4_SB(sb)->s_journal)
                        jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
                /* Second save, after the journal has been aborted. */
                save_error_info(sb, function, line);
        }
        if (test_opt(sb, ERRORS_PANIC)) {
                /*
                 * With a journal present, only panic once JBD2_REC_ERR is
                 * set in its flags.
                 */
                if (EXT4_SB(sb)->s_journal &&
                  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
                        return;
                panic("EXT4-fs panic from previous error\n");
        }
}
 636
 637 void __ext4_msg(struct super_block *sb,
 638                 const char *prefix, const char *fmt, ...)
 639 {
 640         struct va_format vaf;
 641         va_list args;
 642
 643         if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
 644                 return;
 645
 646         va_start(args, fmt);
 647         vaf.fmt = fmt;
 648         vaf.va = &args;
 649         printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 650         va_end(args);
 651 }
 652
/*
 * Rate-limit check for warnings: evaluates ___ratelimit() on the
 * per-superblock s_warning_ratelimit_state, tagged "EXT4-fs warning".
 * Callers in this file print only when the result is non-zero.
 */
#define ext4_warning_ratelimit(sb)                                      \
                ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
                             "EXT4-fs warning")
 656
 657 void __ext4_warning(struct super_block *sb, const char *function,
 658                     unsigned int line, const char *fmt, ...)
 659 {
 660         struct va_format vaf;
 661         va_list args;
 662
 663         if (!ext4_warning_ratelimit(sb))
 664                 return;
 665
 666         va_start(args, fmt);
 667         vaf.fmt = fmt;
 668         vaf.va = &args;
 669         printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
 670                sb->s_id, function, line, &vaf);
 671         va_end(args);
 672 }
 673
 674 void __ext4_warning_inode(const struct inode *inode, const char *function,
 675                           unsigned int line, const char *fmt, ...)
 676 {
 677         struct va_format vaf;
 678         va_list args;
 679
 680         if (!ext4_warning_ratelimit(inode->i_sb))
 681                 return;
 682
 683         va_start(args, fmt);
 684         vaf.fmt = fmt;
 685         vaf.va = &args;
 686         printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
 687                "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
 688                function, line, inode->i_ino, current->comm, &vaf);
 689         va_end(args);
 690 }
 691
/*
 * Report an error for block group @grp; the body unlocks and relocks the
 * group lock around ext4_handle_error(), so the caller is expected to
 * hold that lock (see the __releases/__acquires annotations).
 *
 * @ino and @block (either may be 0) are stored in the superblock's "last
 * error" fields and the error is recorded with __save_error_info(), i.e.
 * without committing the superblock.  The message is printed only when
 * the error rate limiter allows it.
 *
 * With ERRORS_CONT the superblock is committed (non-sync) and the function
 * returns with the group lock untouched.  Otherwise the group lock is
 * dropped around ext4_handle_error() and retaken before returning.
 */
void __ext4_grp_locked_error(const char *function, unsigned int line,
                             struct super_block *sb, ext4_group_t grp,
                             unsigned long ino, ext4_fsblk_t block,
                             const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
        struct va_format vaf;
        va_list args;
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;

        es->s_last_error_ino = cpu_to_le32(ino);
        es->s_last_error_block = cpu_to_le64(block);
        __save_error_info(sb, function, line);

        if (ext4_error_ratelimit(sb)) {
                va_start(args, fmt);
                vaf.fmt = fmt;
                vaf.va = &args;
                /* The message is assembled from several printk() calls. */
                printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
                       sb->s_id, function, line, grp);
                if (ino)
                        printk(KERN_CONT "inode %lu: ", ino);
                if (block)
                        printk(KERN_CONT "block %llu:",
                               (unsigned long long) block);
                printk(KERN_CONT "%pV\n", &vaf);
                va_end(args);
        }

        if (test_opt(sb, ERRORS_CONT)) {
                ext4_commit_super(sb, 0);
                return;
        }

        ext4_unlock_group(sb, grp);
        ext4_handle_error(sb);
        /*
         * We only get here in the ERRORS_RO case; relocking the group
         * may be dangerous, but nothing bad will happen since the
         * filesystem will have already been marked read/only and the
         * journal has been aborted.
         *
         * NOTE(review): this comment used to say that 1 is returned as a
         * hint for callers who might want to distinguish the ERRORS_CONT
         * and ERRORS_RO cases.  The function returns void, so no such
         * hint is delivered.
         */
        ext4_lock_group(sb, grp);
        return;
}
 743
 744 void ext4_update_dynamic_rev(struct super_block *sb)
 745 {
 746         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 747
 748         if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 749                 return;
 750
 751         ext4_warning(sb,
 752                      "updating to rev %d because of new feature flag, "
 753                      "running e2fsck is recommended",
 754                      EXT4_DYNAMIC_REV);
 755
 756         es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
 757         es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
 758         es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 759         /* leave es->s_feature_*compat flags alone */
 760         /* es->s_uuid will be set by e2fsck if empty */
 761
 762         /*
 763          * The rest of the superblock fields should be zero, and if not it
 764          * means they are likely already in use, so leave them alone.  We
 765          * can leave it up to e2fsck to clean up any inconsistencies there.
 766          */
 767 }
 768
 769 /*
 770  * Open the external journal device
 771  */
 772 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 773 {
 774         struct block_device *bdev;
 775         char b[BDEVNAME_SIZE];
 776
 777         bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
 778         if (IS_ERR(bdev))
 779                 goto fail;
 780         return bdev;
 781
 782 fail:
 783         ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 784                         __bdevname(dev, b), PTR_ERR(bdev));
 785         return NULL;
 786 }
 787
 788 /*
 789  * Release the journal device
 790  */
 791 static void ext4_blkdev_put(struct block_device *bdev)
 792 {
 793         blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 794 }
 795
 796 static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
 797 {
 798         struct block_device *bdev;
 799         bdev = sbi->journal_bdev;
 800         if (bdev) {
 801                 ext4_blkdev_put(bdev);
 802                 sbi->journal_bdev = NULL;
 803         }
 804 }
 805
 806 static inline struct inode *orphan_list_entry(struct list_head *l)
 807 {
 808         return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 809 }
 810
 811 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 812 {
 813         struct list_head *l;
 814
 815         ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
 816                  le32_to_cpu(sbi->s_es->s_last_orphan));
 817
 818         printk(KERN_ERR "sb_info orphan list:\n");
 819         list_for_each(l, &sbi->s_orphan) {
 820                 struct inode *inode = orphan_list_entry(l);
 821                 printk(KERN_ERR "  "
 822                        "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 823                        inode->i_sb->s_id, inode->i_ino, inode,
 824                        inode->i_mode, inode->i_nlink,
 825                        NEXT_ORPHAN(inode));
 826         }
 827 }
 828
 829 static void ext4_put_super(struct super_block *sb)
 830 {
 831         struct ext4_sb_info *sbi = EXT4_SB(sb);
 832         struct ext4_super_block *es = sbi->s_es;
 833         int aborted = 0;
 834         int i, err;
 835
 836         ext4_unregister_li_request(sb);
 837         dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 838
 839         flush_workqueue(sbi->rsv_conversion_wq);
 840         destroy_workqueue(sbi->rsv_conversion_wq);
 841
 842         if (sbi->s_journal) {
 843                 aborted = is_journal_aborted(sbi->s_journal);
 844                 err = jbd2_journal_destroy(sbi->s_journal);
 845                 sbi->s_journal = NULL;
 846                 if ((err < 0) && !aborted)
 847                         ext4_abort(sb, "Couldn't clean up the journal");
 848         }
 849
 850         ext4_unregister_sysfs(sb);
 851         ext4_es_unregister_shrinker(sbi);
 852         del_timer_sync(&sbi->s_err_report);
 853         ext4_release_system_zone(sb);
 854         ext4_mb_release(sb);
 855         ext4_ext_release(sb);
 856
 857         if (!(sb->s_flags & MS_RDONLY) && !aborted) {
 858                 ext4_clear_feature_journal_needs_recovery(sb);
 859                 es->s_state = cpu_to_le16(sbi->s_mount_state);
 860         }
 861         if (!(sb->s_flags & MS_RDONLY))
 862                 ext4_commit_super(sb, 1);
 863
 864         for (i = 0; i < sbi->s_gdb_count; i++)
 865                 brelse(sbi->s_group_desc[i]);
 866         kvfree(sbi->s_group_desc);
 867         kvfree(sbi->s_flex_groups);
 868         percpu_counter_destroy(&sbi->s_freeclusters_counter);
 869         percpu_counter_destroy(&sbi->s_freeinodes_counter);
 870         percpu_counter_destroy(&sbi->s_dirs_counter);
 871         percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
 872         percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
 873 #ifdef CONFIG_QUOTA
 874         for (i = 0; i < EXT4_MAXQUOTAS; i++)
 875                 kfree(sbi->s_qf_names[i]);
 876 #endif
 877
 878         /* Debugging code just in case the in-memory inode orphan list
 879          * isn't empty.  The on-disk one can be non-empty if we've
 880          * detected an error and taken the fs readonly, but the
 881          * in-memory list had better be clean by this point. */
 882         if (!list_empty(&sbi->s_orphan))
 883                 dump_orphan_list(sb, sbi);
 884         J_ASSERT(list_empty(&sbi->s_orphan));
 885
 886         sync_blockdev(sb->s_bdev);
 887         invalidate_bdev(sb->s_bdev);
 888         if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
 889                 /*
 890                  * Invalidate the journal device's buffers.  We don't want them
 891                  * floating about in memory - the physical journal device may
 892                  * hotswapped, and it breaks the `ro-after' testing code.
 893                  */
 894                 sync_blockdev(sbi->journal_bdev);
 895                 invalidate_bdev(sbi->journal_bdev);
 896                 ext4_blkdev_remove(sbi);
 897         }
 898         if (sbi->s_mb_cache) {
 899                 ext4_xattr_destroy_cache(sbi->s_mb_cache);
 900                 sbi->s_mb_cache = NULL;
 901         }
 902         if (sbi->s_mmp_tsk)
 903                 kthread_stop(sbi->s_mmp_tsk);
 904         brelse(sbi->s_sbh);
 905         sb->s_fs_info = NULL;
 906         /*
 907          * Now that we are completely done shutting down the
 908          * superblock, we need to actually destroy the kobject.
 909          */
 910         kobject_put(&sbi->s_kobj);
 911         wait_for_completion(&sbi->s_kobj_unregister);
 912         if (sbi->s_chksum_driver)
 913                 crypto_free_shash(sbi->s_chksum_driver);
 914         kfree(sbi->s_blockgroup_lock);
 915         kfree(sbi);
 916 }
 917
 918 static struct kmem_cache *ext4_inode_cachep;
 919
 920 /*
 921  * Called inside transaction, so use GFP_NOFS
 922  */
 923 static struct inode *ext4_alloc_inode(struct super_block *sb)
 924 {
 925         struct ext4_inode_info *ei;
 926
 927         ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 928         if (!ei)
 929                 return NULL;
 930
 931         ei->vfs_inode.i_version = 1;
 932         spin_lock_init(&ei->i_raw_lock);
 933         INIT_LIST_HEAD(&ei->i_prealloc_list);
 934         spin_lock_init(&ei->i_prealloc_lock);
 935         ext4_es_init_tree(&ei->i_es_tree);
 936         rwlock_init(&ei->i_es_lock);
 937         INIT_LIST_HEAD(&ei->i_es_list);
 938         ei->i_es_all_nr = 0;
 939         ei->i_es_shk_nr = 0;
 940         ei->i_es_shrink_lblk = 0;
 941         ei->i_reserved_data_blocks = 0;
 942         ei->i_reserved_meta_blocks = 0;
 943         ei->i_allocated_meta_blocks = 0;
 944         ei->i_da_metadata_calc_len = 0;
 945         ei->i_da_metadata_calc_last_lblock = 0;
 946         spin_lock_init(&(ei->i_block_reservation_lock));
 947 #ifdef CONFIG_QUOTA
 948         ei->i_reserved_quota = 0;
 949         memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
 950 #endif
 951         ei->jinode = NULL;
 952         INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
 953         spin_lock_init(&ei->i_completed_io_lock);
 954         ei->i_sync_tid = 0;
 955         ei->i_datasync_tid = 0;
 956         atomic_set(&ei->i_unwritten, 0);
 957         INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
 958         return &ei->vfs_inode;
 959 }
 960
 961 static int ext4_drop_inode(struct inode *inode)
 962 {
 963         int drop = generic_drop_inode(inode);
 964
 965         trace_ext4_drop_inode(inode, drop);
 966         return drop;
 967 }
 968
 969 static void ext4_i_callback(struct rcu_head *head)
 970 {
 971         struct inode *inode = container_of(head, struct inode, i_rcu);
 972         kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 973 }
 974
 975 static void ext4_destroy_inode(struct inode *inode)
 976 {
 977         if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 978                 ext4_msg(inode->i_sb, KERN_ERR,
 979                          "Inode %lu (%p): orphan list check failed!",
 980                          inode->i_ino, EXT4_I(inode));
 981                 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 982                                 EXT4_I(inode), sizeof(struct ext4_inode_info),
 983                                 true);
 984                 dump_stack();
 985         }
 986         call_rcu(&inode->i_rcu, ext4_i_callback);
 987 }
 988
 989 static void init_once(void *foo)
 990 {
 991         struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 992
 993         INIT_LIST_HEAD(&ei->i_orphan);
 994         init_rwsem(&ei->xattr_sem);
 995         init_rwsem(&ei->i_data_sem);
 996         init_rwsem(&ei->i_mmap_sem);
 997         inode_init_once(&ei->vfs_inode);
 998 }
 999
1000 static int __init init_inodecache(void)
1001 {
1002         ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
1003                                              sizeof(struct ext4_inode_info),
1004                                              0, (SLAB_RECLAIM_ACCOUNT|
1005                                                 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
1006                                              init_once);
1007         if (ext4_inode_cachep == NULL)
1008                 return -ENOMEM;
1009         return 0;
1010 }
1011
1012 static void destroy_inodecache(void)
1013 {
1014         /*
1015          * Make sure all delayed rcu free inodes are flushed before we
1016          * destroy cache.
1017          */
1018         rcu_barrier();
1019         kmem_cache_destroy(ext4_inode_cachep);
1020 }
1021
1022 void ext4_clear_inode(struct inode *inode)
1023 {
1024         invalidate_inode_buffers(inode);
1025         clear_inode(inode);
1026         dquot_drop(inode);
1027         ext4_discard_preallocations(inode);
1028         ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
1029         if (EXT4_I(inode)->jinode) {
1030                 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1031                                                EXT4_I(inode)->jinode);
1032                 jbd2_free_inode(EXT4_I(inode)->jinode);
1033                 EXT4_I(inode)->jinode = NULL;
1034         }
1035 #ifdef CONFIG_EXT4_FS_ENCRYPTION
1036         fscrypt_put_encryption_info(inode, NULL);
1037 #endif
1038 }
1039
1040 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1041                                         u64 ino, u32 generation)
1042 {
1043         struct inode *inode;
1044
1045         if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
1046                 return ERR_PTR(-ESTALE);
1047         if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
1048                 return ERR_PTR(-ESTALE);
1049
1050         /* iget isn't really right if the inode is currently unallocated!!
1051          *
1052          * ext4_read_inode will return a bad_inode if the inode had been
1053          * deleted, so we should be safe.
1054          *
1055          * Currently we don't know the generation for parent directory, so
1056          * a generation of 0 means "accept any"
1057          */
1058         inode = ext4_iget_normal(sb, ino);
1059         if (IS_ERR(inode))
1060                 return ERR_CAST(inode);
1061         if (generation && inode->i_generation != generation) {
1062                 iput(inode);
1063                 return ERR_PTR(-ESTALE);
1064         }
1065
1066         return inode;
1067 }
1068
1069 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1070                                         int fh_len, int fh_type)
1071 {
1072         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1073                                     ext4_nfs_get_inode);
1074 }
1075
1076 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1077                                         int fh_len, int fh_type)
1078 {
1079         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1080                                     ext4_nfs_get_inode);
1081 }
1082
1083 /*
1084  * Try to release metadata pages (indirect blocks, directories) which are
1085  * mapped via the block device.  Since these pages could have journal heads
1086  * which would prevent try_to_free_buffers() from freeing them, we must use
1087  * jbd2 layer's try_to_free_buffers() function to release them.
1088  */
1089 static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1090                                  gfp_t wait)
1091 {
1092         journal_t *journal = EXT4_SB(sb)->s_journal;
1093
1094         WARN_ON(PageChecked(page));
1095         if (!page_has_buffers(page))
1096                 return 0;
1097         if (journal)
1098                 return jbd2_journal_try_to_free_buffers(journal, page,
1099                                                 wait & ~__GFP_DIRECT_RECLAIM);
1100         return try_to_free_buffers(page);
1101 }
1102
1103 #ifdef CONFIG_EXT4_FS_ENCRYPTION
1104 static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
1105 {
1106         return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
1107                                  EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
1108 }
1109
1110 static int ext4_key_prefix(struct inode *inode, u8 **key)
1111 {
1112         *key = EXT4_SB(inode->i_sb)->key_prefix;
1113         return EXT4_SB(inode->i_sb)->key_prefix_size;
1114 }
1115
/* fscrypt ->prepare_context: run ext4_convert_inline_data() on the inode. */
static int ext4_prepare_context(struct inode *inode)
{
        int err;

        err = ext4_convert_inline_data(inode);
        return err;
}
1120
1121 static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
1122                                                         void *fs_data)
1123 {
1124         handle_t *handle = fs_data;
1125         int res, res2, retries = 0;
1126
1127         /*
1128          * If a journal handle was specified, then the encryption context is
1129          * being set on a new inode via inheritance and is part of a larger
1130          * transaction to create the inode.  Otherwise the encryption context is
1131          * being set on an existing inode in its own transaction.  Only in the
1132          * latter case should the "retry on ENOSPC" logic be used.
1133          */
1134
1135         if (handle) {
1136                 res = ext4_xattr_set_handle(handle, inode,
1137                                             EXT4_XATTR_INDEX_ENCRYPTION,
1138                                             EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
1139                                             ctx, len, 0);
1140                 if (!res) {
1141                         ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
1142                         ext4_clear_inode_state(inode,
1143                                         EXT4_STATE_MAY_INLINE_DATA);
1144                         /*
1145                          * Update inode->i_flags - e.g. S_DAX may get disabled
1146                          */
1147                         ext4_set_inode_flags(inode);
1148                 }
1149                 return res;
1150         }
1151
1152 retry:
1153         handle = ext4_journal_start(inode, EXT4_HT_MISC,
1154                         ext4_jbd2_credits_xattr(inode));
1155         if (IS_ERR(handle))
1156                 return PTR_ERR(handle);
1157
1158         res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION,
1159                                     EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
1160                                     ctx, len, 0);
1161         if (!res) {
1162                 ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
1163                 /* Update inode->i_flags - e.g. S_DAX may get disabled */
1164                 ext4_set_inode_flags(inode);
1165                 res = ext4_mark_inode_dirty(handle, inode);
1166                 if (res)
1167                         EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
1168         }
1169         res2 = ext4_journal_stop(handle);
1170
1171         if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
1172                 goto retry;
1173         if (!res)
1174                 res = res2;
1175         return res;
1176 }
1177
1178 static int ext4_dummy_context(struct inode *inode)
1179 {
1180         return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
1181 }
1182
1183 static unsigned ext4_max_namelen(struct inode *inode)
1184 {
1185         return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
1186                 EXT4_NAME_LEN;
1187 }
1188
1189 static struct fscrypt_operations ext4_cryptops = {
1190         .get_context            = ext4_get_context,
1191         .key_prefix             = ext4_key_prefix,
1192         .prepare_context        = ext4_prepare_context,
1193         .set_context            = ext4_set_context,
1194         .dummy_context          = ext4_dummy_context,
1195         .is_encrypted           = ext4_encrypted_inode,
1196         .empty_dir              = ext4_empty_dir,
1197         .max_namelen            = ext4_max_namelen,
1198 };
1199 #else
1200 static struct fscrypt_operations ext4_cryptops = {
1201         .is_encrypted           = ext4_encrypted_inode,
1202 };
1203 #endif
1204
1205 #ifdef CONFIG_QUOTA
1206 static char *quotatypes[] = INITQFNAMES;
1207 #define QTYPE2NAME(t) (quotatypes[t])
1208
1209 static int ext4_write_dquot(struct dquot *dquot);
1210 static int ext4_acquire_dquot(struct dquot *dquot);
1211 static int ext4_release_dquot(struct dquot *dquot);
1212 static int ext4_mark_dquot_dirty(struct dquot *dquot);
1213 static int ext4_write_info(struct super_block *sb, int type);
1214 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1215                          const struct path *path);
1216 static int ext4_quota_off(struct super_block *sb, int type);
1217 static int ext4_quota_on_mount(struct super_block *sb, int type);
1218 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1219                                size_t len, loff_t off);
1220 static ssize_t ext4_quota_write(struct super_block *sb, int type,
1221                                 const char *data, size_t len, loff_t off);
1222 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1223                              unsigned int flags);
1224 static int ext4_enable_quotas(struct super_block *sb);
1225 static int ext4_get_next_id(struct super_block *sb, struct kqid *qid);
1226
1227 static struct dquot **ext4_get_dquots(struct inode *inode)
1228 {
1229         return EXT4_I(inode)->i_dquot;
1230 }
1231
1232 static const struct dquot_operations ext4_quota_operations = {
1233         .get_reserved_space = ext4_get_reserved_space,
1234         .write_dquot    = ext4_write_dquot,
1235         .acquire_dquot  = ext4_acquire_dquot,
1236         .release_dquot  = ext4_release_dquot,
1237         .mark_dirty     = ext4_mark_dquot_dirty,
1238         .write_info     = ext4_write_info,
1239         .alloc_dquot    = dquot_alloc,
1240         .destroy_dquot  = dquot_destroy,
1241         .get_projid     = ext4_get_projid,
1242         .get_next_id    = ext4_get_next_id,
1243 };
1244
1245 static const struct quotactl_ops ext4_qctl_operations = {
1246         .quota_on       = ext4_quota_on,
1247         .quota_off      = ext4_quota_off,
1248         .quota_sync     = dquot_quota_sync,
1249         .get_state      = dquot_get_state,
1250         .set_info       = dquot_set_dqinfo,
1251         .get_dqblk      = dquot_get_dqblk,
1252         .set_dqblk      = dquot_set_dqblk,
1253         .get_nextdqblk  = dquot_get_next_dqblk,
1254 };
1255 #endif
1256
1257 static const struct super_operations ext4_sops = {
1258         .alloc_inode    = ext4_alloc_inode,
1259         .destroy_inode  = ext4_destroy_inode,
1260         .write_inode    = ext4_write_inode,
1261         .dirty_inode    = ext4_dirty_inode,
1262         .drop_inode     = ext4_drop_inode,
1263         .evict_inode    = ext4_evict_inode,
1264         .put_super      = ext4_put_super,
1265         .sync_fs        = ext4_sync_fs,
1266         .freeze_fs      = ext4_freeze,
1267         .unfreeze_fs    = ext4_unfreeze,
1268         .statfs         = ext4_statfs,
1269         .remount_fs     = ext4_remount,
1270         .show_options   = ext4_show_options,
1271 #ifdef CONFIG_QUOTA
1272         .quota_read     = ext4_quota_read,
1273         .quota_write    = ext4_quota_write,
1274         .get_dquots     = ext4_get_dquots,
1275 #endif
1276         .bdev_try_to_free_page = bdev_try_to_free_page,
1277 };
1278
1279 static const struct export_operations ext4_export_ops = {
1280         .fh_to_dentry = ext4_fh_to_dentry,
1281         .fh_to_parent = ext4_fh_to_parent,
1282         .get_parent = ext4_get_parent,
1283 };
1284
/*
 * Mount option tokens.  One enumerator per line, in the original order, so
 * the numeric values are unchanged.
 */
enum {
        Opt_bsd_df,
        Opt_minix_df,
        Opt_grpid,
        Opt_nogrpid,
        Opt_resgid,
        Opt_resuid,
        Opt_sb,
        Opt_err_cont,
        Opt_err_panic,
        Opt_err_ro,
        Opt_nouid32,
        Opt_debug,
        Opt_removed,
        Opt_user_xattr,
        Opt_nouser_xattr,
        Opt_acl,
        Opt_noacl,
        Opt_auto_da_alloc,
        Opt_noauto_da_alloc,
        Opt_noload,
        Opt_commit,
        Opt_min_batch_time,
        Opt_max_batch_time,
        Opt_journal_dev,
        Opt_journal_path,
        Opt_journal_checksum,
        Opt_journal_async_commit,
        Opt_abort,
        Opt_data_journal,
        Opt_data_ordered,
        Opt_data_writeback,
        Opt_data_err_abort,
        Opt_data_err_ignore,
        Opt_test_dummy_encryption,
        Opt_usrjquota,
        Opt_grpjquota,
        Opt_offusrjquota,
        Opt_offgrpjquota,
        Opt_jqfmt_vfsold,
        Opt_jqfmt_vfsv0,
        Opt_jqfmt_vfsv1,
        Opt_quota,
        Opt_noquota,
        Opt_barrier,
        Opt_nobarrier,
        Opt_err,
        Opt_usrquota,
        Opt_grpquota,
        Opt_prjquota,
        Opt_i_version,
        Opt_dax,
        Opt_stripe,
        Opt_delalloc,
        Opt_nodelalloc,
        Opt_mblk_io_submit,
        Opt_lazytime,
        Opt_nolazytime,
        Opt_nomblk_io_submit,
        Opt_block_validity,
        Opt_noblock_validity,
        Opt_inode_readahead_blks,
        Opt_journal_ioprio,
        Opt_dioread_nolock,
        Opt_dioread_lock,
        Opt_discard,
        Opt_nodiscard,
        Opt_init_itable,
        Opt_noinit_itable,
        Opt_max_dir_size_kb,
        Opt_nojournal_checksum,
};
1307
1308 static const match_table_t tokens = {
1309         {Opt_bsd_df, "bsddf"},
1310         {Opt_minix_df, "minixdf"},
1311         {Opt_grpid, "grpid"},
1312         {Opt_grpid, "bsdgroups"},
1313         {Opt_nogrpid, "nogrpid"},
1314         {Opt_nogrpid, "sysvgroups"},
1315         {Opt_resgid, "resgid=%u"},
1316         {Opt_resuid, "resuid=%u"},
1317         {Opt_sb, "sb=%u"},
1318         {Opt_err_cont, "errors=continue"},
1319         {Opt_err_panic, "errors=panic"},
1320         {Opt_err_ro, "errors=remount-ro"},
1321         {Opt_nouid32, "nouid32"},
1322         {Opt_debug, "debug"},
1323         {Opt_removed, "oldalloc"},
1324         {Opt_removed, "orlov"},
1325         {Opt_user_xattr, "user_xattr"},
1326         {Opt_nouser_xattr, "nouser_xattr"},
1327         {Opt_acl, "acl"},
1328         {Opt_noacl, "noacl"},
1329         {Opt_noload, "norecovery"},
1330         {Opt_noload, "noload"},
1331         {Opt_removed, "nobh"},
1332         {Opt_removed, "bh"},
1333         {Opt_commit, "commit=%u"},
1334         {Opt_min_batch_time, "min_batch_time=%u"},
1335         {Opt_max_batch_time, "max_batch_time=%u"},
1336         {Opt_journal_dev, "journal_dev=%u"},
1337         {Opt_journal_path, "journal_path=%s"},
1338         {Opt_journal_checksum, "journal_checksum"},
1339         {Opt_nojournal_checksum, "nojournal_checksum"},
1340         {Opt_journal_async_commit, "journal_async_commit"},
1341         {Opt_abort, "abort"},
1342         {Opt_data_journal, "data=journal"},
1343         {Opt_data_ordered, "data=ordered"},
1344         {Opt_data_writeback, "data=writeback"},
1345         {Opt_data_err_abort, "data_err=abort"},
1346         {Opt_data_err_ignore, "data_err=ignore"},
1347         {Opt_offusrjquota, "usrjquota="},
1348         {Opt_usrjquota, "usrjquota=%s"},
1349         {Opt_offgrpjquota, "grpjquota="},
1350         {Opt_grpjquota, "grpjquota=%s"},
1351         {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1352         {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1353         {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
1354         {Opt_grpquota, "grpquota"},
1355         {Opt_noquota, "noquota"},
1356         {Opt_quota, "quota"},
1357         {Opt_usrquota, "usrquota"},
1358         {Opt_prjquota, "prjquota"},
1359         {Opt_barrier, "barrier=%u"},
1360         {Opt_barrier, "barrier"},
1361         {Opt_nobarrier, "nobarrier"},
1362         {Opt_i_version, "i_version"},
1363         {Opt_dax, "dax"},
1364         {Opt_stripe, "stripe=%u"},
1365         {Opt_delalloc, "delalloc"},
1366         {Opt_lazytime, "lazytime"},
1367         {Opt_nolazytime, "nolazytime"},
1368         {Opt_nodelalloc, "nodelalloc"},
1369         {Opt_removed, "mblk_io_submit"},
1370         {Opt_removed, "nomblk_io_submit"},
1371         {Opt_block_validity, "block_validity"},
1372         {Opt_noblock_validity, "noblock_validity"},
1373         {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1374         {Opt_journal_ioprio, "journal_ioprio=%u"},
1375         {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1376         {Opt_auto_da_alloc, "auto_da_alloc"},
1377         {Opt_noauto_da_alloc, "noauto_da_alloc"},
1378         {Opt_dioread_nolock, "dioread_nolock"},
1379         {Opt_dioread_lock, "dioread_lock"},
1380         {Opt_discard, "discard"},
1381         {Opt_nodiscard, "nodiscard"},
1382         {Opt_init_itable, "init_itable=%u"},
1383         {Opt_init_itable, "init_itable"},
1384         {Opt_noinit_itable, "noinit_itable"},
1385         {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
1386         {Opt_test_dummy_encryption, "test_dummy_encryption"},
1387         {Opt_removed, "check=none"},    /* mount option from ext2/3 */
1388         {Opt_removed, "nocheck"},       /* mount option from ext2/3 */
1389         {Opt_removed, "reservation"},   /* mount option from ext2/3 */
1390         {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
1391         {Opt_removed, "journal=%u"},    /* mount option from ext2/3 */
1392         {Opt_err, NULL},
1393 };
1394
1395 static ext4_fsblk_t get_sb_block(void **data)
1396 {
1397         ext4_fsblk_t    sb_block;
1398         char            *options = (char *) *data;
1399
1400         if (!options || strncmp(options, "sb=", 3) != 0)
1401                 return 1;       /* Default location */
1402
1403         options += 3;
1404         /* TODO: use simple_strtoll with >32bit ext4 */
1405         sb_block = simple_strtoul(options, &options, 0);
1406         if (*options && *options != ',') {
1407                 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1408                        (char *) *data);
1409                 return 1;
1410         }
1411         if (*options == ',')
1412                 options++;
1413         *data = (void *) options;
1414
1415         return sb_block;
1416 }
1417
/* Default journal I/O priority: best-effort class, level 3. */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))

/*
 * Format string for the deprecation warning; the two %s arguments are the
 * option name and when it will be removed.
 */
static char deprecated_msg[] =
        "Mount option \"%s\" will be removed by %s\n"
        "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
1421
1422 #ifdef CONFIG_QUOTA
1423 static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1424 {
1425         struct ext4_sb_info *sbi = EXT4_SB(sb);
1426         char *qname;
1427         int ret = -1;
1428
1429         if (sb_any_quota_loaded(sb) &&
1430                 !sbi->s_qf_names[qtype]) {
1431                 ext4_msg(sb, KERN_ERR,
1432                         "Cannot change journaled "
1433                         "quota options when quota turned on");
1434                 return -1;
1435         }
1436         if (ext4_has_feature_quota(sb)) {
1437                 ext4_msg(sb, KERN_INFO, "Journaled quota options "
1438                          "ignored when QUOTA feature is enabled");
1439                 return 1;
1440         }
1441         qname = match_strdup(args);
1442         if (!qname) {
1443                 ext4_msg(sb, KERN_ERR,
1444                         "Not enough memory for storing quotafile name");
1445                 return -1;
1446         }
1447         if (sbi->s_qf_names[qtype]) {
1448                 if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
1449                         ret = 1;
1450                 else
1451                         ext4_msg(sb, KERN_ERR,
1452                                  "%s quota file already specified",
1453                                  QTYPE2NAME(qtype));
1454                 goto errout;
1455         }
1456         if (strchr(qname, '/')) {
1457                 ext4_msg(sb, KERN_ERR,
1458                         "quotafile must be on filesystem root");
1459                 goto errout;
1460         }
1461         sbi->s_qf_names[qtype] = qname;
1462         set_opt(sb, QUOTA);
1463         return 1;
1464 errout:
1465         kfree(qname);
1466         return ret;
1467 }
1468
1469 static int clear_qf_name(struct super_block *sb, int qtype)
1470 {
1471
1472         struct ext4_sb_info *sbi = EXT4_SB(sb);
1473
1474         if (sb_any_quota_loaded(sb) &&
1475                 sbi->s_qf_names[qtype]) {
1476                 ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
1477                         " when quota turned on");
1478                 return -1;
1479         }
1480         kfree(sbi->s_qf_names[qtype]);
1481         sbi->s_qf_names[qtype] = NULL;
1482         return 1;
1483 }
1484 #endif
1485
/*
 * Flag bits for the "flags" member of the mount option table entries.
 * MOPT_Q and MOPT_QFMT collapse to MOPT_NOSUPPORT when the kernel is built
 * without CONFIG_QUOTA.
 */
#define MOPT_SET                0x0001
#define MOPT_CLEAR              0x0002
#define MOPT_NOSUPPORT          0x0004
#define MOPT_EXPLICIT           0x0008
#define MOPT_CLEAR_ERR          0x0010
#define MOPT_GTE0               0x0020
#ifdef CONFIG_QUOTA
#define MOPT_Q                  0
#define MOPT_QFMT               0x0040
#else
#define MOPT_Q                  MOPT_NOSUPPORT
#define MOPT_QFMT               MOPT_NOSUPPORT
#endif
#define MOPT_DATAJ              0x0080
#define MOPT_NO_EXT2            0x0100
#define MOPT_NO_EXT3            0x0200
/* Both "not for ext2" and "not for ext3" bits together. */
#define MOPT_EXT4_ONLY          (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING             0x0400
1504
1505 static const struct mount_opts {
1506         int     token;
1507         int     mount_opt;
1508         int     flags;
1509 } ext4_mount_opts[] = {
1510         {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
1511         {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1512         {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1513         {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1514         {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1515         {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1516         {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1517          MOPT_EXT4_ONLY | MOPT_SET},
1518         {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1519          MOPT_EXT4_ONLY | MOPT_CLEAR},
1520         {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1521         {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1522         {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1523          MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1524         {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1525          MOPT_EXT4_ONLY | MOPT_CLEAR},
1526         {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1527          MOPT_EXT4_ONLY | MOPT_CLEAR},
1528         {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1529          MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1530         {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1531                                     EXT4_MOUNT_JOURNAL_CHECKSUM),
1532          MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1533         {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1534         {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
1535         {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
1536         {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
1537         {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
1538          MOPT_NO_EXT2},
1539         {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
1540          MOPT_NO_EXT2},
1541         {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1542         {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1543         {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
1544         {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
1545         {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
1546         {Opt_commit, 0, MOPT_GTE0},
1547         {Opt_max_batch_time, 0, MOPT_GTE0},
1548         {Opt_min_batch_time, 0, MOPT_GTE0},
1549         {Opt_inode_readahead_blks, 0, MOPT_GTE0},
1550         {Opt_init_itable, 0, MOPT_GTE0},
1551         {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
1552         {Opt_stripe, 0, MOPT_GTE0},
1553         {Opt_resuid, 0, MOPT_GTE0},
1554         {Opt_resgid, 0, MOPT_GTE0},
1555         {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
1556         {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
1557         {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
1558         {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1559         {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1560         {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
1561          MOPT_NO_EXT2 | MOPT_DATAJ},
1562         {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1563         {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
1564 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1565         {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
1566         {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
1567 #else
1568         {Opt_acl, 0, MOPT_NOSUPPORT},
1569         {Opt_noacl, 0, MOPT_NOSUPPORT},
1570 #endif
1571         {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
1572         {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
1573         {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
1574         {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
1575                                                         MOPT_SET | MOPT_Q},
1576         {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
1577                                                         MOPT_SET | MOPT_Q},
1578         {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
1579                                                         MOPT_SET | MOPT_Q},
1580         {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
1581                        EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
1582                                                         MOPT_CLEAR | MOPT_Q},
1583         {Opt_usrjquota, 0, MOPT_Q},
1584         {Opt_grpjquota, 0, MOPT_Q},
1585         {Opt_offusrjquota, 0, MOPT_Q},
1586         {Opt_offgrpjquota, 0, MOPT_Q},
1587         {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
1588         {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
1589         {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
1590         {Opt_max_dir_size_kb, 0, MOPT_GTE0},
1591         {Opt_test_dummy_encryption, 0, MOPT_GTE0},
1592         {Opt_err, 0, 0}
1593 };
1594
1595 static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1596                             substring_t *args, unsigned long *journal_devnum,
1597                             unsigned int *journal_ioprio, int is_remount)
1598 {
1599         struct ext4_sb_info *sbi = EXT4_SB(sb);
1600         const struct mount_opts *m;
1601         kuid_t uid;
1602         kgid_t gid;
1603         int arg = 0;
1604
1605 #ifdef CONFIG_QUOTA
1606         if (token == Opt_usrjquota)
1607                 return set_qf_name(sb, USRQUOTA, &args[0]);
1608         else if (token == Opt_grpjquota)
1609                 return set_qf_name(sb, GRPQUOTA, &args[0]);
1610         else if (token == Opt_offusrjquota)
1611                 return clear_qf_name(sb, USRQUOTA);
1612         else if (token == Opt_offgrpjquota)
1613                 return clear_qf_name(sb, GRPQUOTA);
1614 #endif
1615         switch (token) {
1616         case Opt_noacl:
1617         case Opt_nouser_xattr:
1618                 ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
1619                 break;
1620         case Opt_sb:
1621                 return 1;       /* handled by get_sb_block() */
1622         case Opt_removed:
1623                 ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
1624                 return 1;
1625         case Opt_abort:
1626                 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1627                 return 1;
1628         case Opt_i_version:
1629                 sb->s_flags |= MS_I_VERSION;
1630                 return 1;
1631         case Opt_lazytime:
1632                 sb->s_flags |= MS_LAZYTIME;
1633                 return 1;
1634         case Opt_nolazytime:
1635                 sb->s_flags &= ~MS_LAZYTIME;
1636                 return 1;
1637         }
1638
1639         for (m = ext4_mount_opts; m->token != Opt_err; m++)
1640                 if (token == m->token)
1641                         break;
1642
1643         if (m->token == Opt_err) {
1644                 ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1645                          "or missing value", opt);
1646                 return -1;
1647         }
1648
1649         if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
1650                 ext4_msg(sb, KERN_ERR,
1651                          "Mount option \"%s\" incompatible with ext2", opt);
1652                 return -1;
1653         }
1654         if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
1655                 ext4_msg(sb, KERN_ERR,
1656                          "Mount option \"%s\" incompatible with ext3", opt);
1657                 return -1;
1658         }
1659
1660         if (token == Opt_err_panic && !capable(CAP_SYS_ADMIN)) {
1661                 ext4_msg(sb, KERN_ERR,
1662                          "Mount option \"%s\" not allowed for unprivileged mounts",
1663                          opt);
1664                 return -1;
1665         }
1666
1667         if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
1668                 return -1;
1669         if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1670                 return -1;
1671         if (m->flags & MOPT_EXPLICIT) {
1672                 if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
1673                         set_opt2(sb, EXPLICIT_DELALLOC);
1674                 } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
1675                         set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM);
1676                 } else
1677                         return -1;
1678         }
1679         if (m->flags & MOPT_CLEAR_ERR)
1680                 clear_opt(sb, ERRORS_MASK);
1681         if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1682                 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1683                          "options when quota turned on");
1684                 return -1;
1685         }
1686
1687         if (m->flags & MOPT_NOSUPPORT) {
1688                 ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1689         } else if (token == Opt_commit) {
1690                 if (arg == 0)
1691                         arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1692                 sbi->s_commit_interval = HZ * arg;
1693         } else if (token == Opt_max_batch_time) {
1694                 sbi->s_max_batch_time = arg;
1695         } else if (token == Opt_min_batch_time) {
1696                 sbi->s_min_batch_time = arg;
1697         } else if (token == Opt_inode_readahead_blks) {
1698                 if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
1699                         ext4_msg(sb, KERN_ERR,
1700                                  "EXT4-fs: inode_readahead_blks must be "
1701                                  "0 or a power of 2 smaller than 2^31");
1702                         return -1;
1703                 }
1704                 sbi->s_inode_readahead_blks = arg;
1705         } else if (token == Opt_init_itable) {
1706                 set_opt(sb, INIT_INODE_TABLE);
1707                 if (!args->from)
1708                         arg = EXT4_DEF_LI_WAIT_MULT;
1709                 sbi->s_li_wait_mult = arg;
1710         } else if (token == Opt_max_dir_size_kb) {
1711                 sbi->s_max_dir_size_kb = arg;
1712         } else if (token == Opt_stripe) {
1713                 sbi->s_stripe = arg;
1714         } else if (token == Opt_resuid) {
1715                 uid = make_kuid(sb->s_user_ns, arg);
1716                 if (!uid_valid(uid)) {
1717                         ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
1718                         return -1;
1719                 }
1720                 sbi->s_resuid = uid;
1721         } else if (token == Opt_resgid) {
1722                 gid = make_kgid(sb->s_user_ns, arg);
1723                 if (!gid_valid(gid)) {
1724                         ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
1725                         return -1;
1726                 }
1727                 sbi->s_resgid = gid;
1728         } else if (token == Opt_journal_dev) {
1729                 if (is_remount) {
1730                         ext4_msg(sb, KERN_ERR,
1731                                  "Cannot specify journal on remount");
1732                         return -1;
1733                 }
1734                 *journal_devnum = arg;
1735         } else if (token == Opt_journal_path) {
1736                 char *journal_path;
1737                 struct inode *journal_inode;
1738                 struct path path;
1739                 int error;
1740
1741                 if (is_remount) {
1742                         ext4_msg(sb, KERN_ERR,
1743                                  "Cannot specify journal on remount");
1744                         return -1;
1745                 }
1746                 journal_path = match_strdup(&args[0]);
1747                 if (!journal_path) {
1748                         ext4_msg(sb, KERN_ERR, "error: could not dup "
1749                                 "journal device string");
1750                         return -1;
1751                 }
1752
1753                 error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
1754                 if (error) {
1755                         ext4_msg(sb, KERN_ERR, "error: could not find "
1756                                 "journal device path: error %d", error);
1757                         kfree(journal_path);
1758                         return -1;
1759                 }
1760
1761                 /*
1762                  * Refuse access for unprivileged mounts if the user does
1763                  * not have rw access to the journal device via the supplied
1764                  * path.
1765                  */
1766                 if (!capable(CAP_SYS_ADMIN) &&
1767                     inode_permission(d_inode(path.dentry), MAY_READ|MAY_WRITE)) {
1768                         ext4_msg(sb, KERN_ERR,
1769                                  "error: Insufficient access to journal path %s",
1770                                  journal_path);
1771                         return -1;
1772                 }
1773
1774                 journal_inode = d_inode(path.dentry);
1775                 if (!S_ISBLK(journal_inode->i_mode)) {
1776                         ext4_msg(sb, KERN_ERR, "error: journal path %s "
1777                                 "is not a block device", journal_path);
1778                         path_put(&path);
1779                         kfree(journal_path);
1780                         return -1;
1781                 }
1782
1783                 *journal_devnum = new_encode_dev(journal_inode->i_rdev);
1784                 path_put(&path);
1785                 kfree(journal_path);
1786         } else if (token == Opt_journal_ioprio) {
1787                 if (arg > 7) {
1788                         ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
1789                                  " (must be 0-7)");
1790                         return -1;
1791                 }
1792                 *journal_ioprio =
1793                         IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1794         } else if (token == Opt_test_dummy_encryption) {
1795 #ifdef CONFIG_EXT4_FS_ENCRYPTION
1796                 sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
1797                 ext4_msg(sb, KERN_WARNING,
1798                          "Test dummy encryption mode enabled");
1799 #else
1800                 ext4_msg(sb, KERN_WARNING,
1801                          "Test dummy encryption mount option ignored");
1802 #endif
1803         } else if (m->flags & MOPT_DATAJ) {
1804                 if (is_remount) {
1805                         if (!sbi->s_journal)
1806                                 ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1807                         else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
1808                                 ext4_msg(sb, KERN_ERR,
1809                                          "Cannot change data mode on remount");
1810                                 return -1;
1811                         }
1812                 } else {
1813                         clear_opt(sb, DATA_FLAGS);
1814                         sbi->s_mount_opt |= m->mount_opt;
1815                 }
1816 #ifdef CONFIG_QUOTA
1817         } else if (m->flags & MOPT_QFMT) {
1818                 if (sb_any_quota_loaded(sb) &&
1819                     sbi->s_jquota_fmt != m->mount_opt) {
1820                         ext4_msg(sb, KERN_ERR, "Cannot change journaled "
1821                                  "quota options when quota turned on");
1822                         return -1;
1823                 }
1824                 if (ext4_has_feature_quota(sb)) {
1825                         ext4_msg(sb, KERN_INFO,
1826                                  "Quota format mount options ignored "
1827                                  "when QUOTA feature is enabled");
1828                         return 1;
1829                 }
1830                 sbi->s_jquota_fmt = m->mount_opt;
1831 #endif
1832         } else if (token == Opt_dax) {
1833 #ifdef CONFIG_FS_DAX
1834                 ext4_msg(sb, KERN_WARNING,
1835                 "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
1836                         sbi->s_mount_opt |= m->mount_opt;
1837 #else
1838                 ext4_msg(sb, KERN_INFO, "dax option not supported");
1839                 return -1;
1840 #endif
1841         } else if (token == Opt_data_err_abort) {
1842                 sbi->s_mount_opt |= m->mount_opt;
1843         } else if (token == Opt_data_err_ignore) {
1844                 sbi->s_mount_opt &= ~m->mount_opt;
1845         } else {
1846                 if (!args->from)
1847                         arg = 1;
1848                 if (m->flags & MOPT_CLEAR)
1849                         arg = !arg;
1850                 else if (unlikely(!(m->flags & MOPT_SET))) {
1851                         ext4_msg(sb, KERN_WARNING,
1852                                  "buggy handling of option %s", opt);
1853                         WARN_ON(1);
1854                         return -1;
1855                 }
1856                 if (arg != 0)
1857                         sbi->s_mount_opt |= m->mount_opt;
1858                 else
1859                         sbi->s_mount_opt &= ~m->mount_opt;
1860         }
1861         return 1;
1862 }
1863
1864 static int parse_options(char *options, struct super_block *sb,
1865                          unsigned long *journal_devnum,
1866                          unsigned int *journal_ioprio,
1867                          int is_remount)
1868 {
1869         struct ext4_sb_info *sbi = EXT4_SB(sb);
1870         char *p;
1871         substring_t args[MAX_OPT_ARGS];
1872         int token;
1873
1874         if (!options)
1875                 return 1;
1876
1877         while ((p = strsep(&options, ",")) != NULL) {
1878                 if (!*p)
1879                         continue;
1880                 /*
1881                  * Initialize args struct so we know whether arg was
1882                  * found; some options take optional arguments.
1883                  */
1884                 args[0].to = args[0].from = NULL;
1885                 token = match_token(p, tokens, args);
1886                 if (handle_mount_opt(sb, p, token, args, journal_devnum,
1887                                      journal_ioprio, is_remount) < 0)
1888                         return 0;
1889         }
1890 #ifdef CONFIG_QUOTA
1891         /*
1892          * We do the test below only for project quotas. 'usrquota' and
1893          * 'grpquota' mount options are allowed even without quota feature
1894          * to support legacy quotas in quota files.
1895          */
1896         if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
1897                 ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
1898                          "Cannot enable project quota enforcement.");
1899                 return 0;
1900         }
1901         if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1902                 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1903                         clear_opt(sb, USRQUOTA);
1904
1905                 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1906                         clear_opt(sb, GRPQUOTA);
1907
1908                 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1909                         ext4_msg(sb, KERN_ERR, "old and new quota "
1910                                         "format mixing");
1911                         return 0;
1912                 }
1913
1914                 if (!sbi->s_jquota_fmt) {
1915                         ext4_msg(sb, KERN_ERR, "journaled quota format "
1916                                         "not specified");
1917                         return 0;
1918                 }
1919         }
1920 #endif
1921         if (test_opt(sb, DIOREAD_NOLOCK)) {
1922                 int blocksize =
1923                         BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
1924
1925                 if (blocksize < PAGE_SIZE) {
1926                         ext4_msg(sb, KERN_ERR, "can't mount with "
1927                                  "dioread_nolock if block size != PAGE_SIZE");
1928                         return 0;
1929                 }
1930         }
1931         return 1;
1932 }
1933
/*
 * Print the journaled quota settings of @sb (format and quota file
 * names) into @seq. Does nothing when CONFIG_QUOTA is not defined.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
                                           struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
        struct ext4_sb_info *sbi = EXT4_SB(sb);

        if (sbi->s_jquota_fmt) {
                const char *fmtname;

                /* Unknown format values are shown as an empty name. */
                if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
                        fmtname = "vfsold";
                else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
                        fmtname = "vfsv0";
                else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
                        fmtname = "vfsv1";
                else
                        fmtname = "";
                seq_printf(seq, ",jqfmt=%s", fmtname);
        }

        if (sbi->s_qf_names[USRQUOTA] != NULL)
                seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]);

        if (sbi->s_qf_names[GRPQUOTA] != NULL)
                seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]);
#endif
}
1964
1965 static const char *token2str(int token)
1966 {
1967         const struct match_token *t;
1968
1969         for (t = tokens; t->token != Opt_err; t++)
1970                 if (t->token == token && !strchr(t->pattern, '='))
1971                         break;
1972         return t->pattern;
1973 }
1974
1975 /*
1976  * Show an option if
1977  *  - it's set to a non-default value OR
1978  *  - if the per-sb default is different from the global default
1979  */
1980 static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
1981                               int nodefs)
1982 {
1983         struct ext4_sb_info *sbi = EXT4_SB(sb);
1984         struct ext4_super_block *es = sbi->s_es;
1985         int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
1986         const struct mount_opts *m;
1987         char sep = nodefs ? '\n' : ',';
1988
1989 #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
1990 #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
1991
1992         if (sbi->s_sb_block != 1)
1993                 SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
1994
1995         for (m = ext4_mount_opts; m->token != Opt_err; m++) {
1996                 int want_set = m->flags & MOPT_SET;
1997                 if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
1998                     (m->flags & MOPT_CLEAR_ERR))
1999                         continue;
2000                 if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
2001                         continue; /* skip if same as the default */
2002                 if ((want_set &&
2003                      (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
2004                     (!want_set && (sbi->s_mount_opt & m->mount_opt)))
2005                         continue; /* select Opt_noFoo vs Opt_Foo */
2006                 SEQ_OPTS_PRINT("%s", token2str(m->token));
2007         }
2008
2009         if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(sb->s_user_ns, EXT4_DEF_RESUID)) ||
2010             le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
2011                 SEQ_OPTS_PRINT("resuid=%u",
2012                                 from_kuid_munged(sb->s_user_ns, sbi->s_resuid));
2013         if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(sb->s_user_ns, EXT4_DEF_RESGID)) ||
2014             le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
2015                 SEQ_OPTS_PRINT("resgid=%u",
2016                                 from_kgid_munged(sb->s_user_ns, sbi->s_resgid));
2017         def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
2018         if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
2019                 SEQ_OPTS_PUTS("errors=remount-ro");
2020         if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
2021                 SEQ_OPTS_PUTS("errors=continue");
2022         if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
2023                 SEQ_OPTS_PUTS("errors=panic");
2024         if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
2025                 SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
2026         if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
2027                 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
2028         if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
2029                 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
2030         if (sb->s_flags & MS_I_VERSION)
2031                 SEQ_OPTS_PUTS("i_version");
2032         if (nodefs || sbi->s_stripe)
2033                 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
2034         if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
2035                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2036                         SEQ_OPTS_PUTS("data=journal");
2037                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2038                         SEQ_OPTS_PUTS("data=ordered");
2039                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
2040                         SEQ_OPTS_PUTS("data=writeback");
2041         }
2042         if (nodefs ||
2043             sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
2044                 SEQ_OPTS_PRINT("inode_readahead_blks=%u",
2045                                sbi->s_inode_readahead_blks);
2046
2047         if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
2048                        (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
2049                 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
2050         if (nodefs || sbi->s_max_dir_size_kb)
2051                 SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
2052         if (test_opt(sb, DATA_ERR_ABORT))
2053                 SEQ_OPTS_PUTS("data_err=abort");
2054
2055         ext4_show_quota_options(seq, sb);
2056         return 0;
2057 }
2058
2059 static int ext4_show_options(struct seq_file *seq, struct dentry *root)
2060 {
2061         return _ext4_show_options(seq, root->d_sb, 0);
2062 }
2063
2064 int ext4_seq_options_show(struct seq_file *seq, void *offset)
2065 {
2066         struct super_block *sb = seq->private;
2067         int rc;
2068
2069         seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
2070         rc = _ext4_show_options(seq, sb, 1);
2071         seq_puts(seq, "\n");
2072         return rc;
2073 }
2074
2075 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
2076                             int read_only)
2077 {
2078         struct ext4_sb_info *sbi = EXT4_SB(sb);
2079         int res = 0;
2080
2081         if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
2082                 ext4_msg(sb, KERN_ERR, "revision level too high, "
2083                          "forcing read-only mode");
2084                 res = MS_RDONLY;
2085         }
2086         if (read_only)
2087                 goto done;
2088         if (!(sbi->s_mount_state & EXT4_VALID_FS))
2089                 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
2090                          "running e2fsck is recommended");
2091         else if (sbi->s_mount_state & EXT4_ERROR_FS)
2092                 ext4_msg(sb, KERN_WARNING,
2093                          "warning: mounting fs with errors, "
2094                          "running e2fsck is recommended");
2095         else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
2096                  le16_to_cpu(es->s_mnt_count) >=
2097                  (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
2098                 ext4_msg(sb, KERN_WARNING,
2099                          "warning: maximal mount count reached, "
2100                          "running e2fsck is recommended");
2101         else if (le32_to_cpu(es->s_checkinterval) &&
2102                 (le32_to_cpu(es->s_lastcheck) +
2103                         le32_to_cpu(es->s_checkinterval) <= get_seconds()))
2104                 ext4_msg(sb, KERN_WARNING,
2105                          "warning: checktime reached, "
2106                          "running e2fsck is recommended");
2107         if (!sbi->s_journal)
2108                 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
2109         if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
2110                 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
2111         le16_add_cpu(&es->s_mnt_count, 1);
2112         es->s_mtime = cpu_to_le32(get_seconds());
2113         ext4_update_dynamic_rev(sb);
2114         if (sbi->s_journal)
2115                 ext4_set_feature_journal_needs_recovery(sb);
2116
2117         ext4_commit_super(sb, 1);
2118 done:
2119         if (test_opt(sb, DEBUG))
2120                 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
2121                                 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
2122                         sb->s_blocksize,
2123                         sbi->s_groups_count,
2124                         EXT4_BLOCKS_PER_GROUP(sb),
2125                         EXT4_INODES_PER_GROUP(sb),
2126                         sbi->s_mount_opt, sbi->s_mount_opt2);
2127
2128         cleancache_init_fs(sb);
2129         return res;
2130 }
2131
2132 int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
2133 {
2134         struct ext4_sb_info *sbi = EXT4_SB(sb);
2135         struct flex_groups *new_groups;
2136         int size;
2137
2138         if (!sbi->s_log_groups_per_flex)
2139                 return 0;
2140
2141         size = ext4_flex_group(sbi, ngroup - 1) + 1;
2142         if (size <= sbi->s_flex_groups_allocated)
2143                 return 0;
2144
2145         size = roundup_pow_of_two(size * sizeof(struct flex_groups));
2146         new_groups = ext4_kvzalloc(size, GFP_KERNEL);
2147         if (!new_groups) {
2148                 ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
2149                          size / (int) sizeof(struct flex_groups));
2150                 return -ENOMEM;
2151         }
2152
2153         if (sbi->s_flex_groups) {
2154                 memcpy(new_groups, sbi->s_flex_groups,
2155                        (sbi->s_flex_groups_allocated *
2156                         sizeof(struct flex_groups)));
2157                 kvfree(sbi->s_flex_groups);
2158         }
2159         sbi->s_flex_groups = new_groups;
2160         sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
2161         return 0;
2162 }
2163
2164 static int ext4_fill_flex_info(struct super_block *sb)
2165 {
2166         struct ext4_sb_info *sbi = EXT4_SB(sb);
2167         struct ext4_group_desc *gdp = NULL;
2168         ext4_group_t flex_group;
2169         int i, err;
2170
2171         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
2172         if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
2173                 sbi->s_log_groups_per_flex = 0;
2174                 return 1;
2175         }
2176
2177         err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
2178         if (err)
2179                 goto failed;
2180
2181         for (i = 0; i < sbi->s_groups_count; i++) {
2182                 gdp = ext4_get_group_desc(sb, i, NULL);
2183
2184                 flex_group = ext4_flex_group(sbi, i);
2185                 atomic_add(ext4_free_inodes_count(sb, gdp),
2186                            &sbi->s_flex_groups[flex_group].free_inodes);
2187                 atomic64_add(ext4_free_group_clusters(sb, gdp),
2188                              &sbi->s_flex_groups[flex_group].free_clusters);
2189                 atomic_add(ext4_used_dirs_count(sb, gdp),
2190                            &sbi->s_flex_groups[flex_group].used_dirs);
2191         }
2192
2193         return 1;
2194 failed:
2195         return 0;
2196 }
2197
2198 static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
2199                                    struct ext4_group_desc *gdp)
2200 {
2201         int offset = offsetof(struct ext4_group_desc, bg_checksum);
2202         __u16 crc = 0;
2203         __le32 le_group = cpu_to_le32(block_group);
2204         struct ext4_sb_info *sbi = EXT4_SB(sb);
2205
2206         if (ext4_has_metadata_csum(sbi->s_sb)) {
2207                 /* Use new metadata_csum algorithm */
2208                 __u32 csum32;
2209                 __u16 dummy_csum = 0;
2210
2211                 csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
2212                                      sizeof(le_group));
2213                 csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
2214                 csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
2215                                      sizeof(dummy_csum));
2216                 offset += sizeof(dummy_csum);
2217                 if (offset < sbi->s_desc_size)
2218                         csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
2219                                              sbi->s_desc_size - offset);
2220
2221                 crc = csum32 & 0xFFFF;
2222                 goto out;
2223         }
2224
2225         /* old crc16 code */
2226         if (!ext4_has_feature_gdt_csum(sb))
2227                 return 0;
2228
2229         crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
2230         crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
2231         crc = crc16(crc, (__u8 *)gdp, offset);
2232         offset += sizeof(gdp->bg_checksum); /* skip checksum */
2233         /* for checksum of struct ext4_group_desc do the rest...*/
2234         if (ext4_has_feature_64bit(sb) &&
2235             offset < le16_to_cpu(sbi->s_es->s_desc_size))
2236                 crc = crc16(crc, (__u8 *)gdp + offset,
2237                             le16_to_cpu(sbi->s_es->s_desc_size) -
2238                                 offset);
2239
2240 out:
2241         return cpu_to_le16(crc);
2242 }
2243
2244 int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
2245                                 struct ext4_group_desc *gdp)
2246 {
2247         if (ext4_has_group_desc_csum(sb) &&
2248             (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
2249                 return 0;
2250
2251         return 1;
2252 }
2253
2254 void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
2255                               struct ext4_group_desc *gdp)
2256 {
2257         if (!ext4_has_group_desc_csum(sb))
2258                 return;
2259         gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
2260 }
2261
2262 /* Called at mount-time, super-block is locked */
2263 static int ext4_check_descriptors(struct super_block *sb,
2264                                   ext4_fsblk_t sb_block,
2265                                   ext4_group_t *first_not_zeroed)
2266 {
2267         struct ext4_sb_info *sbi = EXT4_SB(sb);
2268         ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
2269         ext4_fsblk_t last_block;
2270         ext4_fsblk_t block_bitmap;
2271         ext4_fsblk_t inode_bitmap;
2272         ext4_fsblk_t inode_table;
2273         int flexbg_flag = 0;
2274         ext4_group_t i, grp = sbi->s_groups_count;
2275
2276         if (ext4_has_feature_flex_bg(sb))
2277                 flexbg_flag = 1;
2278
2279         ext4_debug("Checking group descriptors");
2280
2281         for (i = 0; i < sbi->s_groups_count; i++) {
2282                 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
2283
2284                 if (i == sbi->s_groups_count - 1 || flexbg_flag)
2285                         last_block = ext4_blocks_count(sbi->s_es) - 1;
2286                 else
2287                         last_block = first_block +
2288                                 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
2289
2290                 if ((grp == sbi->s_groups_count) &&
2291                    !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2292                         grp = i;
2293
2294                 block_bitmap = ext4_block_bitmap(sb, gdp);
2295                 if (block_bitmap == sb_block) {
2296                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2297                                  "Block bitmap for group %u overlaps "
2298                                  "superblock", i);
2299                 }
2300                 if (block_bitmap < first_block || block_bitmap > last_block) {
2301                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2302                                "Block bitmap for group %u not in group "
2303                                "(block %llu)!", i, block_bitmap);
2304                         return 0;
2305                 }
2306                 inode_bitmap = ext4_inode_bitmap(sb, gdp);
2307                 if (inode_bitmap == sb_block) {
2308                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2309                                  "Inode bitmap for group %u overlaps "
2310                                  "superblock", i);
2311                 }
2312                 if (inode_bitmap < first_block || inode_bitmap > last_block) {
2313                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2314                                "Inode bitmap for group %u not in group "
2315                                "(block %llu)!", i, inode_bitmap);
2316                         return 0;
2317                 }
2318                 inode_table = ext4_inode_table(sb, gdp);
2319                 if (inode_table == sb_block) {
2320                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2321                                  "Inode table for group %u overlaps "
2322                                  "superblock", i);
2323                 }
2324                 if (inode_table < first_block ||
2325                     inode_table + sbi->s_itb_per_group - 1 > last_block) {
2326                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2327                                "Inode table for group %u not in group "
2328                                "(block %llu)!", i, inode_table);
2329                         return 0;
2330                 }
2331                 ext4_lock_group(sb, i);
2332                 if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
2333                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2334                                  "Checksum for group %u failed (%u!=%u)",
2335                                  i, le16_to_cpu(ext4_group_desc_csum(sb, i,
2336                                      gdp)), le16_to_cpu(gdp->bg_checksum));
2337                         if (!(sb->s_flags & MS_RDONLY)) {
2338                                 ext4_unlock_group(sb, i);
2339                                 return 0;
2340                         }
2341                 }
2342                 ext4_unlock_group(sb, i);
2343                 if (!flexbg_flag)
2344                         first_block += EXT4_BLOCKS_PER_GROUP(sb);
2345         }
2346         if (NULL != first_not_zeroed)
2347                 *first_not_zeroed = grp;
2348         return 1;
2349 }
2350
2351 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
2352  * the superblock) which were deleted from all directories, but held open by
2353  * a process at the time of a crash.  We walk the list and try to delete these
2354  * inodes at recovery time (only with a read-write filesystem).
2355  *
2356  * In order to keep the orphan inode chain consistent during traversal (in
2357  * case of crash during recovery), we link each inode into the superblock
2358  * orphan list_head and handle it the same way as an inode deletion during
2359  * normal operation (which journals the operations for us).
2360  *
2361  * We only do an iget() and an iput() on each inode, which is very safe if we
2362  * accidentally point at an in-use or already deleted inode.  The worst that
2363  * can happen in this case is that we get a "bit already cleared" message from
2364  * ext4_free_inode().  The only reason we would point at a wrong inode is if
2365  * e2fsck was run on this filesystem, and it must have already done the orphan
2366  * inode cleanup for us, so we can safely abort without any further action.
2367  */
2368 static void ext4_orphan_cleanup(struct super_block *sb,
2369                                 struct ext4_super_block *es)
2370 {
2371         unsigned int s_flags = sb->s_flags;
2372         int ret, nr_orphans = 0, nr_truncates = 0;
2373 #ifdef CONFIG_QUOTA
2374         int i;
2375 #endif
2376         if (!es->s_last_orphan) {
2377                 jbd_debug(4, "no orphan inodes to clean up\n");
2378                 return;
2379         }
2380
2381         if (bdev_read_only(sb->s_bdev)) {
2382                 ext4_msg(sb, KERN_ERR, "write access "
2383                         "unavailable, skipping orphan cleanup");
2384                 return;
2385         }
2386
2387         /* Check if feature set would not allow a r/w mount */
2388         if (!ext4_feature_set_ok(sb, 0)) {
2389                 ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
2390                          "unknown ROCOMPAT features");
2391                 return;
2392         }
2393
2394         if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2395                 /* don't clear list on RO mount w/ errors */
2396                 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
2397                         ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
2398                                   "clearing orphan list.\n");
2399                         es->s_last_orphan = 0;
2400                 }
2401                 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2402                 return;
2403         }
2404
2405         if (s_flags & MS_RDONLY) {
2406                 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
2407                 sb->s_flags &= ~MS_RDONLY;
2408         }
2409 #ifdef CONFIG_QUOTA
2410         /* Needed for iput() to work correctly and not trash data */
2411         sb->s_flags |= MS_ACTIVE;
2412         /* Turn on quotas so that they are updated correctly */
2413         for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2414                 if (EXT4_SB(sb)->s_qf_names[i]) {
2415                         int ret = ext4_quota_on_mount(sb, i);
2416                         if (ret < 0)
2417                                 ext4_msg(sb, KERN_ERR,
2418                                         "Cannot turn on journaled "
2419                                         "quota: error %d", ret);
2420                 }
2421         }
2422 #endif
2423
2424         while (es->s_last_orphan) {
2425                 struct inode *inode;
2426
2427                 /*
2428                  * We may have encountered an error during cleanup; if
2429                  * so, skip the rest.
2430                  */
2431                 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2432                         jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2433                         es->s_last_orphan = 0;
2434                         break;
2435                 }
2436
2437                 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
2438                 if (IS_ERR(inode)) {
2439                         es->s_last_orphan = 0;
2440                         break;
2441                 }
2442
2443                 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2444                 dquot_initialize(inode);
2445                 if (inode->i_nlink) {
2446                         if (test_opt(sb, DEBUG))
2447                                 ext4_msg(sb, KERN_DEBUG,
2448                                         "%s: truncating inode %lu to %lld bytes",
2449                                         __func__, inode->i_ino, inode->i_size);
2450                         jbd_debug(2, "truncating inode %lu to %lld bytes\n",
2451                                   inode->i_ino, inode->i_size);
2452                         inode_lock(inode);
2453                         truncate_inode_pages(inode->i_mapping, inode->i_size);
2454                         ret = ext4_truncate(inode);
2455                         if (ret)
2456                                 ext4_std_error(inode->i_sb, ret);
2457                         inode_unlock(inode);
2458                         nr_truncates++;
2459                 } else {
2460                         if (test_opt(sb, DEBUG))
2461                                 ext4_msg(sb, KERN_DEBUG,
2462                                         "%s: deleting unreferenced inode %lu",
2463                                         __func__, inode->i_ino);
2464                         jbd_debug(2, "deleting unreferenced inode %lu\n",
2465                                   inode->i_ino);
2466                         nr_orphans++;
2467                 }
2468                 iput(inode);  /* The delete magic happens here! */
2469         }
2470
2471 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
2472
2473         if (nr_orphans)
2474                 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
2475                        PLURAL(nr_orphans));
2476         if (nr_truncates)
2477                 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
2478                        PLURAL(nr_truncates));
2479 #ifdef CONFIG_QUOTA
2480         /* Turn quotas off */
2481         for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2482                 if (sb_dqopt(sb)->files[i])
2483                         dquot_quota_off(sb, i);
2484         }
2485 #endif
2486         sb->s_flags = s_flags; /* Restore MS_RDONLY status */
2487 }
2488
2489 /*
2490  * Maximal extent format file size.
2491  * Resulting logical blkno at s_maxbytes must fit in our on-disk
2492  * extent format containers, within a sector_t, and within i_blocks
2493  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
2494  * so that won't be a limiting factor.
2495  *
2496  * However there is other limiting factor. We do store extents in the form
2497  * of starting block and length, hence the resulting length of the extent
2498  * covering maximum file size must fit into on-disk format containers as
2499  * well. Given that length is always by 1 unit bigger than max unit (because
2500  * we count 0 as well) we have to lower the s_maxbytes by one fs block.
2501  *
2502  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
2503  */
2504 static loff_t ext4_max_size(int blkbits, int has_huge_files)
2505 {
2506         loff_t res;
2507         loff_t upper_limit = MAX_LFS_FILESIZE;
2508
2509         /* small i_blocks in vfs inode? */
2510         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2511                 /*
2512                  * CONFIG_LBDAF is not enabled implies the inode
2513                  * i_block represent total blocks in 512 bytes
2514                  * 32 == size of vfs inode i_blocks * 8
2515                  */
2516                 upper_limit = (1LL << 32) - 1;
2517
2518                 /* total blocks in file system block size */
2519                 upper_limit >>= (blkbits - 9);
2520                 upper_limit <<= blkbits;
2521         }
2522
2523         /*
2524          * 32-bit extent-start container, ee_block. We lower the maxbytes
2525          * by one fs block, so ee_len can cover the extent of maximum file
2526          * size
2527          */
2528         res = (1LL << 32) - 1;
2529         res <<= blkbits;
2530
2531         /* Sanity check against vm- & vfs- imposed limits */
2532         if (res > upper_limit)
2533                 res = upper_limit;
2534
2535         return res;
2536 }
2537
2538 /*
2539  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
2540  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
2541  * We need to be 1 filesystem block less than the 2^48 sector limit.
2542  */
2543 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2544 {
2545         loff_t res = EXT4_NDIR_BLOCKS;
2546         int meta_blocks;
2547         loff_t upper_limit;
2548         /* This is calculated to be the largest file size for a dense, block
2549          * mapped file such that the file's total number of 512-byte sectors,
2550          * including data and all indirect blocks, does not exceed (2^48 - 1).
2551          *
2552          * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
2553          * number of 512-byte sectors of the file.
2554          */
2555
2556         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2557                 /*
2558                  * !has_huge_files or CONFIG_LBDAF not enabled implies that
2559                  * the inode i_block field represents total file blocks in
2560                  * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2561                  */
2562                 upper_limit = (1LL << 32) - 1;
2563
2564                 /* total blocks in file system block size */
2565                 upper_limit >>= (bits - 9);
2566
2567         } else {
2568                 /*
2569                  * We use 48 bit ext4_inode i_blocks
2570                  * With EXT4_HUGE_FILE_FL set the i_blocks
2571                  * represent total number of blocks in
2572                  * file system block size
2573                  */
2574                 upper_limit = (1LL << 48) - 1;
2575
2576         }
2577
2578         /* indirect blocks */
2579         meta_blocks = 1;
2580         /* double indirect blocks */
2581         meta_blocks += 1 + (1LL << (bits-2));
2582         /* tripple indirect blocks */
2583         meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
2584
2585         upper_limit -= meta_blocks;
2586         upper_limit <<= bits;
2587
2588         res += 1LL << (bits-2);
2589         res += 1LL << (2*(bits-2));
2590         res += 1LL << (3*(bits-2));
2591         res <<= bits;
2592         if (res > upper_limit)
2593                 res = upper_limit;
2594
2595         if (res > MAX_LFS_FILESIZE)
2596                 res = MAX_LFS_FILESIZE;
2597
2598         return res;
2599 }
2600
2601 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2602                                    ext4_fsblk_t logical_sb_block, int nr)
2603 {
2604         struct ext4_sb_info *sbi = EXT4_SB(sb);
2605         ext4_group_t bg, first_meta_bg;
2606         int has_super = 0;
2607
2608         first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2609
2610         if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
2611                 return logical_sb_block + nr + 1;
2612         bg = sbi->s_desc_per_block * nr;
2613         if (ext4_bg_has_super(sb, bg))
2614                 has_super = 1;
2615
2616         /*
2617          * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
2618          * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
2619          * on modern mke2fs or blksize > 1k on older mke2fs) then we must
2620          * compensate.
2621          */
2622         if (sb->s_blocksize == 1024 && nr == 0 &&
2623             le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0)
2624                 has_super++;
2625
2626         return (has_super + ext4_group_first_block_no(sb, bg));
2627 }
2628
2629 /**
2630  * ext4_get_stripe_size: Get the stripe size.
2631  * @sbi: In memory super block info
2632  *
2633  * If we have specified it via mount option, then
2634  * use the mount option value. If the value specified at mount time is
2635  * greater than the blocks per group use the super block value.
2636  * If the super block value is greater than blocks per group return 0.
2637  * Allocator needs it be less than blocks per group.
2638  *
2639  */
2640 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2641 {
2642         unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2643         unsigned long stripe_width =
2644                         le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2645         int ret;
2646
2647         if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2648                 ret = sbi->s_stripe;
2649         else if (stripe_width <= sbi->s_blocks_per_group)
2650                 ret = stripe_width;
2651         else if (stride <= sbi->s_blocks_per_group)
2652                 ret = stride;
2653         else
2654                 ret = 0;
2655
2656         /*
2657          * If the stripe width is 1, this makes no sense and
2658          * we set it to 0 to turn off stripe handling code.
2659          */
2660         if (ret <= 1)
2661                 ret = 0;
2662
2663         return ret;
2664 }
2665
2666 /*
2667  * Check whether this filesystem can be mounted based on
2668  * the features present and the RDONLY/RDWR mount requested.
2669  * Returns 1 if this filesystem can be mounted as requested,
2670  * 0 if it cannot be.
2671  */
2672 static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2673 {
2674         if (ext4_has_unknown_ext4_incompat_features(sb)) {
2675                 ext4_msg(sb, KERN_ERR,
2676                         "Couldn't mount because of "
2677                         "unsupported optional features (%x)",
2678                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2679                         ~EXT4_FEATURE_INCOMPAT_SUPP));
2680                 return 0;
2681         }
2682
2683         if (readonly)
2684                 return 1;
2685
2686         if (ext4_has_feature_readonly(sb)) {
2687                 ext4_msg(sb, KERN_INFO, "filesystem is read-only");
2688                 sb->s_flags |= MS_RDONLY;
2689                 return 1;
2690         }
2691
2692         /* Check that feature set is OK for a read-write mount */
2693         if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
2694                 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
2695                          "unsupported optional features (%x)",
2696                          (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2697                                 ~EXT4_FEATURE_RO_COMPAT_SUPP));
2698                 return 0;
2699         }
2700         /*
2701          * Large file size enabled file system can only be mounted
2702          * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
2703          */
2704         if (ext4_has_feature_huge_file(sb)) {
2705                 if (sizeof(blkcnt_t) < sizeof(u64)) {
2706                         ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
2707                                  "cannot be mounted RDWR without "
2708                                  "CONFIG_LBDAF");
2709                         return 0;
2710                 }
2711         }
2712         if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
2713                 ext4_msg(sb, KERN_ERR,
2714                          "Can't support bigalloc feature without "
2715                          "extents feature\n");
2716                 return 0;
2717         }
2718
2719 #ifndef CONFIG_QUOTA
2720         if (ext4_has_feature_quota(sb) && !readonly) {
2721                 ext4_msg(sb, KERN_ERR,
2722                          "Filesystem with quota feature cannot be mounted RDWR "
2723                          "without CONFIG_QUOTA");
2724                 return 0;
2725         }
2726         if (ext4_has_feature_project(sb) && !readonly) {
2727                 ext4_msg(sb, KERN_ERR,
2728                          "Filesystem with project quota feature cannot be mounted RDWR "
2729                          "without CONFIG_QUOTA");
2730                 return 0;
2731         }
2732 #endif  /* CONFIG_QUOTA */
2733         return 1;
2734 }
2735
2736 /*
2737  * This function is called once a day if we have errors logged
2738  * on the file system
2739  */
2740 static void print_daily_error_info(unsigned long arg)
2741 {
2742         struct super_block *sb = (struct super_block *) arg;
2743         struct ext4_sb_info *sbi;
2744         struct ext4_super_block *es;
2745
2746         sbi = EXT4_SB(sb);
2747         es = sbi->s_es;
2748
2749         if (es->s_error_count)
2750                 /* fsck newer than v1.41.13 is needed to clean this condition. */
2751                 ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
2752                          le32_to_cpu(es->s_error_count));
2753         if (es->s_first_error_time) {
2754                 printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
2755                        sb->s_id, le32_to_cpu(es->s_first_error_time),
2756                        (int) sizeof(es->s_first_error_func),
2757                        es->s_first_error_func,
2758                        le32_to_cpu(es->s_first_error_line));
2759                 if (es->s_first_error_ino)
2760                         printk(KERN_CONT ": inode %u",
2761                                le32_to_cpu(es->s_first_error_ino));
2762                 if (es->s_first_error_block)
2763                         printk(KERN_CONT ": block %llu", (unsigned long long)
2764                                le64_to_cpu(es->s_first_error_block));
2765                 printk(KERN_CONT "\n");
2766         }
2767         if (es->s_last_error_time) {
2768                 printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
2769                        sb->s_id, le32_to_cpu(es->s_last_error_time),
2770                        (int) sizeof(es->s_last_error_func),
2771                        es->s_last_error_func,
2772                        le32_to_cpu(es->s_last_error_line));
2773                 if (es->s_last_error_ino)
2774                         printk(KERN_CONT ": inode %u",
2775                                le32_to_cpu(es->s_last_error_ino));
2776                 if (es->s_last_error_block)
2777                         printk(KERN_CONT ": block %llu", (unsigned long long)
2778                                le64_to_cpu(es->s_last_error_block));
2779                 printk(KERN_CONT "\n");
2780         }
2781         mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
2782 }
2783
2784 /* Find next suitable group and run ext4_init_inode_table */
2785 static int ext4_run_li_request(struct ext4_li_request *elr)
2786 {
2787         struct ext4_group_desc *gdp = NULL;
2788         ext4_group_t group, ngroups;
2789         struct super_block *sb;
2790         unsigned long timeout = 0;
2791         int ret = 0;
2792
2793         sb = elr->lr_super;
2794         ngroups = EXT4_SB(sb)->s_groups_count;
2795
2796         for (group = elr->lr_next_group; group < ngroups; group++) {
2797                 gdp = ext4_get_group_desc(sb, group, NULL);
2798                 if (!gdp) {
2799                         ret = 1;
2800                         break;
2801                 }
2802
2803                 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2804                         break;
2805         }
2806
2807         if (group >= ngroups)
2808                 ret = 1;
2809
2810         if (!ret) {
2811                 timeout = jiffies;
2812                 ret = ext4_init_inode_table(sb, group,
2813                                             elr->lr_timeout ? 0 : 1);
2814                 if (elr->lr_timeout == 0) {
2815                         timeout = (jiffies - timeout) *
2816                                   elr->lr_sbi->s_li_wait_mult;
2817                         elr->lr_timeout = timeout;
2818                 }
2819                 elr->lr_next_sched = jiffies + elr->lr_timeout;
2820                 elr->lr_next_group = group + 1;
2821         }
2822         return ret;
2823 }
2824
2825 /*
2826  * Remove lr_request from the list_request and free the
2827  * request structure. Should be called with li_list_mtx held
2828  */
2829 static void ext4_remove_li_request(struct ext4_li_request *elr)
2830 {
2831         struct ext4_sb_info *sbi;
2832
2833         if (!elr)
2834                 return;
2835
2836         sbi = elr->lr_sbi;
2837
2838         list_del(&elr->lr_request);
2839         sbi->s_li_request = NULL;
2840         kfree(elr);
2841 }
2842
2843 static void ext4_unregister_li_request(struct super_block *sb)
2844 {
2845         mutex_lock(&ext4_li_mtx);
2846         if (!ext4_li_info) {
2847                 mutex_unlock(&ext4_li_mtx);
2848                 return;
2849         }
2850
2851         mutex_lock(&ext4_li_info->li_list_mtx);
2852         ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
2853         mutex_unlock(&ext4_li_info->li_list_mtx);
2854         mutex_unlock(&ext4_li_mtx);
2855 }
2856
2857 static struct task_struct *ext4_lazyinit_task;
2858
2859 /*
2860  * This is the function where ext4lazyinit thread lives. It walks
2861  * through the request list searching for next scheduled filesystem.
2862  * When such a fs is found, run the lazy initialization request
2863  * (ext4_rn_li_request) and keep track of the time spend in this
2864  * function. Based on that time we compute next schedule time of
2865  * the request. When walking through the list is complete, compute
2866  * next waking time and put itself into sleep.
2867  */
2868 static int ext4_lazyinit_thread(void *arg)
2869 {
2870         struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
2871         struct list_head *pos, *n;
2872         struct ext4_li_request *elr;
2873         unsigned long next_wakeup, cur;
2874
2875         BUG_ON(NULL == eli);
2876
2877 cont_thread:
2878         while (true) {
2879                 next_wakeup = MAX_JIFFY_OFFSET;
2880
2881                 mutex_lock(&eli->li_list_mtx);
2882                 if (list_empty(&eli->li_request_list)) {
2883                         mutex_unlock(&eli->li_list_mtx);
2884                         goto exit_thread;
2885                 }
2886                 list_for_each_safe(pos, n, &eli->li_request_list) {
2887                         int err = 0;
2888                         int progress = 0;
2889                         elr = list_entry(pos, struct ext4_li_request,
2890                                          lr_request);
2891
2892                         if (time_before(jiffies, elr->lr_next_sched)) {
2893                                 if (time_before(elr->lr_next_sched, next_wakeup))
2894                                         next_wakeup = elr->lr_next_sched;
2895                                 continue;
2896                         }
2897                         if (down_read_trylock(&elr->lr_super->s_umount)) {
2898                                 if (sb_start_write_trylock(elr->lr_super)) {
2899                                         progress = 1;
2900                                         /*
2901                                          * We hold sb->s_umount, sb can not
2902                                          * be removed from the list, it is
2903                                          * now safe to drop li_list_mtx
2904                                          */
2905                                         mutex_unlock(&eli->li_list_mtx);
2906                                         err = ext4_run_li_request(elr);
2907                                         sb_end_write(elr->lr_super);
2908                                         mutex_lock(&eli->li_list_mtx);
2909                                         n = pos->next;
2910                                 }
2911                                 up_read((&elr->lr_super->s_umount));
2912                         }
2913                         /* error, remove the lazy_init job */
2914                         if (err) {
2915                                 ext4_remove_li_request(elr);
2916                                 continue;
2917                         }
2918                         if (!progress) {
2919                                 elr->lr_next_sched = jiffies +
2920                                         (prandom_u32()
2921                                          % (EXT4_DEF_LI_MAX_START_DELAY * HZ));
2922                         }
2923                         if (time_before(elr->lr_next_sched, next_wakeup))
2924                                 next_wakeup = elr->lr_next_sched;
2925                 }
2926                 mutex_unlock(&eli->li_list_mtx);
2927
2928                 try_to_freeze();
2929
2930                 cur = jiffies;
2931                 if ((time_after_eq(cur, next_wakeup)) ||
2932                     (MAX_JIFFY_OFFSET == next_wakeup)) {
2933                         cond_resched();
2934                         continue;
2935                 }
2936
2937                 schedule_timeout_interruptible(next_wakeup - cur);
2938
2939                 if (kthread_should_stop()) {
2940                         ext4_clear_request_list();
2941                         goto exit_thread;
2942                 }
2943         }
2944
2945 exit_thread:
2946         /*
2947          * It looks like the request list is empty, but we need
2948          * to check it under the li_list_mtx lock, to prevent any
2949          * additions into it, and of course we should lock ext4_li_mtx
2950          * to atomically free the list and ext4_li_info, because at
2951          * this point another ext4 filesystem could be registering
2952          * new one.
2953          */
2954         mutex_lock(&ext4_li_mtx);
2955         mutex_lock(&eli->li_list_mtx);
2956         if (!list_empty(&eli->li_request_list)) {
2957                 mutex_unlock(&eli->li_list_mtx);
2958                 mutex_unlock(&ext4_li_mtx);
2959                 goto cont_thread;
2960         }
2961         mutex_unlock(&eli->li_list_mtx);
2962         kfree(ext4_li_info);
2963         ext4_li_info = NULL;
2964         mutex_unlock(&ext4_li_mtx);
2965
2966         return 0;
2967 }
2968
2969 static void ext4_clear_request_list(void)
2970 {
2971         struct list_head *pos, *n;
2972         struct ext4_li_request *elr;
2973
2974         mutex_lock(&ext4_li_info->li_list_mtx);
2975         list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
2976                 elr = list_entry(pos, struct ext4_li_request,
2977                                  lr_request);
2978                 ext4_remove_li_request(elr);
2979         }
2980         mutex_unlock(&ext4_li_info->li_list_mtx);
2981 }
2982
2983 static int ext4_run_lazyinit_thread(void)
2984 {
2985         ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
2986                                          ext4_li_info, "ext4lazyinit");
2987         if (IS_ERR(ext4_lazyinit_task)) {
2988                 int err = PTR_ERR(ext4_lazyinit_task);
2989                 ext4_clear_request_list();
2990                 kfree(ext4_li_info);
2991                 ext4_li_info = NULL;
2992                 printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
2993                                  "initialization thread\n",
2994                                  err);
2995                 return err;
2996         }
2997         ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
2998         return 0;
2999 }
3000
3001 /*
3002  * Check whether it make sense to run itable init. thread or not.
3003  * If there is at least one uninitialized inode table, return
3004  * corresponding group number, else the loop goes through all
3005  * groups and return total number of groups.
3006  */
3007 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3008 {
3009         ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3010         struct ext4_group_desc *gdp = NULL;
3011
3012         for (group = 0; group < ngroups; group++) {
3013                 gdp = ext4_get_group_desc(sb, group, NULL);
3014                 if (!gdp)
3015                         continue;
3016
3017                 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3018                         break;
3019         }
3020
3021         return group;
3022 }
3023
3024 static int ext4_li_info_new(void)
3025 {
3026         struct ext4_lazy_init *eli = NULL;
3027
3028         eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3029         if (!eli)
3030                 return -ENOMEM;
3031
3032         INIT_LIST_HEAD(&eli->li_request_list);
3033         mutex_init(&eli->li_list_mtx);
3034
3035         eli->li_state |= EXT4_LAZYINIT_QUIT;
3036
3037         ext4_li_info = eli;
3038
3039         return 0;
3040 }
3041
3042 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3043                                             ext4_group_t start)
3044 {
3045         struct ext4_sb_info *sbi = EXT4_SB(sb);
3046         struct ext4_li_request *elr;
3047
3048         elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3049         if (!elr)
3050                 return NULL;
3051
3052         elr->lr_super = sb;
3053         elr->lr_sbi = sbi;
3054         elr->lr_next_group = start;
3055
3056         /*
3057          * Randomize first schedule time of the request to
3058          * spread the inode table initialization requests
3059          * better.
3060          */
3061         elr->lr_next_sched = jiffies + (prandom_u32() %
3062                                 (EXT4_DEF_LI_MAX_START_DELAY * HZ));
3063         return elr;
3064 }
3065
3066 int ext4_register_li_request(struct super_block *sb,
3067                              ext4_group_t first_not_zeroed)
3068 {
3069         struct ext4_sb_info *sbi = EXT4_SB(sb);
3070         struct ext4_li_request *elr = NULL;
3071         ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3072         int ret = 0;
3073
3074         mutex_lock(&ext4_li_mtx);
3075         if (sbi->s_li_request != NULL) {
3076                 /*
3077                  * Reset timeout so it can be computed again, because
3078                  * s_li_wait_mult might have changed.
3079                  */
3080                 sbi->s_li_request->lr_timeout = 0;
3081                 goto out;
3082         }
3083
3084         if (first_not_zeroed == ngroups ||
3085             (sb->s_flags & MS_RDONLY) ||
3086             !test_opt(sb, INIT_INODE_TABLE))
3087                 goto out;
3088
3089         elr = ext4_li_request_new(sb, first_not_zeroed);
3090         if (!elr) {
3091                 ret = -ENOMEM;
3092                 goto out;
3093         }
3094
3095         if (NULL == ext4_li_info) {
3096                 ret = ext4_li_info_new();
3097                 if (ret)
3098                         goto out;
3099         }
3100
3101         mutex_lock(&ext4_li_info->li_list_mtx);
3102         list_add(&elr->lr_request, &ext4_li_info->li_request_list);
3103         mutex_unlock(&ext4_li_info->li_list_mtx);
3104
3105         sbi->s_li_request = elr;
3106         /*
3107          * set elr to NULL here since it has been inserted to
3108          * the request_list and the removal and free of it is
3109          * handled by ext4_clear_request_list from now on.
3110          */
3111         elr = NULL;
3112
3113         if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
3114                 ret = ext4_run_lazyinit_thread();
3115                 if (ret)
3116                         goto out;
3117         }
3118 out:
3119         mutex_unlock(&ext4_li_mtx);
3120         if (ret)
3121                 kfree(elr);
3122         return ret;
3123 }
3124
3125 /*
3126  * We do not need to lock anything since this is called on
3127  * module unload.
3128  */
3129 static void ext4_destroy_lazyinit_thread(void)
3130 {
3131         /*
3132          * If thread exited earlier
3133          * there's nothing to be done.
3134          */
3135         if (!ext4_li_info || !ext4_lazyinit_task)
3136                 return;
3137
3138         kthread_stop(ext4_lazyinit_task);
3139 }
3140
3141 static int set_journal_csum_feature_set(struct super_block *sb)
3142 {
3143         int ret = 1;
3144         int compat, incompat;
3145         struct ext4_sb_info *sbi = EXT4_SB(sb);
3146
3147         if (ext4_has_metadata_csum(sb)) {
3148                 /* journal checksum v3 */
3149                 compat = 0;
3150                 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
3151         } else {
3152                 /* journal checksum v1 */
3153                 compat = JBD2_FEATURE_COMPAT_CHECKSUM;
3154                 incompat = 0;
3155         }
3156
3157         jbd2_journal_clear_features(sbi->s_journal,
3158                         JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3159                         JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3160                         JBD2_FEATURE_INCOMPAT_CSUM_V2);
3161         if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3162                 ret = jbd2_journal_set_features(sbi->s_journal,
3163                                 compat, 0,
3164                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3165                                 incompat);
3166         } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
3167                 ret = jbd2_journal_set_features(sbi->s_journal,
3168                                 compat, 0,
3169                                 incompat);
3170                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3171                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3172         } else {
3173                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3174                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3175         }
3176
3177         return ret;
3178 }
3179
3180 /*
3181  * Note: calculating the overhead so we can be compatible with
3182  * historical BSD practice is quite difficult in the face of
3183  * clusters/bigalloc.  This is because multiple metadata blocks from
3184  * different block group can end up in the same allocation cluster.
3185  * Calculating the exact overhead in the face of clustered allocation
3186  * requires either O(all block bitmaps) in memory or O(number of block
3187  * groups**2) in time.  We will still calculate the superblock for
3188  * older file systems --- and if we come across with a bigalloc file
3189  * system with zero in s_overhead_clusters the estimate will be close to
3190  * correct especially for very large cluster sizes --- but for newer
3191  * file systems, it's better to calculate this figure once at mkfs
3192  * time, and store it in the superblock.  If the superblock value is
3193  * present (even for non-bigalloc file systems), we will use it.
3194  */
3195 static int count_overhead(struct super_block *sb, ext4_group_t grp,
3196                           char *buf)
3197 {
3198         struct ext4_sb_info     *sbi = EXT4_SB(sb);
3199         struct ext4_group_desc  *gdp;
3200         ext4_fsblk_t            first_block, last_block, b;
3201         ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
3202         int                     s, j, count = 0;
3203
3204         if (!ext4_has_feature_bigalloc(sb))
3205                 return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
3206                         sbi->s_itb_per_group + 2);
3207
3208         first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
3209                 (grp * EXT4_BLOCKS_PER_GROUP(sb));
3210         last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
3211         for (i = 0; i < ngroups; i++) {
3212                 gdp = ext4_get_group_desc(sb, i, NULL);
3213                 b = ext4_block_bitmap(sb, gdp);
3214                 if (b >= first_block && b <= last_block) {
3215                         ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3216                         count++;
3217                 }
3218                 b = ext4_inode_bitmap(sb, gdp);
3219                 if (b >= first_block && b <= last_block) {
3220                         ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3221                         count++;
3222                 }
3223                 b = ext4_inode_table(sb, gdp);
3224                 if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
3225                         for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
3226                                 int c = EXT4_B2C(sbi, b - first_block);
3227                                 ext4_set_bit(c, buf);
3228                                 count++;
3229                         }
3230                 if (i != grp)
3231                         continue;
3232                 s = 0;
3233                 if (ext4_bg_has_super(sb, grp)) {
3234                         ext4_set_bit(s++, buf);
3235                         count++;
3236                 }
3237                 j = ext4_bg_num_gdb(sb, grp);
3238                 if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
3239                         ext4_error(sb, "Invalid number of block group "
3240                                    "descriptor blocks: %d", j);
3241                         j = EXT4_BLOCKS_PER_GROUP(sb) - s;
3242                 }
3243                 count += j;
3244                 for (; j > 0; j--)
3245                         ext4_set_bit(EXT4_B2C(sbi, s++), buf);
3246         }
3247         if (!count)
3248                 return 0;
3249         return EXT4_CLUSTERS_PER_GROUP(sb) -
3250                 ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
3251 }
3252
3253 /*
3254  * Compute the overhead and stash it in sbi->s_overhead
3255  */
3256 int ext4_calculate_overhead(struct super_block *sb)
3257 {
3258         struct ext4_sb_info *sbi = EXT4_SB(sb);
3259         struct ext4_super_block *es = sbi->s_es;
3260         struct inode *j_inode;
3261         unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
3262         ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3263         ext4_fsblk_t overhead = 0;
3264         char *buf = (char *) get_zeroed_page(GFP_NOFS);
3265
3266         if (!buf)
3267                 return -ENOMEM;
3268
3269         /*
3270          * Compute the overhead (FS structures).  This is constant
3271          * for a given filesystem unless the number of block groups
3272          * changes so we cache the previous value until it does.
3273          */
3274
3275         /*
3276          * All of the blocks before first_data_block are overhead
3277          */
3278         overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
3279
3280         /*
3281          * Add the overhead found in each block group
3282          */
3283         for (i = 0; i < ngroups; i++) {
3284                 int blks;
3285
3286                 blks = count_overhead(sb, i, buf);
3287                 overhead += blks;
3288                 if (blks)
3289                         memset(buf, 0, PAGE_SIZE);
3290                 cond_resched();
3291         }
3292
3293         /*
3294          * Add the internal journal blocks whether the journal has been
3295          * loaded or not
3296          */
3297         if (sbi->s_journal && !sbi->journal_bdev)
3298                 overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
3299         else if (ext4_has_feature_journal(sb) && !sbi->s_journal) {
3300                 j_inode = ext4_get_journal_inode(sb, j_inum);
3301                 if (j_inode) {
3302                         j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
3303                         overhead += EXT4_NUM_B2C(sbi, j_blocks);
3304                         iput(j_inode);
3305                 } else {
3306                         ext4_msg(sb, KERN_ERR, "can't get journal size");
3307                 }
3308         }
3309         sbi->s_overhead = overhead;
3310         smp_wmb();
3311         free_page((unsigned long) buf);
3312         return 0;
3313 }
3314
3315 static void ext4_set_resv_clusters(struct super_block *sb)
3316 {
3317         ext4_fsblk_t resv_clusters;
3318         struct ext4_sb_info *sbi = EXT4_SB(sb);
3319
3320         /*
3321          * There's no need to reserve anything when we aren't using extents.
3322          * The space estimates are exact, there are no unwritten extents,
3323          * hole punching doesn't need new metadata... This is needed especially
3324          * to keep ext2/3 backward compatibility.
3325          */
3326         if (!ext4_has_feature_extents(sb))
3327                 return;
3328         /*
3329          * By default we reserve 2% or 4096 clusters, whichever is smaller.
3330          * This should cover the situations where we can not afford to run
3331          * out of space like for example punch hole, or converting
3332          * unwritten extents in delalloc path. In most cases such
3333          * allocation would require 1, or 2 blocks, higher numbers are
3334          * very rare.
3335          */
3336         resv_clusters = (ext4_blocks_count(sbi->s_es) >>
3337                          sbi->s_cluster_bits);
3338
3339         do_div(resv_clusters, 50);
3340         resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
3341
3342         atomic64_set(&sbi->s_resv_clusters, resv_clusters);
3343 }
3344
3345 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3346 {
3347         char *orig_data = kstrdup(data, GFP_KERNEL);
3348         struct buffer_head *bh;
3349         struct ext4_super_block *es = NULL;
3350         struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
3351         ext4_fsblk_t block;
3352         ext4_fsblk_t sb_block = get_sb_block(&data);
3353         ext4_fsblk_t logical_sb_block;
3354         unsigned long offset = 0;
3355         unsigned long journal_devnum = 0;
3356         unsigned long def_mount_opts;
3357         struct inode *root;
3358         const char *descr;
3359         int ret = -ENOMEM;
3360         int blocksize, clustersize;
3361         unsigned int db_count;
3362         unsigned int i;
3363         int needs_recovery, has_huge_files, has_bigalloc;
3364         __u64 blocks_count;
3365         int err = 0;
3366         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3367         ext4_group_t first_not_zeroed;
3368
3369         if ((data && !orig_data) || !sbi)
3370                 goto out_free_base;
3371
3372         if (!userns_mounts && !capable(CAP_SYS_ADMIN)) {
3373                 ret = -EPERM;
3374                 goto out_free_base;
3375         }
3376
3377         sbi->s_blockgroup_lock =
3378                 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
3379         if (!sbi->s_blockgroup_lock)
3380                 goto out_free_base;
3381
3382         sb->s_fs_info = sbi;
3383         sbi->s_sb = sb;
3384         sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
3385         sbi->s_sb_block = sb_block;
3386         if (sb->s_bdev->bd_part)
3387                 sbi->s_sectors_written_start =
3388                         part_stat_read(sb->s_bdev->bd_part, sectors[1]);
3389
3390         /* Cleanup superblock name */
3391         strreplace(sb->s_id, '/', '!');
3392
3393         /* -EINVAL is default */
3394         ret = -EINVAL;
3395         blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
3396         if (!blocksize) {
3397                 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
3398                 goto out_fail;
3399         }
3400
3401         /*
3402          * The ext4 superblock will not be buffer aligned for other than 1kB
3403          * block sizes.  We need to calculate the offset from buffer start.
3404          */
3405         if (blocksize != EXT4_MIN_BLOCK_SIZE) {
3406                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3407                 offset = do_div(logical_sb_block, blocksize);
3408         } else {
3409                 logical_sb_block = sb_block;
3410         }
3411
3412         if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
3413                 ext4_msg(sb, KERN_ERR, "unable to read superblock");
3414                 goto out_fail;
3415         }
3416         /*
3417          * Note: s_es must be initialized as soon as possible because
3418          *       some ext4 macro-instructions depend on its value
3419          */
3420         es = (struct ext4_super_block *) (bh->b_data + offset);
3421         sbi->s_es = es;
3422         sb->s_magic = le16_to_cpu(es->s_magic);
3423         if (sb->s_magic != EXT4_SUPER_MAGIC)
3424                 goto cantfind_ext4;
3425         sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
3426
3427         /* Warn if metadata_csum and gdt_csum are both set. */
3428         if (ext4_has_feature_metadata_csum(sb) &&
3429             ext4_has_feature_gdt_csum(sb))
3430                 ext4_warning(sb, "metadata_csum and uninit_bg are "
3431                              "redundant flags; please run fsck.");
3432
3433         /* Check for a known checksum algorithm */
3434         if (!ext4_verify_csum_type(sb, es)) {
3435                 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3436                          "unknown checksum algorithm.");
3437                 silent = 1;
3438                 goto cantfind_ext4;
3439         }
3440
3441         /* Load the checksum driver */
3442         if (ext4_has_feature_metadata_csum(sb)) {
3443                 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3444                 if (IS_ERR(sbi->s_chksum_driver)) {
3445                         ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
3446                         ret = PTR_ERR(sbi->s_chksum_driver);
3447                         sbi->s_chksum_driver = NULL;
3448                         goto failed_mount;
3449                 }
3450         }
3451
3452         /* Check superblock checksum */
3453         if (!ext4_superblock_csum_verify(sb, es)) {
3454                 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3455                          "invalid superblock checksum.  Run e2fsck?");
3456                 silent = 1;
3457                 ret = -EFSBADCRC;
3458                 goto cantfind_ext4;
3459         }
3460
3461         /* Precompute checksum seed for all metadata */
3462         if (ext4_has_feature_csum_seed(sb))
3463                 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
3464         else if (ext4_has_metadata_csum(sb))
3465                 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3466                                                sizeof(es->s_uuid));
3467
3468         /* Set defaults before we parse the mount options */
3469         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3470         set_opt(sb, INIT_INODE_TABLE);
3471         if (def_mount_opts & EXT4_DEFM_DEBUG)
3472                 set_opt(sb, DEBUG);
3473         if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
3474                 set_opt(sb, GRPID);
3475         if (def_mount_opts & EXT4_DEFM_UID16)
3476                 set_opt(sb, NO_UID32);
3477         /* xattr user namespace & acls are now defaulted on */
3478         set_opt(sb, XATTR_USER);
3479 #ifdef CONFIG_EXT4_FS_POSIX_ACL
3480         set_opt(sb, POSIX_ACL);
3481 #endif
3482         /* don't forget to enable journal_csum when metadata_csum is enabled. */
3483         if (ext4_has_metadata_csum(sb))
3484                 set_opt(sb, JOURNAL_CHECKSUM);
3485
3486         if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3487                 set_opt(sb, JOURNAL_DATA);
3488         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
3489                 set_opt(sb, ORDERED_DATA);
3490         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
3491                 set_opt(sb, WRITEBACK_DATA);
3492
3493         if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) {
3494                 if (!capable(CAP_SYS_ADMIN))
3495                         goto failed_mount;
3496                 set_opt(sb, ERRORS_PANIC);
3497         } else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) {
3498                 set_opt(sb, ERRORS_CONT);
3499         } else {
3500                 set_opt(sb, ERRORS_RO);
3501         }
3502         /* block_validity enabled by default; disable with noblock_validity */
3503         set_opt(sb, BLOCK_VALIDITY);
3504         if (def_mount_opts & EXT4_DEFM_DISCARD)
3505                 set_opt(sb, DISCARD);
3506
3507         sbi->s_resuid = make_kuid(sb->s_user_ns, le16_to_cpu(es->s_def_resuid));
3508         if (!uid_valid(sbi->s_resuid))
3509                 sbi->s_resuid = make_kuid(sb->s_user_ns, EXT4_DEF_RESUID);
3510         sbi->s_resgid = make_kgid(sb->s_user_ns, le16_to_cpu(es->s_def_resgid));
3511         if (!gid_valid(sbi->s_resgid))
3512                 sbi->s_resgid = make_kgid(sb->s_user_ns, EXT4_DEF_RESGID);
3513         sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
3514         sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
3515         sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
3516
3517         if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
3518                 set_opt(sb, BARRIER);
3519
3520         /*
3521          * enable delayed allocation by default
3522          * Use -o nodelalloc to turn it off
3523          */
3524         if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
3525             ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3526                 set_opt(sb, DELALLOC);
3527
3528         /*
3529          * set default s_li_wait_mult for lazyinit, for the case there is
3530          * no mount option specified.
3531          */
3532         sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
3533
3534         if (sbi->s_es->s_mount_opts[0]) {
3535                 char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
3536                                               sizeof(sbi->s_es->s_mount_opts),
3537                                               GFP_KERNEL);
3538                 if (!s_mount_opts)
3539                         goto failed_mount;
3540                 if (!parse_options(s_mount_opts, sb, &journal_devnum,
3541                                    &journal_ioprio, 0)) {
3542                         ext4_msg(sb, KERN_WARNING,
3543                                  "failed to parse options in superblock: %s",
3544                                  s_mount_opts);
3545                 }
3546                 kfree(s_mount_opts);
3547         }
3548         sbi->s_def_mount_opt = sbi->s_mount_opt;
3549         if (!parse_options((char *) data, sb, &journal_devnum,
3550                            &journal_ioprio, 0))
3551                 goto failed_mount;
3552
3553         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3554                 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
3555                             "with data=journal disables delayed "
3556                             "allocation and O_DIRECT support!\n");
3557                 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3558                         ext4_msg(sb, KERN_ERR, "can't mount with "
3559                                  "both data=journal and delalloc");
3560                         goto failed_mount;
3561                 }
3562                 if (test_opt(sb, DIOREAD_NOLOCK)) {
3563                         ext4_msg(sb, KERN_ERR, "can't mount with "
3564                                  "both data=journal and dioread_nolock");
3565                         goto failed_mount;
3566                 }
3567                 if (test_opt(sb, DAX)) {
3568                         ext4_msg(sb, KERN_ERR, "can't mount with "
3569                                  "both data=journal and dax");
3570                         goto failed_mount;
3571                 }
3572                 if (ext4_has_feature_encrypt(sb)) {
3573                         ext4_msg(sb, KERN_WARNING,
3574                                  "encrypted files will use data=ordered "
3575                                  "instead of data journaling mode");
3576                 }
3577                 if (test_opt(sb, DELALLOC))
3578                         clear_opt(sb, DELALLOC);
3579         } else {
3580                 sb->s_iflags |= SB_I_CGROUPWB;
3581         }
3582
3583         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3584                 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3585
3586         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
3587             (ext4_has_compat_features(sb) ||
3588              ext4_has_ro_compat_features(sb) ||
3589              ext4_has_incompat_features(sb)))
3590                 ext4_msg(sb, KERN_WARNING,
3591                        "feature flags set on rev 0 fs, "
3592                        "running e2fsck is recommended");
3593
3594         if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
3595                 set_opt2(sb, HURD_COMPAT);
3596                 if (ext4_has_feature_64bit(sb)) {
3597                         ext4_msg(sb, KERN_ERR,
3598                                  "The Hurd can't support 64-bit file systems");
3599                         goto failed_mount;
3600                 }
3601         }
3602
3603         if (IS_EXT2_SB(sb)) {
3604                 if (ext2_feature_set_ok(sb))
3605                         ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
3606                                  "using the ext4 subsystem");
3607                 else {
3608                         ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
3609                                  "to feature incompatibilities");
3610                         goto failed_mount;
3611                 }
3612         }
3613
3614         if (IS_EXT3_SB(sb)) {
3615                 if (ext3_feature_set_ok(sb))
3616                         ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
3617                                  "using the ext4 subsystem");
3618                 else {
3619                         ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
3620                                  "to feature incompatibilities");
3621                         goto failed_mount;
3622                 }
3623         }
3624
3625         /*
3626          * Check feature flags regardless of the revision level, since we
3627          * previously didn't change the revision level when setting the flags,
3628          * so there is a chance incompat flags are set on a rev 0 filesystem.
3629          */
3630         if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3631                 goto failed_mount;
3632
3633         blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3634         if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3635             blocksize > EXT4_MAX_BLOCK_SIZE) {
3636                 ext4_msg(sb, KERN_ERR,
3637                        "Unsupported filesystem blocksize %d (%d log_block_size)",
3638                          blocksize, le32_to_cpu(es->s_log_block_size));
3639                 goto failed_mount;
3640         }
3641         if (le32_to_cpu(es->s_log_block_size) >
3642             (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
3643                 ext4_msg(sb, KERN_ERR,
3644                          "Invalid log block size: %u",
3645                          le32_to_cpu(es->s_log_block_size));
3646                 goto failed_mount;
3647         }
3648
3649         if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
3650                 ext4_msg(sb, KERN_ERR,
3651                          "Number of reserved GDT blocks insanely large: %d",
3652                          le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
3653                 goto failed_mount;
3654         }
3655
3656         if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
3657                 err = bdev_dax_supported(sb, blocksize);
3658                 if (err)
3659                         goto failed_mount;
3660         }
3661
3662         if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
3663                 ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
3664                          es->s_encryption_level);
3665                 goto failed_mount;
3666         }
3667
3668         if (sb->s_blocksize != blocksize) {
3669                 /* Validate the filesystem blocksize */
3670                 if (!sb_set_blocksize(sb, blocksize)) {
3671                         ext4_msg(sb, KERN_ERR, "bad block size %d",
3672                                         blocksize);
3673                         goto failed_mount;
3674                 }
3675
3676                 brelse(bh);
3677                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3678                 offset = do_div(logical_sb_block, blocksize);
3679                 bh = sb_bread_unmovable(sb, logical_sb_block);
3680                 if (!bh) {
3681                         ext4_msg(sb, KERN_ERR,
3682                                "Can't read superblock on 2nd try");
3683                         goto failed_mount;
3684                 }
3685                 es = (struct ext4_super_block *)(bh->b_data + offset);
3686                 sbi->s_es = es;
3687                 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
3688                         ext4_msg(sb, KERN_ERR,
3689                                "Magic mismatch, very weird!");
3690                         goto failed_mount;
3691                 }
3692         }
3693
3694         has_huge_files = ext4_has_feature_huge_file(sb);
3695         sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
3696                                                       has_huge_files);
3697         sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
3698
3699         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
3700                 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
3701                 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
3702         } else {
3703                 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
3704                 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
3705                 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
3706                     (!is_power_of_2(sbi->s_inode_size)) ||
3707                     (sbi->s_inode_size > blocksize)) {
3708                         ext4_msg(sb, KERN_ERR,
3709                                "unsupported inode size: %d",
3710                                sbi->s_inode_size);
3711                         goto failed_mount;
3712                 }
3713                 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
3714                         sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
3715         }
3716
3717         sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
3718         if (ext4_has_feature_64bit(sb)) {
3719                 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
3720                     sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
3721                     !is_power_of_2(sbi->s_desc_size)) {
3722                         ext4_msg(sb, KERN_ERR,
3723                                "unsupported descriptor size %lu",
3724                                sbi->s_desc_size);
3725                         goto failed_mount;
3726                 }
3727         } else
3728                 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
3729
3730         sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
3731         sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
3732
3733         sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
3734         if (sbi->s_inodes_per_block == 0)
3735                 goto cantfind_ext4;
3736         if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
3737             sbi->s_inodes_per_group > blocksize * 8) {
3738                 ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
3739                          sbi->s_blocks_per_group);
3740                 goto failed_mount;
3741         }
3742         sbi->s_itb_per_group = sbi->s_inodes_per_group /
3743                                         sbi->s_inodes_per_block;
3744         sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
3745         sbi->s_sbh = bh;
3746         sbi->s_mount_state = le16_to_cpu(es->s_state);
3747         sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
3748         sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
3749
3750         for (i = 0; i < 4; i++)
3751                 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
3752         sbi->s_def_hash_version = es->s_def_hash_version;
3753         if (ext4_has_feature_dir_index(sb)) {
3754                 i = le32_to_cpu(es->s_flags);
3755                 if (i & EXT2_FLAGS_UNSIGNED_HASH)
3756                         sbi->s_hash_unsigned = 3;
3757                 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
3758 #ifdef __CHAR_UNSIGNED__
3759                         if (!(sb->s_flags & MS_RDONLY))
3760                                 es->s_flags |=
3761                                         cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
3762                         sbi->s_hash_unsigned = 3;
3763 #else
3764                         if (!(sb->s_flags & MS_RDONLY))
3765                                 es->s_flags |=
3766                                         cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
3767 #endif
3768                 }
3769         }
3770
3771         /* Handle clustersize */
3772         clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
3773         has_bigalloc = ext4_has_feature_bigalloc(sb);
3774         if (has_bigalloc) {
3775                 if (clustersize < blocksize) {
3776                         ext4_msg(sb, KERN_ERR,
3777                                  "cluster size (%d) smaller than "
3778                                  "block size (%d)", clustersize, blocksize);
3779                         goto failed_mount;
3780                 }
3781                 if (le32_to_cpu(es->s_log_cluster_size) >
3782                     (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
3783                         ext4_msg(sb, KERN_ERR,
3784                                  "Invalid log cluster size: %u",
3785                                  le32_to_cpu(es->s_log_cluster_size));
3786                         goto failed_mount;
3787                 }
3788                 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
3789                         le32_to_cpu(es->s_log_block_size);
3790                 sbi->s_clusters_per_group =
3791                         le32_to_cpu(es->s_clusters_per_group);
3792                 if (sbi->s_clusters_per_group > blocksize * 8) {
3793                         ext4_msg(sb, KERN_ERR,
3794                                  "#clusters per group too big: %lu",
3795                                  sbi->s_clusters_per_group);
3796                         goto failed_mount;
3797                 }
3798                 if (sbi->s_blocks_per_group !=
3799                     (sbi->s_clusters_per_group * (clustersize / blocksize))) {
3800                         ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
3801                                  "clusters per group (%lu) inconsistent",
3802                                  sbi->s_blocks_per_group,
3803                                  sbi->s_clusters_per_group);
3804                         goto failed_mount;
3805                 }
3806         } else {
3807                 if (clustersize != blocksize) {
3808                         ext4_warning(sb, "fragment/cluster size (%d) != "
3809                                      "block size (%d)", clustersize,
3810                                      blocksize);
3811                         clustersize = blocksize;
3812                 }
3813                 if (sbi->s_blocks_per_group > blocksize * 8) {
3814                         ext4_msg(sb, KERN_ERR,
3815                                  "#blocks per group too big: %lu",
3816                                  sbi->s_blocks_per_group);
3817                         goto failed_mount;
3818                 }
3819                 sbi->s_clusters_per_group = sbi->s_blocks_per_group;
3820                 sbi->s_cluster_bits = 0;
3821         }
3822         sbi->s_cluster_ratio = clustersize / blocksize;
3823
3824         /* Do we have standard group size of clustersize * 8 blocks ? */
3825         if (sbi->s_blocks_per_group == clustersize << 3)
3826                 set_opt2(sb, STD_GROUP_SIZE);
3827
3828         /*
3829          * Test whether we have more sectors than will fit in sector_t,
3830          * and whether the max offset is addressable by the page cache.
3831          */
3832         err = generic_check_addressable(sb->s_blocksize_bits,
3833                                         ext4_blocks_count(es));
3834         if (err) {
3835                 ext4_msg(sb, KERN_ERR, "filesystem"
3836                          " too large to mount safely on this system");
3837                 if (sizeof(sector_t) < 8)
3838                         ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
3839                 goto failed_mount;
3840         }
3841
3842         if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
3843                 goto cantfind_ext4;
3844
3845         /* check blocks count against device size */
3846         blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
3847         if (blocks_count && ext4_blocks_count(es) > blocks_count) {
3848                 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
3849                        "exceeds size of device (%llu blocks)",
3850                        ext4_blocks_count(es), blocks_count);
3851                 goto failed_mount;
3852         }
3853
3854         /*
3855          * It makes no sense for the first data block to be beyond the end
3856          * of the filesystem.
3857          */
3858         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
3859                 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
3860                          "block %u is beyond end of filesystem (%llu)",
3861                          le32_to_cpu(es->s_first_data_block),
3862                          ext4_blocks_count(es));
3863                 goto failed_mount;
3864         }
3865         blocks_count = (ext4_blocks_count(es) -
3866                         le32_to_cpu(es->s_first_data_block) +
3867                         EXT4_BLOCKS_PER_GROUP(sb) - 1);
3868         do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
3869         if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
3870                 ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
3871                        "(block count %llu, first data block %u, "
3872                        "blocks per group %lu)", sbi->s_groups_count,
3873                        ext4_blocks_count(es),
3874                        le32_to_cpu(es->s_first_data_block),
3875                        EXT4_BLOCKS_PER_GROUP(sb));
3876                 goto failed_mount;
3877         }
3878         sbi->s_groups_count = blocks_count;
3879         sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
3880                         (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
3881         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
3882                    EXT4_DESC_PER_BLOCK(sb);
3883         if (ext4_has_feature_meta_bg(sb)) {
3884                 if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
3885                         ext4_msg(sb, KERN_WARNING,
3886                                  "first meta block group too large: %u "
3887                                  "(group descriptor block count %u)",
3888                                  le32_to_cpu(es->s_first_meta_bg), db_count);
3889                         goto failed_mount;
3890                 }
3891         }
3892         sbi->s_group_desc = ext4_kvmalloc(db_count *
3893                                           sizeof(struct buffer_head *),
3894                                           GFP_KERNEL);
3895         if (sbi->s_group_desc == NULL) {
3896                 ext4_msg(sb, KERN_ERR, "not enough memory");
3897                 ret = -ENOMEM;
3898                 goto failed_mount;
3899         }
3900
3901         bgl_lock_init(sbi->s_blockgroup_lock);
3902
3903         for (i = 0; i < db_count; i++) {
3904                 block = descriptor_loc(sb, logical_sb_block, i);
3905                 sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
3906                 if (!sbi->s_group_desc[i]) {
3907                         ext4_msg(sb, KERN_ERR,
3908                                "can't read group descriptor %d", i);
3909                         db_count = i;
3910                         goto failed_mount2;
3911                 }
3912         }
3913         if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
3914                 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3915                 ret = -EFSCORRUPTED;
3916                 goto failed_mount2;
3917         }
3918
3919         sbi->s_gdb_count = db_count;
3920         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3921         spin_lock_init(&sbi->s_next_gen_lock);
3922
3923         setup_timer(&sbi->s_err_report, print_daily_error_info,
3924                 (unsigned long) sb);
3925
3926         /* Register extent status tree shrinker */
3927         if (ext4_es_register_shrinker(sbi))
3928                 goto failed_mount3;
3929
3930         sbi->s_stripe = ext4_get_stripe_size(sbi);
3931         sbi->s_extent_max_zeroout_kb = 32;
3932
3933         /*
3934          * set up enough so that it can read an inode
3935          */
3936         sb->s_op = &ext4_sops;
3937         sb->s_export_op = &ext4_export_ops;
3938         sb->s_xattr = ext4_xattr_handlers;
3939         sb->s_cop = &ext4_cryptops;
3940 #ifdef CONFIG_QUOTA
3941         sb->dq_op = &ext4_quota_operations;
3942         if (ext4_has_feature_quota(sb))
3943                 sb->s_qcop = &dquot_quotactl_sysfile_ops;
3944         else
3945                 sb->s_qcop = &ext4_qctl_operations;
3946         sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
3947 #endif
3948         memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3949
3950         INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3951         mutex_init(&sbi->s_orphan_lock);
3952
3953         sb->s_root = NULL;
3954
3955         needs_recovery = (es->s_last_orphan != 0 ||
3956                           ext4_has_feature_journal_needs_recovery(sb));
3957
3958         if (ext4_has_feature_mmp(sb) && !(sb->s_flags & MS_RDONLY))
3959                 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
3960                         goto failed_mount3a;
3961
3962         /*
3963          * The first inode we look at is the journal inode.  Don't try
3964          * root first: it may be modified in the journal!
3965          */
3966         if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
3967                 err = ext4_load_journal(sb, es, journal_devnum);
3968                 if (err)
3969                         goto failed_mount3a;
3970         } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
3971                    ext4_has_feature_journal_needs_recovery(sb)) {
3972                 ext4_msg(sb, KERN_ERR, "required journal recovery "
3973                        "suppressed and not mounted read-only");
3974                 goto failed_mount_wq;
3975         } else {
3976                 /* Nojournal mode, all journal mount options are illegal */
3977                 if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
3978                         ext4_msg(sb, KERN_ERR, "can't mount with "
3979                                  "journal_checksum, fs mounted w/o journal");
3980                         goto failed_mount_wq;
3981                 }
3982                 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3983                         ext4_msg(sb, KERN_ERR, "can't mount with "
3984                                  "journal_async_commit, fs mounted w/o journal");
3985                         goto failed_mount_wq;
3986                 }
3987                 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
3988                         ext4_msg(sb, KERN_ERR, "can't mount with "
3989                                  "commit=%lu, fs mounted w/o journal",
3990                                  sbi->s_commit_interval / HZ);
3991                         goto failed_mount_wq;
3992                 }
3993                 if (EXT4_MOUNT_DATA_FLAGS &
3994                     (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
3995                         ext4_msg(sb, KERN_ERR, "can't mount with "
3996                                  "data=, fs mounted w/o journal");
3997                         goto failed_mount_wq;
3998                 }
3999                 sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM;
4000                 clear_opt(sb, JOURNAL_CHECKSUM);
4001                 clear_opt(sb, DATA_FLAGS);
4002                 sbi->s_journal = NULL;
4003                 needs_recovery = 0;
4004                 goto no_journal;
4005         }
4006
4007         if (ext4_has_feature_64bit(sb) &&
4008             !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4009                                        JBD2_FEATURE_INCOMPAT_64BIT)) {
4010                 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4011                 goto failed_mount_wq;
4012         }
4013
4014         if (!set_journal_csum_feature_set(sb)) {
4015                 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4016                          "feature set");
4017                 goto failed_mount_wq;
4018         }
4019
4020         /* We have now updated the journal if required, so we can
4021          * validate the data journaling mode. */
4022         switch (test_opt(sb, DATA_FLAGS)) {
4023         case 0:
4024                 /* No mode set, assume a default based on the journal
4025                  * capabilities: ORDERED_DATA if the journal can
4026                  * cope, else JOURNAL_DATA
4027                  */
4028                 if (jbd2_journal_check_available_features
4029                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
4030                         set_opt(sb, ORDERED_DATA);
4031                 else
4032                         set_opt(sb, JOURNAL_DATA);
4033                 break;
4034
4035         case EXT4_MOUNT_ORDERED_DATA:
4036         case EXT4_MOUNT_WRITEBACK_DATA:
4037                 if (!jbd2_journal_check_available_features
4038                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4039                         ext4_msg(sb, KERN_ERR, "Journal does not support "
4040                                "requested data journaling mode");
4041                         goto failed_mount_wq;
4042                 }
4043         default:
4044                 break;
4045         }
4046
4047         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
4048             test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4049                 ext4_msg(sb, KERN_ERR, "can't mount with "
4050                         "journal_async_commit in data=ordered mode");
4051                 goto failed_mount_wq;
4052         }
4053
4054         set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4055
4056         sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
4057
4058 no_journal:
4059         sbi->s_mb_cache = ext4_xattr_create_cache();
4060         if (!sbi->s_mb_cache) {
4061                 ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
4062                 goto failed_mount_wq;
4063         }
4064
4065         if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
4066             (blocksize != PAGE_SIZE)) {
4067                 ext4_msg(sb, KERN_ERR,
4068                          "Unsupported blocksize for fs encryption");
4069                 goto failed_mount_wq;
4070         }
4071
4072         if (DUMMY_ENCRYPTION_ENABLED(sbi) && !(sb->s_flags & MS_RDONLY) &&
4073             !ext4_has_feature_encrypt(sb)) {
4074                 ext4_set_feature_encrypt(sb);
4075                 ext4_commit_super(sb, 1);
4076         }
4077
4078         /*
4079          * Get the # of file system overhead blocks from the
4080          * superblock if present.
4081          */
4082         if (es->s_overhead_clusters)
4083                 sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
4084         else {
4085                 err = ext4_calculate_overhead(sb);
4086                 if (err)
4087                         goto failed_mount_wq;
4088         }
4089
4090         /*
4091          * The maximum number of concurrent works can be high and
4092          * concurrency isn't really necessary.  Limit it to 1.
4093          */
4094         EXT4_SB(sb)->rsv_conversion_wq =
4095                 alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
4096         if (!EXT4_SB(sb)->rsv_conversion_wq) {
4097                 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
4098                 ret = -ENOMEM;
4099                 goto failed_mount4;
4100         }
4101
4102         /*
4103          * The jbd2_journal_load will have done any necessary log recovery,
4104          * so we can safely mount the rest of the filesystem now.
4105          */
4106
4107         root = ext4_iget(sb, EXT4_ROOT_INO);
4108         if (IS_ERR(root)) {
4109                 ext4_msg(sb, KERN_ERR, "get root inode failed");
4110                 ret = PTR_ERR(root);
4111                 root = NULL;
4112                 goto failed_mount4;
4113         }
4114         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
4115                 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
4116                 iput(root);
4117                 goto failed_mount4;
4118         }
4119         sb->s_root = d_make_root(root);
4120         if (!sb->s_root) {
4121                 ext4_msg(sb, KERN_ERR, "get root dentry failed");
4122                 ret = -ENOMEM;
4123                 goto failed_mount4;
4124         }
4125
4126         if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
4127                 sb->s_flags |= MS_RDONLY;
4128
4129         /* determine the minimum size of new large inodes, if present */
4130         if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4131                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4132                                                      EXT4_GOOD_OLD_INODE_SIZE;
4133                 if (ext4_has_feature_extra_isize(sb)) {
4134                         if (sbi->s_want_extra_isize <
4135                             le16_to_cpu(es->s_want_extra_isize))
4136                                 sbi->s_want_extra_isize =
4137                                         le16_to_cpu(es->s_want_extra_isize);
4138                         if (sbi->s_want_extra_isize <
4139                             le16_to_cpu(es->s_min_extra_isize))
4140                                 sbi->s_want_extra_isize =
4141                                         le16_to_cpu(es->s_min_extra_isize);
4142                 }
4143         }
4144         /* Check if enough inode space is available */
4145         if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
4146                                                         sbi->s_inode_size) {
4147                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4148                                                        EXT4_GOOD_OLD_INODE_SIZE;
4149                 ext4_msg(sb, KERN_INFO, "required extra inode space not"
4150                          "available");
4151         }
4152
4153         ext4_set_resv_clusters(sb);
4154
4155         err = ext4_setup_system_zone(sb);
4156         if (err) {
4157                 ext4_msg(sb, KERN_ERR, "failed to initialize system "
4158                          "zone (%d)", err);
4159                 goto failed_mount4a;
4160         }
4161
4162         ext4_ext_init(sb);
4163         err = ext4_mb_init(sb);
4164         if (err) {
4165                 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4166                          err);
4167                 goto failed_mount5;
4168         }
4169
4170         block = ext4_count_free_clusters(sb);
4171         ext4_free_blocks_count_set(sbi->s_es,
4172                                    EXT4_C2B(sbi, block));
4173         err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
4174                                   GFP_KERNEL);
4175         if (!err) {
4176                 unsigned long freei = ext4_count_free_inodes(sb);
4177                 sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
4178                 err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
4179                                           GFP_KERNEL);
4180         }
4181         if (!err)
4182                 err = percpu_counter_init(&sbi->s_dirs_counter,
4183                                           ext4_count_dirs(sb), GFP_KERNEL);
4184         if (!err)
4185                 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
4186                                           GFP_KERNEL);
4187         if (!err)
4188                 err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem);
4189
4190         if (err) {
4191                 ext4_msg(sb, KERN_ERR, "insufficient memory");
4192                 goto failed_mount6;
4193         }
4194
4195         if (ext4_has_feature_flex_bg(sb))
4196                 if (!ext4_fill_flex_info(sb)) {
4197                         ext4_msg(sb, KERN_ERR,
4198                                "unable to initialize "
4199                                "flex_bg meta info!");
4200                         goto failed_mount6;
4201                 }
4202
4203         err = ext4_register_li_request(sb, first_not_zeroed);
4204         if (err)
4205                 goto failed_mount6;
4206
4207         err = ext4_register_sysfs(sb);
4208         if (err)
4209                 goto failed_mount7;
4210
4211 #ifdef CONFIG_QUOTA
4212         /* Enable quota usage during mount. */
4213         if (ext4_has_feature_quota(sb) && !(sb->s_flags & MS_RDONLY)) {
4214                 err = ext4_enable_quotas(sb);
4215                 if (err)
4216                         goto failed_mount8;
4217         }
4218 #endif  /* CONFIG_QUOTA */
4219
4220         EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
4221         ext4_orphan_cleanup(sb, es);
4222         EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
4223         if (needs_recovery) {
4224                 ext4_msg(sb, KERN_INFO, "recovery complete");
4225                 ext4_mark_recovery_complete(sb, es);
4226         }
4227         if (EXT4_SB(sb)->s_journal) {
4228                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
4229                         descr = " journalled data mode";
4230                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
4231                         descr = " ordered data mode";
4232                 else
4233                         descr = " writeback data mode";
4234         } else
4235                 descr = "out journal";
4236
4237         if (test_opt(sb, DISCARD)) {
4238                 struct request_queue *q = bdev_get_queue(sb->s_bdev);
4239                 if (!blk_queue_discard(q))
4240                         ext4_msg(sb, KERN_WARNING,
4241                                  "mounting with \"discard\" option, but "
4242                                  "the device does not support discard");
4243         }
4244
4245         if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
4246                 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
4247                          "Opts: %.*s%s%s", descr,
4248                          (int) sizeof(sbi->s_es->s_mount_opts),
4249                          sbi->s_es->s_mount_opts,
4250                          *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
4251
4252         if (es->s_error_count)
4253                 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
4254
4255         /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
4256         ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
4257         ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
4258         ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
4259
4260         kfree(orig_data);
4261 #ifdef CONFIG_EXT4_FS_ENCRYPTION
4262         memcpy(sbi->key_prefix, EXT4_KEY_DESC_PREFIX,
4263                                 EXT4_KEY_DESC_PREFIX_SIZE);
4264         sbi->key_prefix_size = EXT4_KEY_DESC_PREFIX_SIZE;
4265 #endif
4266         return 0;
4267
4268 cantfind_ext4:
4269         if (!silent)
4270                 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4271         goto failed_mount;
4272
4273 #ifdef CONFIG_QUOTA
4274 failed_mount8:
4275         ext4_unregister_sysfs(sb);
4276 #endif
4277 failed_mount7:
4278         ext4_unregister_li_request(sb);
4279 failed_mount6:
4280         ext4_mb_release(sb);
4281         if (sbi->s_flex_groups)
4282                 kvfree(sbi->s_flex_groups);
4283         percpu_counter_destroy(&sbi->s_freeclusters_counter);
4284         percpu_counter_destroy(&sbi->s_freeinodes_counter);
4285         percpu_counter_destroy(&sbi->s_dirs_counter);
4286         percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4287 failed_mount5:
4288         ext4_ext_release(sb);
4289         ext4_release_system_zone(sb);
4290 failed_mount4a:
4291         dput(sb->s_root);
4292         sb->s_root = NULL;
4293 failed_mount4:
4294         ext4_msg(sb, KERN_ERR, "mount failed");
4295         if (EXT4_SB(sb)->rsv_conversion_wq)
4296                 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4297 failed_mount_wq:
4298         if (sbi->s_mb_cache) {
4299                 ext4_xattr_destroy_cache(sbi->s_mb_cache);
4300                 sbi->s_mb_cache = NULL;
4301         }
4302         if (sbi->s_journal) {
4303                 jbd2_journal_destroy(sbi->s_journal);
4304                 sbi->s_journal = NULL;
4305         }
4306 failed_mount3a:
4307         ext4_es_unregister_shrinker(sbi);
4308 failed_mount3:
4309         del_timer_sync(&sbi->s_err_report);
4310         if (sbi->s_mmp_tsk)
4311                 kthread_stop(sbi->s_mmp_tsk);
4312 failed_mount2:
4313         for (i = 0; i < db_count; i++)
4314                 brelse(sbi->s_group_desc[i]);
4315         kvfree(sbi->s_group_desc);
4316 failed_mount:
4317         if (sbi->s_chksum_driver)
4318                 crypto_free_shash(sbi->s_chksum_driver);
4319 #ifdef CONFIG_QUOTA
4320         for (i = 0; i < EXT4_MAXQUOTAS; i++)
4321                 kfree(sbi->s_qf_names[i]);
4322 #endif
4323         ext4_blkdev_remove(sbi);
4324         brelse(bh);
4325 out_fail:
4326         /* sb->s_user_ns will be put when sb is destroyed */
4327         sb->s_fs_info = NULL;
4328         kfree(sbi->s_blockgroup_lock);
4329 out_free_base:
4330         kfree(sbi);
4331         kfree(orig_data);
4332         return err ? err : ret;
4333 }
4334
4335 /*
4336  * Setup any per-fs journal parameters now.  We'll do this both on
4337  * initial mount, once the journal has been initialised but before we've
4338  * done any recovery; and again on any subsequent remount.
4339  */
4340 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
4341 {
4342         struct ext4_sb_info *sbi = EXT4_SB(sb);
4343
4344         journal->j_commit_interval = sbi->s_commit_interval;
4345         journal->j_min_batch_time = sbi->s_min_batch_time;
4346         journal->j_max_batch_time = sbi->s_max_batch_time;
4347
4348         write_lock(&journal->j_state_lock);
4349         if (test_opt(sb, BARRIER))
4350                 journal->j_flags |= JBD2_BARRIER;
4351         else
4352                 journal->j_flags &= ~JBD2_BARRIER;
4353         if (test_opt(sb, DATA_ERR_ABORT))
4354                 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
4355         else
4356                 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
4357         write_unlock(&journal->j_state_lock);
4358 }
4359
     /*
      * Look up inode number @journal_inum on @sb with ext4_iget() and check
      * that it can serve as a journal: the lookup must succeed, the inode
      * must have a non-zero link count and it must be a regular file.
      *
      * Returns the inode on success.  On any failure an error is logged and
      * NULL is returned; on the failure paths taken after a successful
      * lookup the inode is dropped again with iput().
      */
4360 static struct inode *ext4_get_journal_inode(struct super_block *sb,
4361                                              unsigned int journal_inum)
4362 {
4363         struct inode *journal_inode;
4364
4365         /*
4366          * Test for the existence of a valid inode on disk.  Bad things
4367          * happen if we iget() an unused inode, as the subsequent iput()
4368          * will try to delete it.
4369          */
4370         journal_inode = ext4_iget(sb, journal_inum);
4371         if (IS_ERR(journal_inode)) {
4372                 ext4_msg(sb, KERN_ERR, "no journal found");
4373                 return NULL;
4374         }
4375         if (!journal_inode->i_nlink) {
                     /* Marked bad before the reference is dropped. */
4376                 make_bad_inode(journal_inode);
4377                 iput(journal_inode);
4378                 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
4379                 return NULL;
4380         }
4381
4382         jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
4383                   journal_inode, journal_inode->i_size);
4384         if (!S_ISREG(journal_inode->i_mode)) {
4385                 ext4_msg(sb, KERN_ERR, "invalid journal inode");
4386                 iput(journal_inode);
4387                 return NULL;
4388         }
4389         return journal_inode;
4390 }
4391
     /*
      * Build a journal_t for a journal stored in inode @journal_inum of @sb.
      *
      * The inode is obtained through ext4_get_journal_inode() and handed to
      * jbd2_journal_init_inode().  On success the journal's j_private is
      * pointed at @sb and the per-fs journal parameters are applied.
      *
      * Returns the journal, or NULL if the inode could not be obtained or
      * the journal could not be initialised (the inode reference is dropped
      * in the latter case).  BUGs if the filesystem lacks the journal
      * feature.
      */
4392 static journal_t *ext4_get_journal(struct super_block *sb,
4393                                    unsigned int journal_inum)
4394 {
4395         struct inode *journal_inode;
4396         journal_t *journal;
4397
4398         BUG_ON(!ext4_has_feature_journal(sb));
4399
4400         journal_inode = ext4_get_journal_inode(sb, journal_inum);
4401         if (!journal_inode)
4402                 return NULL;
4403
4404         journal = jbd2_journal_init_inode(journal_inode);
4405         if (!journal) {
4406                 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
4407                 iput(journal_inode);
4408                 return NULL;
4409         }
4410         journal->j_private = sb;
4411         ext4_init_journal_params(sb, journal);
4412         return journal;
4413 }
4414
     /*
      * Build a journal_t for an external journal living on block device
      * @j_dev.
      *
      * The device is opened with ext4_blkdev_get(), its ext4 superblock is
      * read and validated (magic, JOURNAL_DEV incompat feature, checksum
      * when METADATA_CSUM is set, and UUID against s_journal_uuid of @sb),
      * and a journal is created with jbd2_journal_init_dev().  The journal
      * superblock is then read and must report exactly one user.
      *
      * On success the block device is recorded in EXT4_SB(sb)->journal_bdev,
      * the per-fs journal parameters are applied and the journal is
      * returned.  On failure an error is logged, whatever was set up is
      * torn down and NULL is returned.  BUGs if the filesystem lacks the
      * journal feature.
      */
4415 static journal_t *ext4_get_dev_journal(struct super_block *sb,
4416                                        dev_t j_dev)
4417 {
4418         struct buffer_head *bh;
4419         journal_t *journal;
4420         ext4_fsblk_t start;
4421         ext4_fsblk_t len;
4422         int hblock, blocksize;
4423         ext4_fsblk_t sb_block;
4424         unsigned long offset;
4425         struct ext4_super_block *es;
4426         struct block_device *bdev;
4427
4428         BUG_ON(!ext4_has_feature_journal(sb));
4429
4430         bdev = ext4_blkdev_get(j_dev, sb);
4431         if (bdev == NULL)
4432                 return NULL;
4433
             /* The fs block size must not be below the device's logical block size. */
4434         blocksize = sb->s_blocksize;
4435         hblock = bdev_logical_block_size(bdev);
4436         if (blocksize < hblock) {
4437                 ext4_msg(sb, KERN_ERR,
4438                         "blocksize too small for journal device");
4439                 goto out_bdev;
4440         }
4441
             /*
              * Block index, and offset inside that block, of byte offset
              * EXT4_MIN_BLOCK_SIZE on the journal device.
              */
4442         sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
4443         offset = EXT4_MIN_BLOCK_SIZE % blocksize;
4444         set_blocksize(bdev, blocksize);
4445         if (!(bh = __bread(bdev, sb_block, blocksize))) {
4446                 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
4447                        "external journal");
4448                 goto out_bdev;
4449         }
4450
4451         es = (struct ext4_super_block *) (bh->b_data + offset);
4452         if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
4453             !(le32_to_cpu(es->s_feature_incompat) &
4454               EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
4455                 ext4_msg(sb, KERN_ERR, "external journal has "
4456                                         "bad superblock");
4457                 brelse(bh);
4458                 goto out_bdev;
4459         }
4460
4461         if ((le32_to_cpu(es->s_feature_ro_compat) &
4462              EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
4463             es->s_checksum != ext4_superblock_csum(sb, es)) {
4464                 ext4_msg(sb, KERN_ERR, "external journal has "
4465                                        "corrupt superblock");
4466                 brelse(bh);
4467                 goto out_bdev;
4468         }
4469
4470         if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
4471                 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
4472                 brelse(bh);
4473                 goto out_bdev;
4474         }
4475
             /* Journal extent: the block right after the superblock, for len blocks. */
4476         len = ext4_blocks_count(es);
4477         start = sb_block + 1;
4478         brelse(bh);     /* we're done with the superblock */
4479
4480         journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
4481                                         start, len, blocksize);
4482         if (!journal) {
4483                 ext4_msg(sb, KERN_ERR, "failed to create device journal");
4484                 goto out_bdev;
4485         }
4486         journal->j_private = sb;
             /* Read the journal superblock and wait for the I/O to finish. */
4487         ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
4488         wait_on_buffer(journal->j_sb_buffer);
4489         if (!buffer_uptodate(journal->j_sb_buffer)) {
4490                 ext4_msg(sb, KERN_ERR, "I/O error on journal device");
4491                 goto out_journal;
4492         }
4493         if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
4494                 ext4_msg(sb, KERN_ERR, "External journal has more than one "
4495                                         "user (unsupported) - %d",
4496                         be32_to_cpu(journal->j_superblock->s_nr_users));
4497                 goto out_journal;
4498         }
4499         EXT4_SB(sb)->journal_bdev = bdev;
4500         ext4_init_journal_params(sb, journal);
4501         return journal;
4502
     /* Error unwinding: destroy the journal (if created), then release the device. */
4503 out_journal:
4504         jbd2_journal_destroy(journal);
4505 out_bdev:
4506         ext4_blkdev_put(bdev);
4507         return NULL;
4508 }
4509
     /*
      * Locate, create and load the journal for @sb.
      *
      * @es:             on-disk superblock; supplies s_journal_inum and
      *                  s_journal_dev
      * @journal_devnum: if non-zero and different from es->s_journal_dev,
      *                  overrides the device number recorded in the superblock
      *
      * Exactly one of an inode journal or a device journal is used; having
      * both an inode number and a device number is rejected with -EINVAL.
      * If the filesystem does not have the needs_recovery feature set the
      * journal is wiped before it is loaded.
      *
      * Returns 0 on success with EXT4_SB(sb)->s_journal set, -EROFS when
      * recovery is needed but the underlying device is read-only, -EINVAL
      * when no journal could be obtained, or the error from
      * jbd2_journal_wipe()/jbd2_journal_load().  BUGs if the filesystem
      * lacks the journal feature.
      */
4510 static int ext4_load_journal(struct super_block *sb,
4511                              struct ext4_super_block *es,
4512                              unsigned long journal_devnum)
4513 {
4514         journal_t *journal;
4515         unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
4516         dev_t journal_dev;
4517         int err = 0;
4518         int really_read_only;
4519
4520         BUG_ON(!ext4_has_feature_journal(sb));
4521
4522         if (journal_devnum &&
4523             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4524                 ext4_msg(sb, KERN_INFO, "external journal device major/minor "
4525                         "numbers have changed");
4526                 journal_dev = new_decode_dev(journal_devnum);
4527         } else
4528                 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
4529
             /* Read-only state of the block device itself, not of the mount. */
4530         really_read_only = bdev_read_only(sb->s_bdev);
4531
4532         /*
4533          * Are we loading a blank journal or performing recovery after a
4534          * crash?  For recovery, we need to check in advance whether we
4535          * can get read-write access to the device.
4536          */
4537         if (ext4_has_feature_journal_needs_recovery(sb)) {
4538                 if (sb->s_flags & MS_RDONLY) {
4539                         ext4_msg(sb, KERN_INFO, "INFO: recovery "
4540                                         "required on readonly filesystem");
4541                         if (really_read_only) {
4542                                 ext4_msg(sb, KERN_ERR, "write access "
4543                                         "unavailable, cannot proceed");
4544                                 return -EROFS;
4545                         }
4546                         ext4_msg(sb, KERN_INFO, "write access will "
4547                                "be enabled during recovery");
4548                 }
4549         }
4550
4551         if (journal_inum && journal_dev) {
4552                 ext4_msg(sb, KERN_ERR, "filesystem has both journal "
4553                        "and inode journals!");
4554                 return -EINVAL;
4555         }
4556
4557         if (journal_inum) {
4558                 if (!(journal = ext4_get_journal(sb, journal_inum)))
4559                         return -EINVAL;
4560         } else {
4561                 if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
4562                         return -EINVAL;
4563         }
4564
4565         if (!(journal->j_flags & JBD2_BARRIER))
4566                 ext4_msg(sb, KERN_INFO, "barriers disabled");
4567
4568         if (!ext4_has_feature_journal_needs_recovery(sb))
4569                 err = jbd2_journal_wipe(journal, !really_read_only);
4570         if (!err) {
                     /*
                      * Keep a copy of the EXT4_S_ERR_LEN bytes at
                      * EXT4_S_ERR_START of the superblock and put them back
                      * after the load.  If the allocation fails the
                      * save/restore is silently skipped.
                      * NOTE(review): presumably journal replay can overwrite
                      * this region of the superblock buffer - confirm.
                      */
4571                 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
4572                 if (save)
4573                         memcpy(save, ((char *) es) +
4574                                EXT4_S_ERR_START, EXT4_S_ERR_LEN);
4575                 err = jbd2_journal_load(journal);
4576                 if (save)
4577                         memcpy(((char *) es) + EXT4_S_ERR_START,
4578                                save, EXT4_S_ERR_LEN);
4579                 kfree(save);
4580         }
4581
4582         if (err) {
4583                 ext4_msg(sb, KERN_ERR, "error loading journal");
4584                 jbd2_journal_destroy(journal);
4585                 return err;
4586         }
4587
4588         EXT4_SB(sb)->s_journal = journal;
4589         ext4_clear_journal_err(sb, es);
4590
             /*
              * Record a changed journal device number in the superblock,
              * unless the underlying device is read-only.
              */
4591         if (!really_read_only && journal_devnum &&
4592             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4593                 es->s_journal_dev = cpu_to_le32(journal_devnum);
4594
4595                 /* Make sure we flush the recovery flag to disk. */
4596                 ext4_commit_super(sb, 1);
4597         }
4598
4599         return 0;
4600 }
4601
     /*
      * Push the in-memory superblock state into the superblock buffer and
      * mark it dirty.
      *
      * @sb:   superblock to commit
      * @sync: if non-zero, also write the buffer out synchronously
      *
      * Refreshes s_wtime (unless mounted read-only), s_kbytes_written and,
      * when the corresponding per-cpu counters are initialised, the free
      * block and free inode counts; then recomputes the superblock checksum.
      * A write error left on the buffer by an earlier attempt is logged and
      * cleared so that the write is retried.
      *
      * Returns 0 on success, and also when there is no superblock buffer or
      * block_device_ejected() reports the device gone.  For a synchronous
      * commit it returns the error from __sync_dirty_buffer(), or -EIO if
      * the buffer carries a write I/O error afterwards.
      */
4602 static int ext4_commit_super(struct super_block *sb, int sync)
4603 {
4604         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
4605         struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
4606         int error = 0;
4607
4608         if (!sbh || block_device_ejected(sb))
4609                 return error;
4610         /*
4611          * If the file system is mounted read-only, don't update the
4612          * superblock write time.  This avoids updating the superblock
4613          * write time when we are mounting the root file system
4614          * read/only but we need to replay the journal; at that point,
4615          * for people who are east of GMT and who make their clock
4616          * tick in localtime for Windows bug-for-bug compatibility,
4617          * the clock is set in the future, and this will cause e2fsck
4618          * to complain and force a full file system check.
4619          */
4620         if (!(sb->s_flags & MS_RDONLY))
4621                 es->s_wtime = cpu_to_le32(get_seconds());
             /*
              * Lifetime KiB written: the saved total plus the sectors written
              * since s_sectors_written_start, halved by the >> 1.
              */
4622         if (sb->s_bdev->bd_part)
4623                 es->s_kbytes_written =
4624                         cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
4625                             ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
4626                               EXT4_SB(sb)->s_sectors_written_start) >> 1));
4627         else
4628                 es->s_kbytes_written =
4629                         cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4630         if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
4631                 ext4_free_blocks_count_set(es,
4632                         EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4633                                 &EXT4_SB(sb)->s_freeclusters_counter)));
4634         if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
4635                 es->s_free_inodes_count =
4636                         cpu_to_le32(percpu_counter_sum_positive(
4637                                 &EXT4_SB(sb)->s_freeinodes_counter));
4638         BUFFER_TRACE(sbh, "marking dirty");
4639         ext4_superblock_csum_set(sb);
4640         if (sync)
4641                 lock_buffer(sbh);
4642         if (buffer_write_io_error(sbh)) {
4643                 /*
4644                  * Oh, dear.  A previous attempt to write the
4645                  * superblock failed.  This could happen because the
4646                  * USB device was yanked out.  Or it could happen to
4647                  * be a transient write error and maybe the block will
4648                  * be remapped.  Nothing we can do but to retry the
4649                  * write and hope for the best.
4650                  */
4651                 ext4_msg(sb, KERN_ERR, "previous I/O error to "
4652                        "superblock detected");
4653                 clear_buffer_write_io_error(sbh);
4654                 set_buffer_uptodate(sbh);
4655         }
4656         mark_buffer_dirty(sbh);
4657         if (sync) {
4658                 unlock_buffer(sbh);
4659                 error = __sync_dirty_buffer(sbh,
4660                         test_opt(sb, BARRIER) ? REQ_FUA : REQ_SYNC);
4661                 if (error)
4662                         return error;
4663
                     /*
                      * Report a failed write as -EIO rather than handing the
                      * raw flag test result (a positive value) to callers
                      * that expect a negative errno.
                      */
4664                 if (buffer_write_io_error(sbh)) {
4665                         ext4_msg(sb, KERN_ERR, "I/O error while writing "
4666                                "superblock");
4667                         clear_buffer_write_io_error(sbh);
4668                         set_buffer_uptodate(sbh);
4669                         error = -EIO;
4670                 }
4671         }
4672         return error;
4673 }
4674
4675 /*
4676  * Have we just finished recovery?  If so, and if we are mounting (or
4677  * remounting) the filesystem readonly, then we will end up with a
4678  * consistent fs on disk.  Record that fact.
      *
      * With journal updates locked out, the journal is flushed.  If the flush
      * succeeds and the filesystem is read-only with the needs_recovery
      * feature still set, the feature is cleared and the superblock is
      * committed synchronously.  A failed flush leaves the flag untouched.
      *
      * Without the journal feature this is a no-op (and BUGs if a journal is
      * attached anyway).  @es is not referenced by the body.
4679  */
4680 static void ext4_mark_recovery_complete(struct super_block *sb,
4681                                         struct ext4_super_block *es)
4682 {
4683         journal_t *journal = EXT4_SB(sb)->s_journal;
4684
4685         if (!ext4_has_feature_journal(sb)) {
4686                 BUG_ON(journal != NULL);
4687                 return;
4688         }
4689         jbd2_journal_lock_updates(journal);
4690         if (jbd2_journal_flush(journal) < 0)
4691                 goto out;
4692
4693         if (ext4_has_feature_journal_needs_recovery(sb) &&
4694             sb->s_flags & MS_RDONLY) {
4695                 ext4_clear_feature_journal_needs_recovery(sb);
4696                 ext4_commit_super(sb, 1);
4697         }
4698
4699 out:
4700         jbd2_journal_unlock_updates(journal);
4701 }
4702
4703 /*
4704  * If we are mounting (or read-write remounting) a filesystem whose journal
4705  * has recorded an error from a previous lifetime, move that error to the
4706  * main filesystem now.
      *
      * When jbd2_journal_errno() reports an error, it is logged, EXT4_ERROR_FS
      * is set both in the in-memory mount state and in es->s_state, the
      * superblock is committed synchronously, and only then is the error
      * cleared from the journal and the journal superblock errno updated.
      *
      * BUGs if the filesystem lacks the journal feature.
4707  */
4708 static void ext4_clear_journal_err(struct super_block *sb,
4709                                    struct ext4_super_block *es)
4710 {
4711         journal_t *journal;
4712         int j_errno;
4713         const char *errstr;
4714
4715         BUG_ON(!ext4_has_feature_journal(sb));
4716
4717         journal = EXT4_SB(sb)->s_journal;
4718
4719         /*
4720          * Now check for any error status which may have been recorded in the
4721          * journal by a prior ext4_error() or ext4_abort()
4722          */
4723
4724         j_errno = jbd2_journal_errno(journal);
4725         if (j_errno) {
4726                 char nbuf[16];
4727
4728                 errstr = ext4_decode_error(sb, j_errno, nbuf);
4729                 ext4_warning(sb, "Filesystem error recorded "
4730                              "from previous mount: %s", errstr);
4731                 ext4_warning(sb, "Marking fs in need of filesystem check.");
4732
4733                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
4734                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
4735                 ext4_commit_super(sb, 1);
4736
4737                 jbd2_journal_clear_err(journal);
4738                 jbd2_journal_update_sb_errno(journal);
4739         }
4740 }
4741
4742 /*
4743  * Force the running and committing transactions to commit,
4744  * and wait on the commit.
      *
      * Returns 0 without doing anything on a read-only filesystem; otherwise
      * returns the result of ext4_journal_force_commit() on the filesystem's
      * journal.
4745  */
4746 int ext4_force_commit(struct super_block *sb)
4747 {
4748         journal_t *journal;
4749
4750         if (sb->s_flags & MS_RDONLY)
4751                 return 0;
4752
4753         journal = EXT4_SB(sb)->s_journal;
4754         return ext4_journal_force_commit(journal);
4755 }
4756
     /*
      * Sync filesystem metadata for @sb.
      *
      * @wait: if non-zero, wait for the journal commit to finish and, when
      *        required, issue a cache flush to the block device
      *
      * Flushes the reserved-conversion workqueue, writes back dirty quota
      * structures, and, if a journal exists, starts a commit of the latest
      * transaction.  Returns the result of waiting for that commit, or, if
      * that was 0, the result of the explicit device flush (0 when neither
      * step ran).
      */
4757 static int ext4_sync_fs(struct super_block *sb, int wait)
4758 {
4759         int ret = 0;
4760         tid_t target;
4761         bool needs_barrier = false;
4762         struct ext4_sb_info *sbi = EXT4_SB(sb);
4763
4764         trace_ext4_sync_fs(sb, wait);
4765         flush_workqueue(sbi->rsv_conversion_wq);
4766         /*
4767          * Writeback quota in non-journalled quota case - journalled quota has
4768          * no dirty dquots
4769          */
4770         dquot_writeback_dquots(sb, -1);
4771         /*
4772          * Data writeback is possible w/o journal transaction, so a barrier
4773          * must be sent at the end of the function. But we can skip it if
4774          * transaction_commit will do it for us.
4775          */
4776         if (sbi->s_journal) {
4777                 target = jbd2_get_latest_transaction(sbi->s_journal);
4778                 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
4779                     !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
4780                         needs_barrier = true;
4781
4782                 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
4783                         if (wait)
4784                                 ret = jbd2_log_wait_commit(sbi->s_journal,
4785                                                            target);
4786                 }
4787         } else if (wait && test_opt(sb, BARRIER))
4788                 needs_barrier = true;
4789         if (needs_barrier) {
4790                 int err;
4791                 err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
                     /* An earlier commit error takes precedence over a flush error. */
4792                 if (!ret)
4793                         ret = err;
4794         }
4795
4796         return ret;
4797 }
4798
4799 /*
4800  * LVM calls this function before a (read-only) snapshot is created.  This
4801  * gives us a chance to flush the journal completely and mark the fs clean.
4802  *
4803  * Note that this function alone cannot bring a filesystem into a clean
4804  * state. It relies on upper layer to stop all data & metadata
4805  * modifications.
      *
      * Returns 0 on a read-only filesystem, the (negative) error from
      * jbd2_journal_flush() if the flush fails, and otherwise the result of
      * the synchronous ext4_commit_super().
4806  */
4807 static int ext4_freeze(struct super_block *sb)
4808 {
4809         int error = 0;
4810         journal_t *journal;
4811
4812         if (sb->s_flags & MS_RDONLY)
4813                 return 0;
4814
4815         journal = EXT4_SB(sb)->s_journal;
4816
4817         if (journal) {
4818                 /* Now we set up the journal barrier. */
4819                 jbd2_journal_lock_updates(journal);
4820
4821                 /*
4822                  * Don't clear the needs_recovery flag if we failed to
4823                  * flush the journal.
4824                  */
4825                 error = jbd2_journal_flush(journal);
4826                 if (error < 0)
4827                         goto out;
4828
4829                 /* Journal blocked and flushed, clear needs_recovery flag. */
4830                 ext4_clear_feature_journal_needs_recovery(sb);
4831         }
4832
4833         error = ext4_commit_super(sb, 1);
4834 out:
4835         if (journal)
4836                 /* we rely on upper layer to stop further updates */
4837                 jbd2_journal_unlock_updates(journal);
4838         return error;
4839 }
4840
4841 /*
4842  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
4843  * flag here, even though the filesystem is not technically dirty yet.
      *
      * Always returns 0; the result of the synchronous superblock commit is
      * not propagated.  Nothing is done on a read-only filesystem.
4844  */
4845 static int ext4_unfreeze(struct super_block *sb)
4846 {
4847         if (sb->s_flags & MS_RDONLY)
4848                 return 0;
4849
4850         if (EXT4_SB(sb)->s_journal) {
4851                 /* Reset the needs_recovery flag before the fs is unlocked. */
4852                 ext4_set_feature_journal_needs_recovery(sb);
4853         }
4854
4855         ext4_commit_super(sb, 1);
4856         return 0;
4857 }
4858
4859 /*
4860  * Structure to save mount options for ext4_remount's benefit
      *
      * ext4_remount() fills one of these from the ext4 superblock info before
      * parsing the new options and copies the fields back if the remount
      * fails.  The quota members exist only with CONFIG_QUOTA.
4861  */
4862 struct ext4_mount_options {
4863         unsigned long s_mount_opt;
4864         unsigned long s_mount_opt2;
4865         kuid_t s_resuid;
4866         kgid_t s_resgid;
4867         unsigned long s_commit_interval;
4868         u32 s_min_batch_time, s_max_batch_time;
4869 #ifdef CONFIG_QUOTA
4870         int s_jquota_fmt;
4871         char *s_qf_names[EXT4_MAXQUOTAS];      /* kstrdup()ed copies, or NULL */
4872 #endif
4873 };
4874
     /*
      * Apply a new option string and/or a read-only <-> read-write transition
      * to an already mounted filesystem.
      *
      * @sb:    superblock being remounted
      * @flags: requested MS_* flags; on success its MS_LAZYTIME bit is
      *         rewritten to match sb->s_flags
      * @data:  option string handed to parse_options()
      *
      * The current option state is first saved in old_opts.  Every failure
      * after that point jumps to restore_opts, which puts sb->s_flags and the
      * saved options (including the quota file names) back.
      *
      * Returns 0 on success or an error code: -ENOMEM if the saved quota
      * names cannot be allocated, -EINVAL for unparsable or conflicting
      * options or an unprocessed orphan list, -EROFS when the fs is aborted
      * or may not be made writable, -EFSBADCRC for a bad group descriptor
      * checksum, or whatever sync_filesystem(), dquot_suspend() or
      * ext4_enable_quotas() returned.
      *
      * NOTE(review): journal parameters pushed by ext4_init_journal_params()
      * are not re-applied on the restore_opts path.
      */
4875 static int ext4_remount(struct super_block *sb, int *flags, char *data)
4876 {
4877         struct ext4_super_block *es;
4878         struct ext4_sb_info *sbi = EXT4_SB(sb);
4879         unsigned long old_sb_flags;
4880         struct ext4_mount_options old_opts;
4881         int enable_quota = 0;
4882         ext4_group_t g;
4883         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4884         int err = 0;
4885 #ifdef CONFIG_QUOTA
4886         int i, j;
4887 #endif
             /* Copy kept only for the success message; parsing works on @data. */
4888         char *orig_data = kstrdup(data, GFP_KERNEL);
4889
4890         /* Store the original options */
4891         old_sb_flags = sb->s_flags;
4892         old_opts.s_mount_opt = sbi->s_mount_opt;
4893         old_opts.s_mount_opt2 = sbi->s_mount_opt2;
4894         old_opts.s_resuid = sbi->s_resuid;
4895         old_opts.s_resgid = sbi->s_resgid;
4896         old_opts.s_commit_interval = sbi->s_commit_interval;
4897         old_opts.s_min_batch_time = sbi->s_min_batch_time;
4898         old_opts.s_max_batch_time = sbi->s_max_batch_time;
4899 #ifdef CONFIG_QUOTA
4900         old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
4901         for (i = 0; i < EXT4_MAXQUOTAS; i++)
4902                 if (sbi->s_qf_names[i]) {
4903                         old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
4904                                                          GFP_KERNEL);
4905                         if (!old_opts.s_qf_names[i]) {
4906                                 for (j = 0; j < i; j++)
4907                                         kfree(old_opts.s_qf_names[j]);
4908                                 kfree(orig_data);
4909                                 return -ENOMEM;
4910                         }
4911                 } else
4912                         old_opts.s_qf_names[i] = NULL;
4913 #endif
             /* Default to the journal thread's current I/O priority, if it has one. */
4914         if (sbi->s_journal && sbi->s_journal->j_task->io_context)
4915                 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
4916
4917         if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
4918                 err = -EINVAL;
4919                 goto restore_opts;
4920         }
4921
             /* journal_checksum may not change on remount: flip the bit back. */
4922         if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
4923             test_opt(sb, JOURNAL_CHECKSUM)) {
4924                 ext4_msg(sb, KERN_ERR, "changing journal_checksum "
4925                          "during remount not supported; ignoring");
4926                 sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
4927         }
4928
4929         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4930                 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4931                         ext4_msg(sb, KERN_ERR, "can't mount with "
4932                                  "both data=journal and delalloc");
4933                         err = -EINVAL;
4934                         goto restore_opts;
4935                 }
4936                 if (test_opt(sb, DIOREAD_NOLOCK)) {
4937                         ext4_msg(sb, KERN_ERR, "can't mount with "
4938                                  "both data=journal and dioread_nolock");
4939                         err = -EINVAL;
4940                         goto restore_opts;
4941                 }
4942                 if (test_opt(sb, DAX)) {
4943                         ext4_msg(sb, KERN_ERR, "can't mount with "
4944                                  "both data=journal and dax");
4945                         err = -EINVAL;
4946                         goto restore_opts;
4947                 }
4948         } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
4949                 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4950                         ext4_msg(sb, KERN_ERR, "can't mount with "
4951                                 "journal_async_commit in data=ordered mode");
4952                         err = -EINVAL;
4953                         goto restore_opts;
4954                 }
4955         }
4956
             /* The dax flag may not change on remount either: flip it back. */
4957         if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
4958                 ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
4959                         "dax flag with busy inodes while remounting");
4960                 sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
4961         }
4962
4963         if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
4964                 ext4_abort(sb, "Abort forced by user");
4965
4966         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
4967                 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
4968
4969         es = sbi->s_es;
4970
4971         if (sbi->s_journal) {
4972                 ext4_init_journal_params(sb, sbi->s_journal);
4973                 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4974         }
4975
4976         if (*flags & MS_LAZYTIME)
4977                 sb->s_flags |= MS_LAZYTIME;
4978
             /* Only act when the read-only state actually changes. */
4979         if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
4980                 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
4981                         err = -EROFS;
4982                         goto restore_opts;
4983                 }
4984
4985                 if (*flags & MS_RDONLY) {
4986                         err = sync_filesystem(sb);
4987                         if (err < 0)
4988                                 goto restore_opts;
4989                         err = dquot_suspend(sb, -1);
4990                         if (err < 0)
4991                                 goto restore_opts;
4992
4993                         /*
4994                          * First of all, the unconditional stuff we have to do
4995                          * to disable replay of the journal when we next remount
4996                          */
4997                         sb->s_flags |= MS_RDONLY;
4998
4999                         /*
5000                          * OK, test if we are remounting a valid rw partition
5001                          * readonly, and if so set the rdonly flag and then
5002                          * mark the partition as valid again.
5003                          */
5004                         if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
5005                             (sbi->s_mount_state & EXT4_VALID_FS))
5006                                 es->s_state = cpu_to_le16(sbi->s_mount_state);
5007
5008                         if (sbi->s_journal)
5009                                 ext4_mark_recovery_complete(sb, es);
5010                 } else {
5011                         /* Make sure we can mount this feature set readwrite */
5012                         if (ext4_has_feature_readonly(sb) ||
5013                             !ext4_feature_set_ok(sb, 0)) {
5014                                 err = -EROFS;
5015                                 goto restore_opts;
5016                         }
5017                         /*
5018                          * Make sure the group descriptor checksums
5019                          * are sane.  If they aren't, refuse to remount r/w.
5020                          */
5021                         for (g = 0; g < sbi->s_groups_count; g++) {
5022                                 struct ext4_group_desc *gdp =
5023                                         ext4_get_group_desc(sb, g, NULL);
5024
5025                                 if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
5026                                         ext4_msg(sb, KERN_ERR,
5027                "ext4_remount: Checksum for group %u failed (%u!=%u)",
5028                 g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
5029                                                le16_to_cpu(gdp->bg_checksum));
5030                                         err = -EFSBADCRC;
5031                                         goto restore_opts;
5032                                 }
5033                         }
5034
5035                         /*
5036                          * If we have an unprocessed orphan list hanging
5037                          * around from a previously readonly bdev mount,
5038                          * require a full umount/remount for now.
5039                          */
5040                         if (es->s_last_orphan) {
5041                                 ext4_msg(sb, KERN_WARNING, "Couldn't "
5042                                        "remount RDWR because of unprocessed "
5043                                        "orphan inode list.  Please "
5044                                        "umount/remount instead");
5045                                 err = -EINVAL;
5046                                 goto restore_opts;
5047                         }
5048
5049                         /*
5050                          * Mounting a RDONLY partition read-write, so reread
5051                          * and store the current valid flag.  (It may have
5052                          * been changed by e2fsck since we originally mounted
5053                          * the partition.)
5054                          */
5055                         if (sbi->s_journal)
5056                                 ext4_clear_journal_err(sb, es);
5057                         sbi->s_mount_state = le16_to_cpu(es->s_state);
5058                         if (!ext4_setup_super(sb, es, 0))
5059                                 sb->s_flags &= ~MS_RDONLY;
5060                         if (ext4_has_feature_mmp(sb))
5061                                 if (ext4_multi_mount_protect(sb,
5062                                                 le64_to_cpu(es->s_mmp_block))) {
5063                                         err = -EROFS;
5064                                         goto restore_opts;
5065                                 }
5066                         enable_quota = 1;
5067                 }
5068         }
5069
5070         /*
5071          * Reinitialize lazy itable initialization thread based on
5072          * current settings
5073          */
5074         if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
5075                 ext4_unregister_li_request(sb);
5076         else {
5077                 ext4_group_t first_not_zeroed;
5078                 first_not_zeroed = ext4_has_uninit_itable(sb);
5079                 ext4_register_li_request(sb, first_not_zeroed);
5080         }
5081
5082         ext4_setup_system_zone(sb);
5083         if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
5084                 ext4_commit_super(sb, 1);
5085
5086 #ifdef CONFIG_QUOTA
5087         if (enable_quota) {
5088                 if (sb_any_quota_suspended(sb))
5089                         dquot_resume(sb, -1);
5090                 else if (ext4_has_feature_quota(sb)) {
5091                         err = ext4_enable_quotas(sb);
5092                         if (err)
5093                                 goto restore_opts;
5094                 }
5095         }
             /*
              * Release old quota file names.  This must come after the last
              * "goto restore_opts": that path hands these pointers back to
              * sbi->s_qf_names[], so freeing them any earlier would leave
              * dangling pointers there.
              */
5096         /* Release old quota file names */
5097         for (i = 0; i < EXT4_MAXQUOTAS; i++)
5098                 kfree(old_opts.s_qf_names[i]);
5099 #endif
5100
5101         *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
5102         ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
5103         kfree(orig_data);
5104         return 0;
5105
     /* Failure: put back everything that was saved in old_opts. */
5106 restore_opts:
5107         sb->s_flags = old_sb_flags;
5108         sbi->s_mount_opt = old_opts.s_mount_opt;
5109         sbi->s_mount_opt2 = old_opts.s_mount_opt2;
5110         sbi->s_resuid = old_opts.s_resuid;
5111         sbi->s_resgid = old_opts.s_resgid;
5112         sbi->s_commit_interval = old_opts.s_commit_interval;
5113         sbi->s_min_batch_time = old_opts.s_min_batch_time;
5114         sbi->s_max_batch_time = old_opts.s_max_batch_time;
5115 #ifdef CONFIG_QUOTA
5116         sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
             /* Drop the names parse_options() may have installed; restore the saved ones. */
5117         for (i = 0; i < EXT4_MAXQUOTAS; i++) {
5118                 kfree(sbi->s_qf_names[i]);
5119                 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
5120         }
5121 #endif
5122         kfree(orig_data);
5123         return err;
5124 }
5125
5126 #ifdef CONFIG_QUOTA
5127 static int ext4_statfs_project(struct super_block *sb,
5128                                kprojid_t projid, struct kstatfs *buf)
5129 {
5130         struct kqid qid;
5131         struct dquot *dquot;
5132         u64 limit;
5133         u64 curblock;
5134
5135         qid = make_kqid_projid(projid);
5136         dquot = dqget(sb, qid);
5137         if (IS_ERR(dquot))
5138                 return PTR_ERR(dquot);
5139         spin_lock(&dq_data_lock);
5140
5141         limit = (dquot->dq_dqb.dqb_bsoftlimit ?
5142                  dquot->dq_dqb.dqb_bsoftlimit :
5143                  dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
5144         if (limit && buf->f_blocks > limit) {
5145                 curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
5146                 buf->f_blocks = limit;
5147                 buf->f_bfree = buf->f_bavail =
5148                         (buf->f_blocks > curblock) ?
5149                          (buf->f_blocks - curblock) : 0;
5150         }
5151
5152         limit = dquot->dq_dqb.dqb_isoftlimit ?
5153                 dquot->dq_dqb.dqb_isoftlimit :
5154                 dquot->dq_dqb.dqb_ihardlimit;
5155         if (limit && buf->f_files > limit) {
5156                 buf->f_files = limit;
5157                 buf->f_ffree =
5158                         (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
5159                          (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
5160         }
5161
5162         spin_unlock(&dq_data_lock);
5163         dqput(dquot);
5164         return 0;
5165 }
5166 #endif
5167
5168 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
5169 {
5170         struct super_block *sb = dentry->d_sb;
5171         struct ext4_sb_info *sbi = EXT4_SB(sb);
5172         struct ext4_super_block *es = sbi->s_es;
5173         ext4_fsblk_t overhead = 0, resv_blocks;
5174         u64 fsid;
5175         s64 bfree;
5176         resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
5177
5178         if (!test_opt(sb, MINIX_DF))
5179                 overhead = sbi->s_overhead;
5180
5181         buf->f_type = EXT4_SUPER_MAGIC;
5182         buf->f_bsize = sb->s_blocksize;
5183         buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
5184         bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
5185                 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
5186         /* prevent underflow in case that few free space is available */
5187         buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
5188         buf->f_bavail = buf->f_bfree -
5189                         (ext4_r_blocks_count(es) + resv_blocks);
5190         if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
5191                 buf->f_bavail = 0;
5192         buf->f_files = le32_to_cpu(es->s_inodes_count);
5193         buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
5194         buf->f_namelen = EXT4_NAME_LEN;
5195         fsid = le64_to_cpup((void *)es->s_uuid) ^
5196                le64_to_cpup((void *)es->s_uuid + sizeof(u64));
5197         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
5198         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
5199
5200 #ifdef CONFIG_QUOTA
5201         if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
5202             sb_has_quota_limits_enabled(sb, PRJQUOTA))
5203                 ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
5204 #endif
5205         return 0;
5206 }
5207
/* Helper function for writing quotas on sync - we need to start transaction
 * before quota file is locked for write. Otherwise there are possible deadlocks:
5210  * Process 1                         Process 2
5211  * ext4_create()                     quota_sync()
5212  *   jbd2_journal_start()                  write_dquot()
5213  *   dquot_initialize()                         down(dqio_mutex)
5214  *     down(dqio_mutex)                    jbd2_journal_start()
5215  *
5216  */
5217
5218 #ifdef CONFIG_QUOTA
5219
5220 static inline struct inode *dquot_to_inode(struct dquot *dquot)
5221 {
5222         return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
5223 }
5224
5225 static int ext4_write_dquot(struct dquot *dquot)
5226 {
5227         int ret, err;
5228         handle_t *handle;
5229         struct inode *inode;
5230
5231         inode = dquot_to_inode(dquot);
5232         handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
5233                                     EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
5234         if (IS_ERR(handle))
5235                 return PTR_ERR(handle);
5236         ret = dquot_commit(dquot);
5237         err = ext4_journal_stop(handle);
5238         if (!ret)
5239                 ret = err;
5240         return ret;
5241 }
5242
5243 static int ext4_acquire_dquot(struct dquot *dquot)
5244 {
5245         int ret, err;
5246         handle_t *handle;
5247
5248         handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5249                                     EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
5250         if (IS_ERR(handle))
5251                 return PTR_ERR(handle);
5252         ret = dquot_acquire(dquot);
5253         err = ext4_journal_stop(handle);
5254         if (!ret)
5255                 ret = err;
5256         return ret;
5257 }
5258
5259 static int ext4_release_dquot(struct dquot *dquot)
5260 {
5261         int ret, err;
5262         handle_t *handle;
5263
5264         handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5265                                     EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
5266         if (IS_ERR(handle)) {
5267                 /* Release dquot anyway to avoid endless cycle in dqput() */
5268                 dquot_release(dquot);
5269                 return PTR_ERR(handle);
5270         }
5271         ret = dquot_release(dquot);
5272         err = ext4_journal_stop(handle);
5273         if (!ret)
5274                 ret = err;
5275         return ret;
5276 }
5277
5278 static int ext4_mark_dquot_dirty(struct dquot *dquot)
5279 {
5280         struct super_block *sb = dquot->dq_sb;
5281         struct ext4_sb_info *sbi = EXT4_SB(sb);
5282
5283         /* Are we journaling quotas? */
5284         if (ext4_has_feature_quota(sb) ||
5285             sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
5286                 dquot_mark_dquot_dirty(dquot);
5287                 return ext4_write_dquot(dquot);
5288         } else {
5289                 return dquot_mark_dquot_dirty(dquot);
5290         }
5291 }
5292
5293 static int ext4_write_info(struct super_block *sb, int type)
5294 {
5295         int ret, err;
5296         handle_t *handle;
5297
5298         /* Data block + inode block */
5299         handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
5300         if (IS_ERR(handle))
5301                 return PTR_ERR(handle);
5302         ret = dquot_commit_info(sb, type);
5303         err = ext4_journal_stop(handle);
5304         if (!ret)
5305                 ret = err;
5306         return ret;
5307 }
5308
5309 /*
5310  * Turn on quotas during mount time - we need to find
5311  * the quota file and such...
5312  */
5313 static int ext4_quota_on_mount(struct super_block *sb, int type)
5314 {
5315         return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
5316                                         EXT4_SB(sb)->s_jquota_fmt, type);
5317 }
5318
5319 static void lockdep_set_quota_inode(struct inode *inode, int subclass)
5320 {
5321         struct ext4_inode_info *ei = EXT4_I(inode);
5322
5323         /* The first argument of lockdep_set_subclass has to be
5324          * *exactly* the same as the argument to init_rwsem() --- in
5325          * this case, in init_once() --- or lockdep gets unhappy
5326          * because the name of the lock is set using the
5327          * stringification of the argument to init_rwsem().
5328          */
5329         (void) ei;      /* shut up clang warning if !CONFIG_LOCKDEP */
5330         lockdep_set_subclass(&ei->i_data_sem, subclass);
5331 }
5332
5333 /*
5334  * Standard function to be called on quota_on
5335  */
5336 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
5337                          const struct path *path)
5338 {
5339         int err;
5340
5341         if (!test_opt(sb, QUOTA))
5342                 return -EINVAL;
5343
5344         /* Quotafile not on the same filesystem? */
5345         if (path->dentry->d_sb != sb)
5346                 return -EXDEV;
5347         /* Journaling quota? */
5348         if (EXT4_SB(sb)->s_qf_names[type]) {
5349                 /* Quotafile not in fs root? */
5350                 if (path->dentry->d_parent != sb->s_root)
5351                         ext4_msg(sb, KERN_WARNING,
5352                                 "Quota file not on filesystem root. "
5353                                 "Journaled quota will not work");
5354         }
5355
5356         /*
5357          * When we journal data on quota file, we have to flush journal to see
5358          * all updates to the file when we bypass pagecache...
5359          */
5360         if (EXT4_SB(sb)->s_journal &&
5361             ext4_should_journal_data(d_inode(path->dentry))) {
5362                 /*
5363                  * We don't need to lock updates but journal_flush() could
5364                  * otherwise be livelocked...
5365                  */
5366                 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
5367                 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5368                 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5369                 if (err)
5370                         return err;
5371         }
5372         lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
5373         err = dquot_quota_on(sb, type, format_id, path);
5374         if (err)
5375                 lockdep_set_quota_inode(path->dentry->d_inode,
5376                                              I_DATA_SEM_NORMAL);
5377         return err;
5378 }
5379
5380 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
5381                              unsigned int flags)
5382 {
5383         int err;
5384         struct inode *qf_inode;
5385         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5386                 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5387                 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
5388                 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
5389         };
5390
5391         BUG_ON(!ext4_has_feature_quota(sb));
5392
5393         if (!qf_inums[type])
5394                 return -EPERM;
5395
5396         qf_inode = ext4_iget(sb, qf_inums[type]);
5397         if (IS_ERR(qf_inode)) {
5398                 ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
5399                 return PTR_ERR(qf_inode);
5400         }
5401
5402         /* Don't account quota for quota files to avoid recursion */
5403         qf_inode->i_flags |= S_NOQUOTA;
5404         lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
5405         err = dquot_enable(qf_inode, type, format_id, flags);
5406         iput(qf_inode);
5407         if (err)
5408                 lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
5409
5410         return err;
5411 }
5412
5413 /* Enable usage tracking for all quota types. */
5414 static int ext4_enable_quotas(struct super_block *sb)
5415 {
5416         int type, err = 0;
5417         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5418                 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5419                 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
5420                 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
5421         };
5422         bool quota_mopt[EXT4_MAXQUOTAS] = {
5423                 test_opt(sb, USRQUOTA),
5424                 test_opt(sb, GRPQUOTA),
5425                 test_opt(sb, PRJQUOTA),
5426         };
5427
5428         sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
5429         for (type = 0; type < EXT4_MAXQUOTAS; type++) {
5430                 if (qf_inums[type]) {
5431                         err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
5432                                 DQUOT_USAGE_ENABLED |
5433                                 (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
5434                         if (err) {
5435                                 ext4_warning(sb,
5436                                         "Failed to enable quota tracking "
5437                                         "(type=%d, err=%d). Please run "
5438                                         "e2fsck to fix.", type, err);
5439                                 return err;
5440                         }
5441                 }
5442         }
5443         return 0;
5444 }
5445
5446 static int ext4_quota_off(struct super_block *sb, int type)
5447 {
5448         struct inode *inode = sb_dqopt(sb)->files[type];
5449         handle_t *handle;
5450
5451         /* Force all delayed allocation blocks to be allocated.
5452          * Caller already holds s_umount sem */
5453         if (test_opt(sb, DELALLOC))
5454                 sync_filesystem(sb);
5455
5456         if (!inode)
5457                 goto out;
5458
5459         /* Update modification times of quota files when userspace can
5460          * start looking at them */
5461         handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5462         if (IS_ERR(handle))
5463                 goto out;
5464         inode->i_mtime = inode->i_ctime = current_time(inode);
5465         ext4_mark_inode_dirty(handle, inode);
5466         ext4_journal_stop(handle);
5467
5468 out:
5469         return dquot_quota_off(sb, type);
5470 }
5471
5472 /* Read data from quotafile - avoid pagecache and such because we cannot afford
5473  * acquiring the locks... As quota files are never truncated and quota code
5474  * itself serializes the operations (and no one else should touch the files)
5475  * we don't have to be afraid of races */
5476 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
5477                                size_t len, loff_t off)
5478 {
5479         struct inode *inode = sb_dqopt(sb)->files[type];
5480         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5481         int offset = off & (sb->s_blocksize - 1);
5482         int tocopy;
5483         size_t toread;
5484         struct buffer_head *bh;
5485         loff_t i_size = i_size_read(inode);
5486
5487         if (off > i_size)
5488                 return 0;
5489         if (off+len > i_size)
5490                 len = i_size-off;
5491         toread = len;
5492         while (toread > 0) {
5493                 tocopy = sb->s_blocksize - offset < toread ?
5494                                 sb->s_blocksize - offset : toread;
5495                 bh = ext4_bread(NULL, inode, blk, 0);
5496                 if (IS_ERR(bh))
5497                         return PTR_ERR(bh);
5498                 if (!bh)        /* A hole? */
5499                         memset(data, 0, tocopy);
5500                 else
5501                         memcpy(data, bh->b_data+offset, tocopy);
5502                 brelse(bh);
5503                 offset = 0;
5504                 toread -= tocopy;
5505                 data += tocopy;
5506                 blk++;
5507         }
5508         return len;
5509 }
5510
5511 /* Write to quotafile (we know the transaction is already started and has
5512  * enough credits) */
5513 static ssize_t ext4_quota_write(struct super_block *sb, int type,
5514                                 const char *data, size_t len, loff_t off)
5515 {
5516         struct inode *inode = sb_dqopt(sb)->files[type];
5517         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5518         int err, offset = off & (sb->s_blocksize - 1);
5519         int retries = 0;
5520         struct buffer_head *bh;
5521         handle_t *handle = journal_current_handle();
5522
5523         if (EXT4_SB(sb)->s_journal && !handle) {
5524                 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5525                         " cancelled because transaction is not started",
5526                         (unsigned long long)off, (unsigned long long)len);
5527                 return -EIO;
5528         }
5529         /*
5530          * Since we account only one data block in transaction credits,
5531          * then it is impossible to cross a block boundary.
5532          */
5533         if (sb->s_blocksize - offset < len) {
5534                 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5535                         " cancelled because not block aligned",
5536                         (unsigned long long)off, (unsigned long long)len);
5537                 return -EIO;
5538         }
5539
5540         do {
5541                 bh = ext4_bread(handle, inode, blk,
5542                                 EXT4_GET_BLOCKS_CREATE |
5543                                 EXT4_GET_BLOCKS_METADATA_NOFAIL);
5544         } while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) &&
5545                  ext4_should_retry_alloc(inode->i_sb, &retries));
5546         if (IS_ERR(bh))
5547                 return PTR_ERR(bh);
5548         if (!bh)
5549                 goto out;
5550         BUFFER_TRACE(bh, "get write access");
5551         err = ext4_journal_get_write_access(handle, bh);
5552         if (err) {
5553                 brelse(bh);
5554                 return err;
5555         }
5556         lock_buffer(bh);
5557         memcpy(bh->b_data+offset, data, len);
5558         flush_dcache_page(bh->b_page);
5559         unlock_buffer(bh);
5560         err = ext4_handle_dirty_metadata(handle, NULL, bh);
5561         brelse(bh);
5562 out:
5563         if (inode->i_size < off + len) {
5564                 i_size_write(inode, off + len);
5565                 EXT4_I(inode)->i_disksize = inode->i_size;
5566                 ext4_mark_inode_dirty(handle, inode);
5567         }
5568         return len;
5569 }
5570
5571 static int ext4_get_next_id(struct super_block *sb, struct kqid *qid)
5572 {
5573         const struct quota_format_ops   *ops;
5574
5575         if (!sb_has_quota_loaded(sb, qid->type))
5576                 return -ESRCH;
5577         ops = sb_dqopt(sb)->ops[qid->type];
5578         if (!ops || !ops->get_next_id)
5579                 return -ENOSYS;
5580         return dquot_get_next_id(sb, qid);
5581 }
5582 #endif
5583
5584 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
5585                        const char *dev_name, void *data)
5586 {
5587         return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
5588 }
5589
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
/* Register the ext2 filesystem type; a failure is only logged */
static inline void register_as_ext2(void)
{
	int err;

	err = register_filesystem(&ext2_fs_type);
	if (!err)
		return;
	printk(KERN_WARNING
	       "EXT4-fs: Unable to register as ext2 (%d)\n", err);
}

/* Unregister the ext2 filesystem type */
static inline void unregister_as_ext2(void)
{
	unregister_filesystem(&ext2_fs_type);
}

/*
 * Return 1 when @sb has no unknown ext2 incompat features and, unless it
 * is mounted read-only, no unknown ext2 ro_compat features either;
 * return 0 otherwise.
 */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext2_incompat_features(sb))
		return 0;

	/* Unknown ro_compat features only matter for writable mounts */
	return (sb->s_flags & MS_RDONLY) ||
	       !ext4_has_unknown_ext2_ro_compat_features(sb);
}
#else
/* ext4 does not stand in for ext2 in this configuration */
static inline void register_as_ext2(void) { }
static inline void unregister_as_ext2(void) { }
static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
#endif
5619
5620 static inline void register_as_ext3(void)
5621 {
5622         int err = register_filesystem(&ext3_fs_type);
5623         if (err)
5624                 printk(KERN_WARNING
5625                        "EXT4-fs: Unable to register as ext3 (%d)\n", err);
5626 }
5627
5628 static inline void unregister_as_ext3(void)
5629 {
5630         unregister_filesystem(&ext3_fs_type);
5631 }
5632
5633 static inline int ext3_feature_set_ok(struct super_block *sb)
5634 {
5635         if (ext4_has_unknown_ext3_incompat_features(sb))
5636                 return 0;
5637         if (!ext4_has_feature_journal(sb))
5638                 return 0;
5639         if (sb->s_flags & MS_RDONLY)
5640                 return 1;
5641         if (ext4_has_unknown_ext3_ro_compat_features(sb))
5642                 return 0;
5643         return 1;
5644 }
5645
5646 static struct file_system_type ext4_fs_type = {
5647         .owner          = THIS_MODULE,
5648         .name           = "ext4",
5649         .mount          = ext4_mount,
5650         .kill_sb        = kill_block_super,
5651         .fs_flags       = FS_REQUIRES_DEV | FS_USERNS_MOUNT,
5652 };
5653 MODULE_ALIAS_FS("ext4");
5654
5655 /* Shared across all ext4 file systems */
5656 wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
5657
5658 static int __init ext4_init_fs(void)
5659 {
5660         int i, err;
5661
5662         ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
5663         ext4_li_info = NULL;
5664         mutex_init(&ext4_li_mtx);
5665
5666         /* Build-time check for flags consistency */
5667         ext4_check_flag_values();
5668
5669         for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
5670                 init_waitqueue_head(&ext4__ioend_wq[i]);
5671
5672         err = ext4_init_es();
5673         if (err)
5674                 return err;
5675
5676         err = ext4_init_pageio();
5677         if (err)
5678                 goto out5;
5679
5680         err = ext4_init_system_zone();
5681         if (err)
5682                 goto out4;
5683
5684         err = ext4_init_sysfs();
5685         if (err)
5686                 goto out3;
5687
5688         err = ext4_init_mballoc();
5689         if (err)
5690                 goto out2;
5691         err = init_inodecache();
5692         if (err)
5693                 goto out1;
5694         register_as_ext3();
5695         register_as_ext2();
5696         err = register_filesystem(&ext4_fs_type);
5697         if (err)
5698                 goto out;
5699
5700         return 0;
5701 out:
5702         unregister_as_ext2();
5703         unregister_as_ext3();
5704         destroy_inodecache();
5705 out1:
5706         ext4_exit_mballoc();
5707 out2:
5708         ext4_exit_sysfs();
5709 out3:
5710         ext4_exit_system_zone();
5711 out4:
5712         ext4_exit_pageio();
5713 out5:
5714         ext4_exit_es();
5715
5716         return err;
5717 }
5718
5719 static void __exit ext4_exit_fs(void)
5720 {
5721         ext4_destroy_lazyinit_thread();
5722         unregister_as_ext2();
5723         unregister_as_ext3();
5724         unregister_filesystem(&ext4_fs_type);
5725         destroy_inodecache();
5726         ext4_exit_mballoc();
5727         ext4_exit_sysfs();
5728         ext4_exit_system_zone();
5729         ext4_exit_pageio();
5730         ext4_exit_es();
5731 }
5732
5733 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
5734 MODULE_DESCRIPTION("Fourth Extended Filesystem");
5735 MODULE_LICENSE("GPL");
5736 module_init(ext4_init_fs)
5737 module_exit(ext4_exit_fs)