fs/ext4/super.c

   1 /*
   2  *  linux/fs/ext4/super.c
   3  *
   4  * Copyright (C) 1992, 1993, 1994, 1995
   5  * Remy Card (card@masi.ibp.fr)
   6  * Laboratoire MASI - Institut Blaise Pascal
   7  * Universite Pierre et Marie Curie (Paris VI)
   8  *
   9  *  from
  10  *
  11  *  linux/fs/minix/inode.c
  12  *
  13  *  Copyright (C) 1991, 1992  Linus Torvalds
  14  *
  15  *  Big-endian to little-endian byte-swapping/bitmaps by
  16  *        David S. Miller (davem@caip.rutgers.edu), 1995
  17  */
  18
  19 #include <linux/module.h>
  20 #include <linux/string.h>
  21 #include <linux/fs.h>
  22 #include <linux/time.h>
  23 #include <linux/vmalloc.h>
  24 #include <linux/jbd2.h>
  25 #include <linux/slab.h>
  26 #include <linux/init.h>
  27 #include <linux/blkdev.h>
  28 #include <linux/parser.h>
  29 #include <linux/smp_lock.h>
  30 #include <linux/buffer_head.h>
  31 #include <linux/exportfs.h>
  32 #include <linux/vfs.h>
  33 #include <linux/random.h>
  34 #include <linux/mount.h>
  35 #include <linux/namei.h>
  36 #include <linux/quotaops.h>
  37 #include <linux/seq_file.h>
  38 #include <linux/proc_fs.h>
  39 #include <linux/ctype.h>
  40 #include <linux/marker.h>
  41 #include <linux/log2.h>
  42 #include <linux/crc16.h>
  43 #include <asm/uaccess.h>
  44
  45 #include "ext4.h"
  46 #include "ext4_jbd2.h"
  47 #include "xattr.h"
  48 #include "acl.h"
  49 #include "namei.h"
  50 #include "group.h"
  51
/*
 * Parent /proc directory entry; ext4_put_super() passes it to
 * remove_proc_entry() when tearing down a superblock's own entry.
 */
struct proc_dir_entry *ext4_proc_root;
/* NOTE(review): presumably the kset backing ext4's sysfs objects - confirm. */
static struct kset *ext4_kset;

/* Forward declarations for file-local helpers that are used before their definition. */
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
			     unsigned long journal_devnum);
static int ext4_commit_super(struct super_block *sb, int sync);
static void ext4_mark_recovery_complete(struct super_block *sb,
					struct ext4_super_block *es);
static void ext4_clear_journal_err(struct super_block *sb,
				   struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static const char *ext4_decode_error(struct super_block *sb, int errno,
				     char nbuf[16]);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
  70
  71
  72 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
  73                                struct ext4_group_desc *bg)
  74 {
  75         return le32_to_cpu(bg->bg_block_bitmap_lo) |
  76                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  77                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
  78 }
  79
  80 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
  81                                struct ext4_group_desc *bg)
  82 {
  83         return le32_to_cpu(bg->bg_inode_bitmap_lo) |
  84                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  85                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
  86 }
  87
  88 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
  89                               struct ext4_group_desc *bg)
  90 {
  91         return le32_to_cpu(bg->bg_inode_table_lo) |
  92                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  93                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
  94 }
  95
  96 __u32 ext4_free_blks_count(struct super_block *sb,
  97                               struct ext4_group_desc *bg)
  98 {
  99         return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 100                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 101                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 102 }
 103
 104 __u32 ext4_free_inodes_count(struct super_block *sb,
 105                               struct ext4_group_desc *bg)
 106 {
 107         return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 108                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 109                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 110 }
 111
 112 __u32 ext4_used_dirs_count(struct super_block *sb,
 113                               struct ext4_group_desc *bg)
 114 {
 115         return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 116                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 117                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 118 }
 119
 120 __u32 ext4_itable_unused_count(struct super_block *sb,
 121                               struct ext4_group_desc *bg)
 122 {
 123         return le16_to_cpu(bg->bg_itable_unused_lo) |
 124                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 125                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 126 }
 127
 128 void ext4_block_bitmap_set(struct super_block *sb,
 129                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 130 {
 131         bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 132         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 133                 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 134 }
 135
 136 void ext4_inode_bitmap_set(struct super_block *sb,
 137                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 138 {
 139         bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 140         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 141                 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 142 }
 143
 144 void ext4_inode_table_set(struct super_block *sb,
 145                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 146 {
 147         bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 148         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 149                 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 150 }
 151
 152 void ext4_free_blks_set(struct super_block *sb,
 153                           struct ext4_group_desc *bg, __u32 count)
 154 {
 155         bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 156         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 157                 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 158 }
 159
 160 void ext4_free_inodes_set(struct super_block *sb,
 161                           struct ext4_group_desc *bg, __u32 count)
 162 {
 163         bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 164         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 165                 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 166 }
 167
 168 void ext4_used_dirs_set(struct super_block *sb,
 169                           struct ext4_group_desc *bg, __u32 count)
 170 {
 171         bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 172         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 173                 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 174 }
 175
 176 void ext4_itable_unused_set(struct super_block *sb,
 177                           struct ext4_group_desc *bg, __u32 count)
 178 {
 179         bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 180         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 181                 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 182 }
 183
 184 /*
 185  * Wrappers for jbd2_journal_start/end.
 186  *
 187  * The only special thing we need to do here is to make sure that all
 188  * journal_end calls result in the superblock being marked dirty, so
 189  * that sync() will call the filesystem's write_super callback if
 190  * appropriate.
 191  */
 192 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 193 {
 194         journal_t *journal;
 195
 196         if (sb->s_flags & MS_RDONLY)
 197                 return ERR_PTR(-EROFS);
 198
 199         /* Special case here: if the journal has aborted behind our
 200          * backs (eg. EIO in the commit thread), then we still need to
 201          * take the FS itself readonly cleanly. */
 202         journal = EXT4_SB(sb)->s_journal;
 203         if (journal) {
 204                 if (is_journal_aborted(journal)) {
 205                         ext4_abort(sb, __func__,
 206                                    "Detected aborted journal");
 207                         return ERR_PTR(-EROFS);
 208                 }
 209                 return jbd2_journal_start(journal, nblocks);
 210         }
 211         /*
 212          * We're not journaling, return the appropriate indication.
 213          */
 214         current->journal_info = EXT4_NOJOURNAL_HANDLE;
 215         return current->journal_info;
 216 }
 217
 218 /*
 219  * The only special thing we need to do here is to make sure that all
 220  * jbd2_journal_stop calls result in the superblock being marked dirty, so
 221  * that sync() will call the filesystem's write_super callback if
 222  * appropriate.
 223  */
 224 int __ext4_journal_stop(const char *where, handle_t *handle)
 225 {
 226         struct super_block *sb;
 227         int err;
 228         int rc;
 229
 230         if (!ext4_handle_valid(handle)) {
 231                 /*
 232                  * Do this here since we don't call jbd2_journal_stop() in
 233                  * no-journal mode.
 234                  */
 235                 current->journal_info = NULL;
 236                 return 0;
 237         }
 238         sb = handle->h_transaction->t_journal->j_private;
 239         err = handle->h_err;
 240         rc = jbd2_journal_stop(handle);
 241
 242         if (!err)
 243                 err = rc;
 244         if (err)
 245                 __ext4_std_error(sb, where, err);
 246         return err;
 247 }
 248
 249 void ext4_journal_abort_handle(const char *caller, const char *err_fn,
 250                 struct buffer_head *bh, handle_t *handle, int err)
 251 {
 252         char nbuf[16];
 253         const char *errstr = ext4_decode_error(NULL, err, nbuf);
 254
 255         BUG_ON(!ext4_handle_valid(handle));
 256
 257         if (bh)
 258                 BUFFER_TRACE(bh, "abort");
 259
 260         if (!handle->h_err)
 261                 handle->h_err = err;
 262
 263         if (is_handle_aborted(handle))
 264                 return;
 265
 266         printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
 267                caller, errstr, err_fn);
 268
 269         jbd2_journal_abort_handle(handle);
 270 }
 271
 272 /* Deal with the reporting of failure conditions on a filesystem such as
 273  * inconsistencies detected or read IO failures.
 274  *
 275  * On ext2, we can store the error state of the filesystem in the
 276  * superblock.  That is not possible on ext4, because we may have other
 277  * write ordering constraints on the superblock which prevent us from
 278  * writing it out straight away; and given that the journal is about to
 279  * be aborted, we can't rely on the current, or future, transactions to
 280  * write out the superblock safely.
 281  *
 282  * We'll just use the jbd2_journal_abort() error code to record an error in
 283  * the journal instead.  On recovery, the journal will compain about
 284  * that error until we've noted it down and cleared it.
 285  */
 286
 287 static void ext4_handle_error(struct super_block *sb)
 288 {
 289         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 290
 291         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 292         es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 293
 294         if (sb->s_flags & MS_RDONLY)
 295                 return;
 296
 297         if (!test_opt(sb, ERRORS_CONT)) {
 298                 journal_t *journal = EXT4_SB(sb)->s_journal;
 299
 300                 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
 301                 if (journal)
 302                         jbd2_journal_abort(journal, -EIO);
 303         }
 304         if (test_opt(sb, ERRORS_RO)) {
 305                 printk(KERN_CRIT "Remounting filesystem read-only\n");
 306                 sb->s_flags |= MS_RDONLY;
 307         }
 308         ext4_commit_super(sb, 1);
 309         if (test_opt(sb, ERRORS_PANIC))
 310                 panic("EXT4-fs (device %s): panic forced after error\n",
 311                         sb->s_id);
 312 }
 313
 314 void ext4_error(struct super_block *sb, const char *function,
 315                 const char *fmt, ...)
 316 {
 317         va_list args;
 318
 319         va_start(args, fmt);
 320         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 321         vprintk(fmt, args);
 322         printk("\n");
 323         va_end(args);
 324
 325         ext4_handle_error(sb);
 326 }
 327
 328 static const char *ext4_decode_error(struct super_block *sb, int errno,
 329                                      char nbuf[16])
 330 {
 331         char *errstr = NULL;
 332
 333         switch (errno) {
 334         case -EIO:
 335                 errstr = "IO failure";
 336                 break;
 337         case -ENOMEM:
 338                 errstr = "Out of memory";
 339                 break;
 340         case -EROFS:
 341                 if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)
 342                         errstr = "Journal has aborted";
 343                 else
 344                         errstr = "Readonly filesystem";
 345                 break;
 346         default:
 347                 /* If the caller passed in an extra buffer for unknown
 348                  * errors, textualise them now.  Else we just return
 349                  * NULL. */
 350                 if (nbuf) {
 351                         /* Check for truncated error codes... */
 352                         if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 353                                 errstr = nbuf;
 354                 }
 355                 break;
 356         }
 357
 358         return errstr;
 359 }
 360
 361 /* __ext4_std_error decodes expected errors from journaling functions
 362  * automatically and invokes the appropriate error response.  */
 363
 364 void __ext4_std_error(struct super_block *sb, const char *function, int errno)
 365 {
 366         char nbuf[16];
 367         const char *errstr;
 368
 369         /* Special case: if the error is EROFS, and we're not already
 370          * inside a transaction, then there's really no point in logging
 371          * an error. */
 372         if (errno == -EROFS && journal_current_handle() == NULL &&
 373             (sb->s_flags & MS_RDONLY))
 374                 return;
 375
 376         errstr = ext4_decode_error(sb, errno, nbuf);
 377         printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
 378                sb->s_id, function, errstr);
 379
 380         ext4_handle_error(sb);
 381 }
 382
 383 /*
 384  * ext4_abort is a much stronger failure handler than ext4_error.  The
 385  * abort function may be used to deal with unrecoverable failures such
 386  * as journal IO errors or ENOMEM at a critical moment in log management.
 387  *
 388  * We unconditionally force the filesystem into an ABORT|READONLY state,
 389  * unless the error response on the fs has been set to panic in which
 390  * case we take the easy way out and panic immediately.
 391  */
 392
 393 void ext4_abort(struct super_block *sb, const char *function,
 394                 const char *fmt, ...)
 395 {
 396         va_list args;
 397
 398         printk(KERN_CRIT "ext4_abort called.\n");
 399
 400         va_start(args, fmt);
 401         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 402         vprintk(fmt, args);
 403         printk("\n");
 404         va_end(args);
 405
 406         if (test_opt(sb, ERRORS_PANIC))
 407                 panic("EXT4-fs panic from previous error\n");
 408
 409         if (sb->s_flags & MS_RDONLY)
 410                 return;
 411
 412         printk(KERN_CRIT "Remounting filesystem read-only\n");
 413         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 414         sb->s_flags |= MS_RDONLY;
 415         EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
 416         if (EXT4_SB(sb)->s_journal)
 417                 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 418 }
 419
 420 void ext4_warning(struct super_block *sb, const char *function,
 421                   const char *fmt, ...)
 422 {
 423         va_list args;
 424
 425         va_start(args, fmt);
 426         printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
 427                sb->s_id, function);
 428         vprintk(fmt, args);
 429         printk("\n");
 430         va_end(args);
 431 }
 432
 433 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
 434                                 const char *function, const char *fmt, ...)
 435 __releases(bitlock)
 436 __acquires(bitlock)
 437 {
 438         va_list args;
 439         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 440
 441         va_start(args, fmt);
 442         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 443         vprintk(fmt, args);
 444         printk("\n");
 445         va_end(args);
 446
 447         if (test_opt(sb, ERRORS_CONT)) {
 448                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 449                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 450                 ext4_commit_super(sb, 0);
 451                 return;
 452         }
 453         ext4_unlock_group(sb, grp);
 454         ext4_handle_error(sb);
 455         /*
 456          * We only get here in the ERRORS_RO case; relocking the group
 457          * may be dangerous, but nothing bad will happen since the
 458          * filesystem will have already been marked read/only and the
 459          * journal has been aborted.  We return 1 as a hint to callers
 460          * who might what to use the return value from
 461          * ext4_grp_locked_error() to distinguish beween the
 462          * ERRORS_CONT and ERRORS_RO case, and perhaps return more
 463          * aggressively from the ext4 function in question, with a
 464          * more appropriate error code.
 465          */
 466         ext4_lock_group(sb, grp);
 467         return;
 468 }
 469
 470
 471 void ext4_update_dynamic_rev(struct super_block *sb)
 472 {
 473         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 474
 475         if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 476                 return;
 477
 478         ext4_warning(sb, __func__,
 479                      "updating to rev %d because of new feature flag, "
 480                      "running e2fsck is recommended",
 481                      EXT4_DYNAMIC_REV);
 482
 483         es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
 484         es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
 485         es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 486         /* leave es->s_feature_*compat flags alone */
 487         /* es->s_uuid will be set by e2fsck if empty */
 488
 489         /*
 490          * The rest of the superblock fields should be zero, and if not it
 491          * means they are likely already in use, so leave them alone.  We
 492          * can leave it up to e2fsck to clean up any inconsistencies there.
 493          */
 494 }
 495
 496 /*
 497  * Open the external journal device
 498  */
 499 static struct block_device *ext4_blkdev_get(dev_t dev)
 500 {
 501         struct block_device *bdev;
 502         char b[BDEVNAME_SIZE];
 503
 504         bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
 505         if (IS_ERR(bdev))
 506                 goto fail;
 507         return bdev;
 508
 509 fail:
 510         printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
 511                         __bdevname(dev, b), PTR_ERR(bdev));
 512         return NULL;
 513 }
 514
 515 /*
 516  * Release the journal device
 517  */
 518 static int ext4_blkdev_put(struct block_device *bdev)
 519 {
 520         bd_release(bdev);
 521         return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
 522 }
 523
 524 static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
 525 {
 526         struct block_device *bdev;
 527         int ret = -ENODEV;
 528
 529         bdev = sbi->journal_bdev;
 530         if (bdev) {
 531                 ret = ext4_blkdev_put(bdev);
 532                 sbi->journal_bdev = NULL;
 533         }
 534         return ret;
 535 }
 536
 537 static inline struct inode *orphan_list_entry(struct list_head *l)
 538 {
 539         return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 540 }
 541
 542 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 543 {
 544         struct list_head *l;
 545
 546         printk(KERN_ERR "sb orphan head is %d\n",
 547                le32_to_cpu(sbi->s_es->s_last_orphan));
 548
 549         printk(KERN_ERR "sb_info orphan list:\n");
 550         list_for_each(l, &sbi->s_orphan) {
 551                 struct inode *inode = orphan_list_entry(l);
 552                 printk(KERN_ERR "  "
 553                        "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 554                        inode->i_sb->s_id, inode->i_ino, inode,
 555                        inode->i_mode, inode->i_nlink,
 556                        NEXT_ORPHAN(inode));
 557         }
 558 }
 559
 560 static void ext4_put_super(struct super_block *sb)
 561 {
 562         struct ext4_sb_info *sbi = EXT4_SB(sb);
 563         struct ext4_super_block *es = sbi->s_es;
 564         int i, err;
 565
 566         ext4_mb_release(sb);
 567         ext4_ext_release(sb);
 568         ext4_xattr_put_super(sb);
 569         if (sbi->s_journal) {
 570                 err = jbd2_journal_destroy(sbi->s_journal);
 571                 sbi->s_journal = NULL;
 572                 if (err < 0)
 573                         ext4_abort(sb, __func__,
 574                                    "Couldn't clean up the journal");
 575         }
 576         if (!(sb->s_flags & MS_RDONLY)) {
 577                 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 578                 es->s_state = cpu_to_le16(sbi->s_mount_state);
 579                 ext4_commit_super(sb, 1);
 580         }
 581         if (sbi->s_proc) {
 582                 remove_proc_entry(sb->s_id, ext4_proc_root);
 583         }
 584         kobject_del(&sbi->s_kobj);
 585
 586         for (i = 0; i < sbi->s_gdb_count; i++)
 587                 brelse(sbi->s_group_desc[i]);
 588         kfree(sbi->s_group_desc);
 589         if (is_vmalloc_addr(sbi->s_flex_groups))
 590                 vfree(sbi->s_flex_groups);
 591         else
 592                 kfree(sbi->s_flex_groups);
 593         percpu_counter_destroy(&sbi->s_freeblocks_counter);
 594         percpu_counter_destroy(&sbi->s_freeinodes_counter);
 595         percpu_counter_destroy(&sbi->s_dirs_counter);
 596         percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 597         brelse(sbi->s_sbh);
 598 #ifdef CONFIG_QUOTA
 599         for (i = 0; i < MAXQUOTAS; i++)
 600                 kfree(sbi->s_qf_names[i]);
 601 #endif
 602
 603         /* Debugging code just in case the in-memory inode orphan list
 604          * isn't empty.  The on-disk one can be non-empty if we've
 605          * detected an error and taken the fs readonly, but the
 606          * in-memory list had better be clean by this point. */
 607         if (!list_empty(&sbi->s_orphan))
 608                 dump_orphan_list(sb, sbi);
 609         J_ASSERT(list_empty(&sbi->s_orphan));
 610
 611         invalidate_bdev(sb->s_bdev);
 612         if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
 613                 /*
 614                  * Invalidate the journal device's buffers.  We don't want them
 615                  * floating about in memory - the physical journal device may
 616                  * hotswapped, and it breaks the `ro-after' testing code.
 617                  */
 618                 sync_blockdev(sbi->journal_bdev);
 619                 invalidate_bdev(sbi->journal_bdev);
 620                 ext4_blkdev_remove(sbi);
 621         }
 622         sb->s_fs_info = NULL;
 623         /*
 624          * Now that we are completely done shutting down the
 625          * superblock, we need to actually destroy the kobject.
 626          */
 627         unlock_kernel();
 628         unlock_super(sb);
 629         kobject_put(&sbi->s_kobj);
 630         wait_for_completion(&sbi->s_kobj_unregister);
 631         lock_super(sb);
 632         lock_kernel();
 633         kfree(sbi->s_blockgroup_lock);
 634         kfree(sbi);
 635         return;
 636 }
 637
 638 static struct kmem_cache *ext4_inode_cachep;
 639
 640 /*
 641  * Called inside transaction, so use GFP_NOFS
 642  */
 643 static struct inode *ext4_alloc_inode(struct super_block *sb)
 644 {
 645         struct ext4_inode_info *ei;
 646
 647         ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 648         if (!ei)
 649                 return NULL;
 650 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 651         ei->i_acl = EXT4_ACL_NOT_CACHED;
 652         ei->i_default_acl = EXT4_ACL_NOT_CACHED;
 653 #endif
 654         ei->vfs_inode.i_version = 1;
 655         ei->vfs_inode.i_data.writeback_index = 0;
 656         memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
 657         INIT_LIST_HEAD(&ei->i_prealloc_list);
 658         spin_lock_init(&ei->i_prealloc_lock);
 659         /*
 660          * Note:  We can be called before EXT4_SB(sb)->s_journal is set,
 661          * therefore it can be null here.  Don't check it, just initialize
 662          * jinode.
 663          */
 664         jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
 665         ei->i_reserved_data_blocks = 0;
 666         ei->i_reserved_meta_blocks = 0;
 667         ei->i_allocated_meta_blocks = 0;
 668         ei->i_delalloc_reserved_flag = 0;
 669         spin_lock_init(&(ei->i_block_reservation_lock));
 670         return &ei->vfs_inode;
 671 }
 672
 673 static void ext4_destroy_inode(struct inode *inode)
 674 {
 675         if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 676                 printk("EXT4 Inode %p: orphan list check failed!\n",
 677                         EXT4_I(inode));
 678                 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 679                                 EXT4_I(inode), sizeof(struct ext4_inode_info),
 680                                 true);
 681                 dump_stack();
 682         }
 683         kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 684 }
 685
 686 static void init_once(void *foo)
 687 {
 688         struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 689
 690         INIT_LIST_HEAD(&ei->i_orphan);
 691 #ifdef CONFIG_EXT4_FS_XATTR
 692         init_rwsem(&ei->xattr_sem);
 693 #endif
 694         init_rwsem(&ei->i_data_sem);
 695         inode_init_once(&ei->vfs_inode);
 696 }
 697
 698 static int init_inodecache(void)
 699 {
 700         ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
 701                                              sizeof(struct ext4_inode_info),
 702                                              0, (SLAB_RECLAIM_ACCOUNT|
 703                                                 SLAB_MEM_SPREAD),
 704                                              init_once);
 705         if (ext4_inode_cachep == NULL)
 706                 return -ENOMEM;
 707         return 0;
 708 }
 709
 710 static void destroy_inodecache(void)
 711 {
 712         kmem_cache_destroy(ext4_inode_cachep);
 713 }
 714
 715 static void ext4_clear_inode(struct inode *inode)
 716 {
 717 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 718         if (EXT4_I(inode)->i_acl &&
 719                         EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
 720                 posix_acl_release(EXT4_I(inode)->i_acl);
 721                 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
 722         }
 723         if (EXT4_I(inode)->i_default_acl &&
 724                         EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
 725                 posix_acl_release(EXT4_I(inode)->i_default_acl);
 726                 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
 727         }
 728 #endif
 729         ext4_discard_preallocations(inode);
 730         if (EXT4_JOURNAL(inode))
 731                 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
 732                                        &EXT4_I(inode)->jinode);
 733 }
 734
 735 static inline void ext4_show_quota_options(struct seq_file *seq,
 736                                            struct super_block *sb)
 737 {
 738 #if defined(CONFIG_QUOTA)
 739         struct ext4_sb_info *sbi = EXT4_SB(sb);
 740
 741         if (sbi->s_jquota_fmt)
 742                 seq_printf(seq, ",jqfmt=%s",
 743                 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0");
 744
 745         if (sbi->s_qf_names[USRQUOTA])
 746                 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
 747
 748         if (sbi->s_qf_names[GRPQUOTA])
 749                 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
 750
 751         if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA)
 752                 seq_puts(seq, ",usrquota");
 753
 754         if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)
 755                 seq_puts(seq, ",grpquota");
 756 #endif
 757 }
 758
 759 /*
 760  * Show an option if
 761  *  - it's set to a non-default value OR
 762  *  - if the per-sb default is different from the global default
 763  */
 764 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 765 {
 766         int def_errors;
 767         unsigned long def_mount_opts;
 768         struct super_block *sb = vfs->mnt_sb;
 769         struct ext4_sb_info *sbi = EXT4_SB(sb);
 770         struct ext4_super_block *es = sbi->s_es;
 771
 772         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
 773         def_errors     = le16_to_cpu(es->s_errors);
 774
 775         if (sbi->s_sb_block != 1)
 776                 seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
 777         if (test_opt(sb, MINIX_DF))
 778                 seq_puts(seq, ",minixdf");
 779         if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
 780                 seq_puts(seq, ",grpid");
 781         if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
 782                 seq_puts(seq, ",nogrpid");
 783         if (sbi->s_resuid != EXT4_DEF_RESUID ||
 784             le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) {
 785                 seq_printf(seq, ",resuid=%u", sbi->s_resuid);
 786         }
 787         if (sbi->s_resgid != EXT4_DEF_RESGID ||
 788             le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
 789                 seq_printf(seq, ",resgid=%u", sbi->s_resgid);
 790         }
 791         if (test_opt(sb, ERRORS_RO)) {
 792                 if (def_errors == EXT4_ERRORS_PANIC ||
 793                     def_errors == EXT4_ERRORS_CONTINUE) {
 794                         seq_puts(seq, ",errors=remount-ro");
 795                 }
 796         }
 797         if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
 798                 seq_puts(seq, ",errors=continue");
 799         if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
 800                 seq_puts(seq, ",errors=panic");
 801         if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
 802                 seq_puts(seq, ",nouid32");
 803         if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
 804                 seq_puts(seq, ",debug");
 805         if (test_opt(sb, OLDALLOC))
 806                 seq_puts(seq, ",oldalloc");
 807 #ifdef CONFIG_EXT4_FS_XATTR
 808         if (test_opt(sb, XATTR_USER) &&
 809                 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
 810                 seq_puts(seq, ",user_xattr");
 811         if (!test_opt(sb, XATTR_USER) &&
 812             (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
 813                 seq_puts(seq, ",nouser_xattr");
 814         }
 815 #endif
 816 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 817         if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
 818                 seq_puts(seq, ",acl");
 819         if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
 820                 seq_puts(seq, ",noacl");
 821 #endif
 822         if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
 823                 seq_printf(seq, ",commit=%u",
 824                            (unsigned) (sbi->s_commit_interval / HZ));
 825         }
 826         if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
 827                 seq_printf(seq, ",min_batch_time=%u",
 828                            (unsigned) sbi->s_min_batch_time);
 829         }
 830         if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
 831                 seq_printf(seq, ",max_batch_time=%u",
 832                            (unsigned) sbi->s_min_batch_time);
 833         }
 834
 835         /*
 836          * We're changing the default of barrier mount option, so
 837          * let's always display its mount state so it's clear what its
 838          * status is.
 839          */
 840         seq_puts(seq, ",barrier=");
 841         seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
 842         if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
 843                 seq_puts(seq, ",journal_async_commit");
 844         if (test_opt(sb, NOBH))
 845                 seq_puts(seq, ",nobh");
 846         if (test_opt(sb, I_VERSION))
 847                 seq_puts(seq, ",i_version");
 848         if (!test_opt(sb, DELALLOC))
 849                 seq_puts(seq, ",nodelalloc");
 850
 851
 852         if (sbi->s_stripe)
 853                 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
 854         /*
 855          * journal mode get enabled in different ways
 856          * So just print the value even if we didn't specify it
 857          */
 858         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
 859                 seq_puts(seq, ",data=journal");
 860         else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
 861                 seq_puts(seq, ",data=ordered");
 862         else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
 863                 seq_puts(seq, ",data=writeback");
 864
 865         if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
 866                 seq_printf(seq, ",inode_readahead_blks=%u",
 867                            sbi->s_inode_readahead_blks);
 868
 869         if (test_opt(sb, DATA_ERR_ABORT))
 870                 seq_puts(seq, ",data_err=abort");
 871
 872         if (test_opt(sb, NO_AUTO_DA_ALLOC))
 873                 seq_puts(seq, ",noauto_da_alloc");
 874
 875         ext4_show_quota_options(seq, sb);
 876         return 0;
 877 }
 878
 879
 880 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
 881                 u64 ino, u32 generation)
 882 {
 883         struct inode *inode;
 884
 885         if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
 886                 return ERR_PTR(-ESTALE);
 887         if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
 888                 return ERR_PTR(-ESTALE);
 889
 890         /* iget isn't really right if the inode is currently unallocated!!
 891          *
 892          * ext4_read_inode will return a bad_inode if the inode had been
 893          * deleted, so we should be safe.
 894          *
 895          * Currently we don't know the generation for parent directory, so
 896          * a generation of 0 means "accept any"
 897          */
 898         inode = ext4_iget(sb, ino);
 899         if (IS_ERR(inode))
 900                 return ERR_CAST(inode);
 901         if (generation && inode->i_generation != generation) {
 902                 iput(inode);
 903                 return ERR_PTR(-ESTALE);
 904         }
 905
 906         return inode;
 907 }
 908
 909 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
 910                 int fh_len, int fh_type)
 911 {
 912         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
 913                                     ext4_nfs_get_inode);
 914 }
 915
 916 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
 917                 int fh_len, int fh_type)
 918 {
 919         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
 920                                     ext4_nfs_get_inode);
 921 }
 922
 923 /*
 924  * Try to release metadata pages (indirect blocks, directories) which are
 925  * mapped via the block device.  Since these pages could have journal heads
 926  * which would prevent try_to_free_buffers() from freeing them, we must use
 927  * jbd2 layer's try_to_free_buffers() function to release them.
 928  */
 929 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
 930 {
 931         journal_t *journal = EXT4_SB(sb)->s_journal;
 932
 933         WARN_ON(PageChecked(page));
 934         if (!page_has_buffers(page))
 935                 return 0;
 936         if (journal)
 937                 return jbd2_journal_try_to_free_buffers(journal, page,
 938                                                         wait & ~__GFP_WAIT);
 939         return try_to_free_buffers(page);
 940 }
 941
 942 #ifdef CONFIG_QUOTA
 943 #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
 944 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
 945
/*
 * Forward declarations of ext4's quota handlers.  The operation tables that
 * follow (ext4_quota_operations, ext4_qctl_operations and the
 * super_operations tables) take the address of several of these, so they
 * have to be declared before those initializers.
 */
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                                char *path, int remount);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
                               size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
                                const char *data, size_t len, loff_t off);
 958
 959 static struct dquot_operations ext4_quota_operations = {
 960         .initialize     = dquot_initialize,
 961         .drop           = dquot_drop,
 962         .alloc_space    = dquot_alloc_space,
 963         .reserve_space  = dquot_reserve_space,
 964         .claim_space    = dquot_claim_space,
 965         .release_rsv    = dquot_release_reserved_space,
 966         .get_reserved_space = ext4_get_reserved_space,
 967         .alloc_inode    = dquot_alloc_inode,
 968         .free_space     = dquot_free_space,
 969         .free_inode     = dquot_free_inode,
 970         .transfer       = dquot_transfer,
 971         .write_dquot    = ext4_write_dquot,
 972         .acquire_dquot  = ext4_acquire_dquot,
 973         .release_dquot  = ext4_release_dquot,
 974         .mark_dirty     = ext4_mark_dquot_dirty,
 975         .write_info     = ext4_write_info,
 976         .alloc_dquot    = dquot_alloc,
 977         .destroy_dquot  = dquot_destroy,
 978 };
 979
 980 static struct quotactl_ops ext4_qctl_operations = {
 981         .quota_on       = ext4_quota_on,
 982         .quota_off      = vfs_quota_off,
 983         .quota_sync     = vfs_quota_sync,
 984         .get_info       = vfs_get_dqinfo,
 985         .set_info       = vfs_set_dqinfo,
 986         .get_dqblk      = vfs_get_dqblk,
 987         .set_dqblk      = vfs_set_dqblk
 988 };
 989 #endif
 990
 991 static const struct super_operations ext4_sops = {
 992         .alloc_inode    = ext4_alloc_inode,
 993         .destroy_inode  = ext4_destroy_inode,
 994         .write_inode    = ext4_write_inode,
 995         .dirty_inode    = ext4_dirty_inode,
 996         .delete_inode   = ext4_delete_inode,
 997         .put_super      = ext4_put_super,
 998         .sync_fs        = ext4_sync_fs,
 999         .freeze_fs      = ext4_freeze,
1000         .unfreeze_fs    = ext4_unfreeze,
1001         .statfs         = ext4_statfs,
1002         .remount_fs     = ext4_remount,
1003         .clear_inode    = ext4_clear_inode,
1004         .show_options   = ext4_show_options,
1005 #ifdef CONFIG_QUOTA
1006         .quota_read     = ext4_quota_read,
1007         .quota_write    = ext4_quota_write,
1008 #endif
1009         .bdev_try_to_free_page = bdev_try_to_free_page,
1010 };
1011
1012 static const struct super_operations ext4_nojournal_sops = {
1013         .alloc_inode    = ext4_alloc_inode,
1014         .destroy_inode  = ext4_destroy_inode,
1015         .write_inode    = ext4_write_inode,
1016         .dirty_inode    = ext4_dirty_inode,
1017         .delete_inode   = ext4_delete_inode,
1018         .write_super    = ext4_write_super,
1019         .put_super      = ext4_put_super,
1020         .statfs         = ext4_statfs,
1021         .remount_fs     = ext4_remount,
1022         .clear_inode    = ext4_clear_inode,
1023         .show_options   = ext4_show_options,
1024 #ifdef CONFIG_QUOTA
1025         .quota_read     = ext4_quota_read,
1026         .quota_write    = ext4_quota_write,
1027 #endif
1028         .bdev_try_to_free_page = bdev_try_to_free_page,
1029 };
1030
1031 static const struct export_operations ext4_export_ops = {
1032         .fh_to_dentry = ext4_fh_to_dentry,
1033         .fh_to_parent = ext4_fh_to_parent,
1034         .get_parent = ext4_get_parent,
1035 };
1036
/*
 * Identifiers for the mount options recognised by the option parser.
 * Values are assigned implicitly, so the order of the enumerators fixes
 * their numeric values; append rather than insert if that matters.
 */
enum {
        Opt_bsd_df,
        Opt_minix_df,
        Opt_grpid,
        Opt_nogrpid,
        Opt_resgid,
        Opt_resuid,
        Opt_sb,
        Opt_err_cont,
        Opt_err_panic,
        Opt_err_ro,
        Opt_nouid32,
        Opt_debug,
        Opt_oldalloc,
        Opt_orlov,
        Opt_user_xattr,
        Opt_nouser_xattr,
        Opt_acl,
        Opt_noacl,
        Opt_auto_da_alloc,
        Opt_noauto_da_alloc,
        Opt_noload,
        Opt_nobh,
        Opt_bh,
        Opt_commit,
        Opt_min_batch_time,
        Opt_max_batch_time,
        Opt_journal_update,
        Opt_journal_dev,
        Opt_journal_checksum,
        Opt_journal_async_commit,
        Opt_abort,
        Opt_data_journal,
        Opt_data_ordered,
        Opt_data_writeback,
        Opt_data_err_abort,
        Opt_data_err_ignore,
        Opt_usrjquota,
        Opt_grpjquota,
        Opt_offusrjquota,
        Opt_offgrpjquota,
        Opt_jqfmt_vfsold,
        Opt_jqfmt_vfsv0,
        Opt_quota,
        Opt_noquota,
        Opt_ignore,
        Opt_barrier,
        Opt_nobarrier,
        Opt_err,
        Opt_resize,
        Opt_usrquota,
        Opt_grpquota,
        Opt_i_version,
        Opt_stripe,
        Opt_delalloc,
        Opt_nodelalloc,
        Opt_inode_readahead_blks,
        Opt_journal_ioprio
};
1055
/*
 * Mount option patterns that parse_options() passes to match_token().
 * Each entry pairs an Opt_* identifier with the text it matches; "%u" and
 * "%s" mark a captured argument.  Several identifiers appear more than once
 * because an option has aliases ("grpid"/"bsdgroups") or both a bare and a
 * valued form ("barrier"/"barrier=%u").  The final {Opt_err, NULL} entry
 * ends the table.
 */
static const match_table_t tokens = {
        {Opt_bsd_df, "bsddf"},
        {Opt_minix_df, "minixdf"},
        {Opt_grpid, "grpid"},
        {Opt_grpid, "bsdgroups"},
        {Opt_nogrpid, "nogrpid"},
        {Opt_nogrpid, "sysvgroups"},
        {Opt_resgid, "resgid=%u"},
        {Opt_resuid, "resuid=%u"},
        {Opt_sb, "sb=%u"},
        {Opt_err_cont, "errors=continue"},
        {Opt_err_panic, "errors=panic"},
        {Opt_err_ro, "errors=remount-ro"},
        {Opt_nouid32, "nouid32"},
        {Opt_debug, "debug"},
        {Opt_oldalloc, "oldalloc"},
        {Opt_orlov, "orlov"},
        {Opt_user_xattr, "user_xattr"},
        {Opt_nouser_xattr, "nouser_xattr"},
        {Opt_acl, "acl"},
        {Opt_noacl, "noacl"},
        {Opt_noload, "noload"},
        {Opt_nobh, "nobh"},
        {Opt_bh, "bh"},
        {Opt_commit, "commit=%u"},
        {Opt_min_batch_time, "min_batch_time=%u"},
        {Opt_max_batch_time, "max_batch_time=%u"},
        {Opt_journal_update, "journal=update"},
        {Opt_journal_dev, "journal_dev=%u"},
        {Opt_journal_checksum, "journal_checksum"},
        {Opt_journal_async_commit, "journal_async_commit"},
        {Opt_abort, "abort"},
        {Opt_data_journal, "data=journal"},
        {Opt_data_ordered, "data=ordered"},
        {Opt_data_writeback, "data=writeback"},
        {Opt_data_err_abort, "data_err=abort"},
        {Opt_data_err_ignore, "data_err=ignore"},
        /* "name=" with no value selects the Opt_off* variant */
        {Opt_offusrjquota, "usrjquota="},
        {Opt_usrjquota, "usrjquota=%s"},
        {Opt_offgrpjquota, "grpjquota="},
        {Opt_grpjquota, "grpjquota=%s"},
        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
        {Opt_grpquota, "grpquota"},
        {Opt_noquota, "noquota"},
        {Opt_quota, "quota"},
        {Opt_usrquota, "usrquota"},
        {Opt_barrier, "barrier=%u"},
        {Opt_barrier, "barrier"},
        {Opt_nobarrier, "nobarrier"},
        {Opt_i_version, "i_version"},
        {Opt_stripe, "stripe=%u"},
        /*
         * NOTE(review): this pattern captures no argument, yet the
         * Opt_resize case in parse_options() calls match_int(&args[0], ...).
         * Confirm whether "resize=%u" was intended.
         */
        {Opt_resize, "resize"},
        {Opt_delalloc, "delalloc"},
        {Opt_nodelalloc, "nodelalloc"},
        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
        {Opt_journal_ioprio, "journal_ioprio=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc"},
        {Opt_noauto_da_alloc, "noauto_da_alloc"},
        /* terminator */
        {Opt_err, NULL},
};
1118
1119 static ext4_fsblk_t get_sb_block(void **data)
1120 {
1121         ext4_fsblk_t    sb_block;
1122         char            *options = (char *) *data;
1123
1124         if (!options || strncmp(options, "sb=", 3) != 0)
1125                 return 1;       /* Default location */
1126         options += 3;
1127         /*todo: use simple_strtoll with >32bit ext4 */
1128         sb_block = simple_strtoul(options, &options, 0);
1129         if (*options && *options != ',') {
1130                 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1131                        (char *) *data);
1132                 return 1;
1133         }
1134         if (*options == ',')
1135                 options++;
1136         *data = (void *) options;
1137         return sb_block;
1138 }
1139
/* Default journal I/O priority value: class IOPRIO_CLASS_BE, level 3. */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1141
1142 static int parse_options(char *options, struct super_block *sb,
1143                          unsigned long *journal_devnum,
1144                          unsigned int *journal_ioprio,
1145                          ext4_fsblk_t *n_blocks_count, int is_remount)
1146 {
1147         struct ext4_sb_info *sbi = EXT4_SB(sb);
1148         char *p;
1149         substring_t args[MAX_OPT_ARGS];
1150         int data_opt = 0;
1151         int option;
1152 #ifdef CONFIG_QUOTA
1153         int qtype, qfmt;
1154         char *qname;
1155 #endif
1156
1157         if (!options)
1158                 return 1;
1159
1160         while ((p = strsep(&options, ",")) != NULL) {
1161                 int token;
1162                 if (!*p)
1163                         continue;
1164
1165                 token = match_token(p, tokens, args);
1166                 switch (token) {
1167                 case Opt_bsd_df:
1168                         clear_opt(sbi->s_mount_opt, MINIX_DF);
1169                         break;
1170                 case Opt_minix_df:
1171                         set_opt(sbi->s_mount_opt, MINIX_DF);
1172                         break;
1173                 case Opt_grpid:
1174                         set_opt(sbi->s_mount_opt, GRPID);
1175                         break;
1176                 case Opt_nogrpid:
1177                         clear_opt(sbi->s_mount_opt, GRPID);
1178                         break;
1179                 case Opt_resuid:
1180                         if (match_int(&args[0], &option))
1181                                 return 0;
1182                         sbi->s_resuid = option;
1183                         break;
1184                 case Opt_resgid:
1185                         if (match_int(&args[0], &option))
1186                                 return 0;
1187                         sbi->s_resgid = option;
1188                         break;
1189                 case Opt_sb:
1190                         /* handled by get_sb_block() instead of here */
1191                         /* *sb_block = match_int(&args[0]); */
1192                         break;
1193                 case Opt_err_panic:
1194                         clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1195                         clear_opt(sbi->s_mount_opt, ERRORS_RO);
1196                         set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1197                         break;
1198                 case Opt_err_ro:
1199                         clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1200                         clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1201                         set_opt(sbi->s_mount_opt, ERRORS_RO);
1202                         break;
1203                 case Opt_err_cont:
1204                         clear_opt(sbi->s_mount_opt, ERRORS_RO);
1205                         clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1206                         set_opt(sbi->s_mount_opt, ERRORS_CONT);
1207                         break;
1208                 case Opt_nouid32:
1209                         set_opt(sbi->s_mount_opt, NO_UID32);
1210                         break;
1211                 case Opt_debug:
1212                         set_opt(sbi->s_mount_opt, DEBUG);
1213                         break;
1214                 case Opt_oldalloc:
1215                         set_opt(sbi->s_mount_opt, OLDALLOC);
1216                         break;
1217                 case Opt_orlov:
1218                         clear_opt(sbi->s_mount_opt, OLDALLOC);
1219                         break;
1220 #ifdef CONFIG_EXT4_FS_XATTR
1221                 case Opt_user_xattr:
1222                         set_opt(sbi->s_mount_opt, XATTR_USER);
1223                         break;
1224                 case Opt_nouser_xattr:
1225                         clear_opt(sbi->s_mount_opt, XATTR_USER);
1226                         break;
1227 #else
1228                 case Opt_user_xattr:
1229                 case Opt_nouser_xattr:
1230                         printk(KERN_ERR "EXT4 (no)user_xattr options "
1231                                "not supported\n");
1232                         break;
1233 #endif
1234 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1235                 case Opt_acl:
1236                         set_opt(sbi->s_mount_opt, POSIX_ACL);
1237                         break;
1238                 case Opt_noacl:
1239                         clear_opt(sbi->s_mount_opt, POSIX_ACL);
1240                         break;
1241 #else
1242                 case Opt_acl:
1243                 case Opt_noacl:
1244                         printk(KERN_ERR "EXT4 (no)acl options "
1245                                "not supported\n");
1246                         break;
1247 #endif
1248                 case Opt_journal_update:
1249                         /* @@@ FIXME */
1250                         /* Eventually we will want to be able to create
1251                            a journal file here.  For now, only allow the
1252                            user to specify an existing inode to be the
1253                            journal file. */
1254                         if (is_remount) {
1255                                 printk(KERN_ERR "EXT4-fs: cannot specify "
1256                                        "journal on remount\n");
1257                                 return 0;
1258                         }
1259                         set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1260                         break;
1261                 case Opt_journal_dev:
1262                         if (is_remount) {
1263                                 printk(KERN_ERR "EXT4-fs: cannot specify "
1264                                        "journal on remount\n");
1265                                 return 0;
1266                         }
1267                         if (match_int(&args[0], &option))
1268                                 return 0;
1269                         *journal_devnum = option;
1270                         break;
1271                 case Opt_journal_checksum:
1272                         set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1273                         break;
1274                 case Opt_journal_async_commit:
1275                         set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
1276                         set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1277                         break;
1278                 case Opt_noload:
1279                         set_opt(sbi->s_mount_opt, NOLOAD);
1280                         break;
1281                 case Opt_commit:
1282                         if (match_int(&args[0], &option))
1283                                 return 0;
1284                         if (option < 0)
1285                                 return 0;
1286                         if (option == 0)
1287                                 option = JBD2_DEFAULT_MAX_COMMIT_AGE;
1288                         sbi->s_commit_interval = HZ * option;
1289                         break;
1290                 case Opt_max_batch_time:
1291                         if (match_int(&args[0], &option))
1292                                 return 0;
1293                         if (option < 0)
1294                                 return 0;
1295                         if (option == 0)
1296                                 option = EXT4_DEF_MAX_BATCH_TIME;
1297                         sbi->s_max_batch_time = option;
1298                         break;
1299                 case Opt_min_batch_time:
1300                         if (match_int(&args[0], &option))
1301                                 return 0;
1302                         if (option < 0)
1303                                 return 0;
1304                         sbi->s_min_batch_time = option;
1305                         break;
1306                 case Opt_data_journal:
1307                         data_opt = EXT4_MOUNT_JOURNAL_DATA;
1308                         goto datacheck;
1309                 case Opt_data_ordered:
1310                         data_opt = EXT4_MOUNT_ORDERED_DATA;
1311                         goto datacheck;
1312                 case Opt_data_writeback:
1313                         data_opt = EXT4_MOUNT_WRITEBACK_DATA;
1314                 datacheck:
1315                         if (is_remount) {
1316                                 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
1317                                                 != data_opt) {
1318                                         printk(KERN_ERR
1319                                                 "EXT4-fs: cannot change data "
1320                                                 "mode on remount\n");
1321                                         return 0;
1322                                 }
1323                         } else {
1324                                 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS;
1325                                 sbi->s_mount_opt |= data_opt;
1326                         }
1327                         break;
1328                 case Opt_data_err_abort:
1329                         set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1330                         break;
1331                 case Opt_data_err_ignore:
1332                         clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1333                         break;
1334 #ifdef CONFIG_QUOTA
1335                 case Opt_usrjquota:
1336                         qtype = USRQUOTA;
1337                         goto set_qf_name;
1338                 case Opt_grpjquota:
1339                         qtype = GRPQUOTA;
1340 set_qf_name:
1341                         if (sb_any_quota_loaded(sb) &&
1342                             !sbi->s_qf_names[qtype]) {
1343                                 printk(KERN_ERR
1344                                        "EXT4-fs: Cannot change journaled "
1345                                        "quota options when quota turned on.\n");
1346                                 return 0;
1347                         }
1348                         qname = match_strdup(&args[0]);
1349                         if (!qname) {
1350                                 printk(KERN_ERR
1351                                         "EXT4-fs: not enough memory for "
1352                                         "storing quotafile name.\n");
1353                                 return 0;
1354                         }
1355                         if (sbi->s_qf_names[qtype] &&
1356                             strcmp(sbi->s_qf_names[qtype], qname)) {
1357                                 printk(KERN_ERR
1358                                         "EXT4-fs: %s quota file already "
1359                                         "specified.\n", QTYPE2NAME(qtype));
1360                                 kfree(qname);
1361                                 return 0;
1362                         }
1363                         sbi->s_qf_names[qtype] = qname;
1364                         if (strchr(sbi->s_qf_names[qtype], '/')) {
1365                                 printk(KERN_ERR
1366                                         "EXT4-fs: quotafile must be on "
1367                                         "filesystem root.\n");
1368                                 kfree(sbi->s_qf_names[qtype]);
1369                                 sbi->s_qf_names[qtype] = NULL;
1370                                 return 0;
1371                         }
1372                         set_opt(sbi->s_mount_opt, QUOTA);
1373                         break;
1374                 case Opt_offusrjquota:
1375                         qtype = USRQUOTA;
1376                         goto clear_qf_name;
1377                 case Opt_offgrpjquota:
1378                         qtype = GRPQUOTA;
1379 clear_qf_name:
1380                         if (sb_any_quota_loaded(sb) &&
1381                             sbi->s_qf_names[qtype]) {
1382                                 printk(KERN_ERR "EXT4-fs: Cannot change "
1383                                         "journaled quota options when "
1384                                         "quota turned on.\n");
1385                                 return 0;
1386                         }
1387                         /*
1388                          * The space will be released later when all options
1389                          * are confirmed to be correct
1390                          */
1391                         sbi->s_qf_names[qtype] = NULL;
1392                         break;
1393                 case Opt_jqfmt_vfsold:
1394                         qfmt = QFMT_VFS_OLD;
1395                         goto set_qf_format;
1396                 case Opt_jqfmt_vfsv0:
1397                         qfmt = QFMT_VFS_V0;
1398 set_qf_format:
1399                         if (sb_any_quota_loaded(sb) &&
1400                             sbi->s_jquota_fmt != qfmt) {
1401                                 printk(KERN_ERR "EXT4-fs: Cannot change "
1402                                         "journaled quota options when "
1403                                         "quota turned on.\n");
1404                                 return 0;
1405                         }
1406                         sbi->s_jquota_fmt = qfmt;
1407                         break;
1408                 case Opt_quota:
1409                 case Opt_usrquota:
1410                         set_opt(sbi->s_mount_opt, QUOTA);
1411                         set_opt(sbi->s_mount_opt, USRQUOTA);
1412                         break;
1413                 case Opt_grpquota:
1414                         set_opt(sbi->s_mount_opt, QUOTA);
1415                         set_opt(sbi->s_mount_opt, GRPQUOTA);
1416                         break;
1417                 case Opt_noquota:
1418                         if (sb_any_quota_loaded(sb)) {
1419                                 printk(KERN_ERR "EXT4-fs: Cannot change quota "
1420                                         "options when quota turned on.\n");
1421                                 return 0;
1422                         }
1423                         clear_opt(sbi->s_mount_opt, QUOTA);
1424                         clear_opt(sbi->s_mount_opt, USRQUOTA);
1425                         clear_opt(sbi->s_mount_opt, GRPQUOTA);
1426                         break;
1427 #else
1428                 case Opt_quota:
1429                 case Opt_usrquota:
1430                 case Opt_grpquota:
1431                         printk(KERN_ERR
1432                                 "EXT4-fs: quota options not supported.\n");
1433                         break;
1434                 case Opt_usrjquota:
1435                 case Opt_grpjquota:
1436                 case Opt_offusrjquota:
1437                 case Opt_offgrpjquota:
1438                 case Opt_jqfmt_vfsold:
1439                 case Opt_jqfmt_vfsv0:
1440                         printk(KERN_ERR
1441                                 "EXT4-fs: journaled quota options not "
1442                                 "supported.\n");
1443                         break;
1444                 case Opt_noquota:
1445                         break;
1446 #endif
1447                 case Opt_abort:
1448                         set_opt(sbi->s_mount_opt, ABORT);
1449                         break;
1450                 case Opt_nobarrier:
1451                         clear_opt(sbi->s_mount_opt, BARRIER);
1452                         break;
1453                 case Opt_barrier:
1454                         if (match_int(&args[0], &option)) {
1455                                 set_opt(sbi->s_mount_opt, BARRIER);
1456                                 break;
1457                         }
1458                         if (option)
1459                                 set_opt(sbi->s_mount_opt, BARRIER);
1460                         else
1461                                 clear_opt(sbi->s_mount_opt, BARRIER);
1462                         break;
1463                 case Opt_ignore:
1464                         break;
1465                 case Opt_resize:
1466                         if (!is_remount) {
1467                                 printk("EXT4-fs: resize option only available "
1468                                         "for remount\n");
1469                                 return 0;
1470                         }
1471                         if (match_int(&args[0], &option) != 0)
1472                                 return 0;
1473                         *n_blocks_count = option;
1474                         break;
1475                 case Opt_nobh:
1476                         set_opt(sbi->s_mount_opt, NOBH);
1477                         break;
1478                 case Opt_bh:
1479                         clear_opt(sbi->s_mount_opt, NOBH);
1480                         break;
1481                 case Opt_i_version:
1482                         set_opt(sbi->s_mount_opt, I_VERSION);
1483                         sb->s_flags |= MS_I_VERSION;
1484                         break;
1485                 case Opt_nodelalloc:
1486                         clear_opt(sbi->s_mount_opt, DELALLOC);
1487                         break;
1488                 case Opt_stripe:
1489                         if (match_int(&args[0], &option))
1490                                 return 0;
1491                         if (option < 0)
1492                                 return 0;
1493                         sbi->s_stripe = option;
1494                         break;
1495                 case Opt_delalloc:
1496                         set_opt(sbi->s_mount_opt, DELALLOC);
1497                         break;
1498                 case Opt_inode_readahead_blks:
1499                         if (match_int(&args[0], &option))
1500                                 return 0;
1501                         if (option < 0 || option > (1 << 30))
1502                                 return 0;
1503                         if (!is_power_of_2(option)) {
1504                                 printk(KERN_ERR "EXT4-fs: inode_readahead_blks"
1505                                        " must be a power of 2\n");
1506                                 return 0;
1507                         }
1508                         sbi->s_inode_readahead_blks = option;
1509                         break;
1510                 case Opt_journal_ioprio:
1511                         if (match_int(&args[0], &option))
1512                                 return 0;
1513                         if (option < 0 || option > 7)
1514                                 break;
1515                         *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
1516                                                             option);
1517                         break;
1518                 case Opt_noauto_da_alloc:
1519                         set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1520                         break;
1521                 case Opt_auto_da_alloc:
1522                         if (match_int(&args[0], &option)) {
1523                                 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1524                                 break;
1525                         }
1526                         if (option)
1527                                 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1528                         else
1529                                 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1530                         break;
1531                 default:
1532                         printk(KERN_ERR
1533                                "EXT4-fs: Unrecognized mount option \"%s\" "
1534                                "or missing value\n", p);
1535                         return 0;
1536                 }
1537         }
1538 #ifdef CONFIG_QUOTA
1539         if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1540                 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) &&
1541                      sbi->s_qf_names[USRQUOTA])
1542                         clear_opt(sbi->s_mount_opt, USRQUOTA);
1543
1544                 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) &&
1545                      sbi->s_qf_names[GRPQUOTA])
1546                         clear_opt(sbi->s_mount_opt, GRPQUOTA);
1547
1548                 if ((sbi->s_qf_names[USRQUOTA] &&
1549                                 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
1550                     (sbi->s_qf_names[GRPQUOTA] &&
1551                                 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
1552                         printk(KERN_ERR "EXT4-fs: old and new quota "
1553                                         "format mixing.\n");
1554                         return 0;
1555                 }
1556
1557                 if (!sbi->s_jquota_fmt) {
1558                         printk(KERN_ERR "EXT4-fs: journaled quota format "
1559                                         "not specified.\n");
1560                         return 0;
1561                 }
1562         } else {
1563                 if (sbi->s_jquota_fmt) {
1564                         printk(KERN_ERR "EXT4-fs: journaled quota format "
1565                                         "specified with no journaling "
1566                                         "enabled.\n");
1567                         return 0;
1568                 }
1569         }
1570 #endif
1571         return 1;
1572 }
1573
1574 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1575                             int read_only)
1576 {
1577         struct ext4_sb_info *sbi = EXT4_SB(sb);
1578         int res = 0;
1579
1580         if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1581                 printk(KERN_ERR "EXT4-fs warning: revision level too high, "
1582                        "forcing read-only mode\n");
1583                 res = MS_RDONLY;
1584         }
1585         if (read_only)
1586                 return res;
1587         if (!(sbi->s_mount_state & EXT4_VALID_FS))
1588                 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
1589                        "running e2fsck is recommended\n");
1590         else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1591                 printk(KERN_WARNING
1592                        "EXT4-fs warning: mounting fs with errors, "
1593                        "running e2fsck is recommended\n");
1594         else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1595                  le16_to_cpu(es->s_mnt_count) >=
1596                  (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1597                 printk(KERN_WARNING
1598                        "EXT4-fs warning: maximal mount count reached, "
1599                        "running e2fsck is recommended\n");
1600         else if (le32_to_cpu(es->s_checkinterval) &&
1601                 (le32_to_cpu(es->s_lastcheck) +
1602                         le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1603                 printk(KERN_WARNING
1604                        "EXT4-fs warning: checktime reached, "
1605                        "running e2fsck is recommended\n");
1606         if (!sbi->s_journal)
1607                 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1608         if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1609                 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1610         le16_add_cpu(&es->s_mnt_count, 1);
1611         es->s_mtime = cpu_to_le32(get_seconds());
1612         ext4_update_dynamic_rev(sb);
1613         if (sbi->s_journal)
1614                 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1615
1616         ext4_commit_super(sb, 1);
1617         if (test_opt(sb, DEBUG))
1618                 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1619                                 "bpg=%lu, ipg=%lu, mo=%04lx]\n",
1620                         sb->s_blocksize,
1621                         sbi->s_groups_count,
1622                         EXT4_BLOCKS_PER_GROUP(sb),
1623                         EXT4_INODES_PER_GROUP(sb),
1624                         sbi->s_mount_opt);
1625
1626         if (EXT4_SB(sb)->s_journal) {
1627                 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
1628                        sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1629                        "external", EXT4_SB(sb)->s_journal->j_devname);
1630         } else {
1631                 printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
1632         }
1633         return res;
1634 }
1635
1636 static int ext4_fill_flex_info(struct super_block *sb)
1637 {
1638         struct ext4_sb_info *sbi = EXT4_SB(sb);
1639         struct ext4_group_desc *gdp = NULL;
1640         struct buffer_head *bh;
1641         ext4_group_t flex_group_count;
1642         ext4_group_t flex_group;
1643         int groups_per_flex = 0;
1644         size_t size;
1645         int i;
1646
1647         if (!sbi->s_es->s_log_groups_per_flex) {
1648                 sbi->s_log_groups_per_flex = 0;
1649                 return 1;
1650         }
1651
1652         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1653         groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1654
1655         /* We allocate both existing and potentially added groups */
1656         flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1657                         ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1658                               EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1659         size = flex_group_count * sizeof(struct flex_groups);
1660         sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
1661         if (sbi->s_flex_groups == NULL) {
1662                 sbi->s_flex_groups = vmalloc(size);
1663                 if (sbi->s_flex_groups)
1664                         memset(sbi->s_flex_groups, 0, size);
1665         }
1666         if (sbi->s_flex_groups == NULL) {
1667                 printk(KERN_ERR "EXT4-fs: not enough memory for "
1668                                 "%u flex groups\n", flex_group_count);
1669                 goto failed;
1670         }
1671
1672         for (i = 0; i < sbi->s_groups_count; i++) {
1673                 gdp = ext4_get_group_desc(sb, i, &bh);
1674
1675                 flex_group = ext4_flex_group(sbi, i);
1676                 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
1677                            ext4_free_inodes_count(sb, gdp));
1678                 atomic_set(&sbi->s_flex_groups[flex_group].free_blocks,
1679                            ext4_free_blks_count(sb, gdp));
1680                 atomic_set(&sbi->s_flex_groups[flex_group].used_dirs,
1681                            ext4_used_dirs_count(sb, gdp));
1682         }
1683
1684         return 1;
1685 failed:
1686         return 0;
1687 }
1688
1689 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1690                             struct ext4_group_desc *gdp)
1691 {
1692         __u16 crc = 0;
1693
1694         if (sbi->s_es->s_feature_ro_compat &
1695             cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
1696                 int offset = offsetof(struct ext4_group_desc, bg_checksum);
1697                 __le32 le_group = cpu_to_le32(block_group);
1698
1699                 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
1700                 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
1701                 crc = crc16(crc, (__u8 *)gdp, offset);
1702                 offset += sizeof(gdp->bg_checksum); /* skip checksum */
1703                 /* for checksum of struct ext4_group_desc do the rest...*/
1704                 if ((sbi->s_es->s_feature_incompat &
1705                      cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
1706                     offset < le16_to_cpu(sbi->s_es->s_desc_size))
1707                         crc = crc16(crc, (__u8 *)gdp + offset,
1708                                     le16_to_cpu(sbi->s_es->s_desc_size) -
1709                                         offset);
1710         }
1711
1712         return cpu_to_le16(crc);
1713 }
1714
1715 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
1716                                 struct ext4_group_desc *gdp)
1717 {
1718         if ((sbi->s_es->s_feature_ro_compat &
1719              cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
1720             (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
1721                 return 0;
1722
1723         return 1;
1724 }
1725
1726 /* Called at mount-time, super-block is locked */
1727 static int ext4_check_descriptors(struct super_block *sb)
1728 {
1729         struct ext4_sb_info *sbi = EXT4_SB(sb);
1730         ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1731         ext4_fsblk_t last_block;
1732         ext4_fsblk_t block_bitmap;
1733         ext4_fsblk_t inode_bitmap;
1734         ext4_fsblk_t inode_table;
1735         int flexbg_flag = 0;
1736         ext4_group_t i;
1737
1738         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1739                 flexbg_flag = 1;
1740
1741         ext4_debug("Checking group descriptors");
1742
1743         for (i = 0; i < sbi->s_groups_count; i++) {
1744                 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1745
1746                 if (i == sbi->s_groups_count - 1 || flexbg_flag)
1747                         last_block = ext4_blocks_count(sbi->s_es) - 1;
1748                 else
1749                         last_block = first_block +
1750                                 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1751
1752                 block_bitmap = ext4_block_bitmap(sb, gdp);
1753                 if (block_bitmap < first_block || block_bitmap > last_block) {
1754                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1755                                "Block bitmap for group %u not in group "
1756                                "(block %llu)!\n", i, block_bitmap);
1757                         return 0;
1758                 }
1759                 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1760                 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1761                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1762                                "Inode bitmap for group %u not in group "
1763                                "(block %llu)!\n", i, inode_bitmap);
1764                         return 0;
1765                 }
1766                 inode_table = ext4_inode_table(sb, gdp);
1767                 if (inode_table < first_block ||
1768                     inode_table + sbi->s_itb_per_group - 1 > last_block) {
1769                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1770                                "Inode table for group %u not in group "
1771                                "(block %llu)!\n", i, inode_table);
1772                         return 0;
1773                 }
1774                 spin_lock(sb_bgl_lock(sbi, i));
1775                 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1776                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1777                                "Checksum for group %u failed (%u!=%u)\n",
1778                                i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1779                                gdp)), le16_to_cpu(gdp->bg_checksum));
1780                         if (!(sb->s_flags & MS_RDONLY)) {
1781                                 spin_unlock(sb_bgl_lock(sbi, i));
1782                                 return 0;
1783                         }
1784                 }
1785                 spin_unlock(sb_bgl_lock(sbi, i));
1786                 if (!flexbg_flag)
1787                         first_block += EXT4_BLOCKS_PER_GROUP(sb);
1788         }
1789
1790         ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1791         sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
1792         return 1;
1793 }
1794
1795 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
1796  * the superblock) which were deleted from all directories, but held open by
1797  * a process at the time of a crash.  We walk the list and try to delete these
1798  * inodes at recovery time (only with a read-write filesystem).
1799  *
1800  * In order to keep the orphan inode chain consistent during traversal (in
1801  * case of crash during recovery), we link each inode into the superblock
1802  * orphan list_head and handle it the same way as an inode deletion during
1803  * normal operation (which journals the operations for us).
1804  *
1805  * We only do an iget() and an iput() on each inode, which is very safe if we
1806  * accidentally point at an in-use or already deleted inode.  The worst that
1807  * can happen in this case is that we get a "bit already cleared" message from
1808  * ext4_free_inode().  The only reason we would point at a wrong inode is if
1809  * e2fsck was run on this filesystem, and it must have already done the orphan
1810  * inode cleanup for us, so we can safely abort without any further action.
1811  */
1812 static void ext4_orphan_cleanup(struct super_block *sb,
1813                                 struct ext4_super_block *es)
1814 {
1815         unsigned int s_flags = sb->s_flags;
1816         int nr_orphans = 0, nr_truncates = 0;
1817 #ifdef CONFIG_QUOTA
1818         int i;
1819 #endif
1820         if (!es->s_last_orphan) {
1821                 jbd_debug(4, "no orphan inodes to clean up\n");
1822                 return;
1823         }
1824
1825         if (bdev_read_only(sb->s_bdev)) {
1826                 printk(KERN_ERR "EXT4-fs: write access "
1827                         "unavailable, skipping orphan cleanup.\n");
1828                 return;
1829         }
1830
1831         if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
1832                 if (es->s_last_orphan)
1833                         jbd_debug(1, "Errors on filesystem, "
1834                                   "clearing orphan list.\n");
1835                 es->s_last_orphan = 0;
1836                 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1837                 return;
1838         }
1839
1840         if (s_flags & MS_RDONLY) {
1841                 printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n",
1842                        sb->s_id);
1843                 sb->s_flags &= ~MS_RDONLY;
1844         }
1845 #ifdef CONFIG_QUOTA
1846         /* Needed for iput() to work correctly and not trash data */
1847         sb->s_flags |= MS_ACTIVE;
1848         /* Turn on quotas so that they are updated correctly */
1849         for (i = 0; i < MAXQUOTAS; i++) {
1850                 if (EXT4_SB(sb)->s_qf_names[i]) {
1851                         int ret = ext4_quota_on_mount(sb, i);
1852                         if (ret < 0)
1853                                 printk(KERN_ERR
1854                                         "EXT4-fs: Cannot turn on journaled "
1855                                         "quota: error %d\n", ret);
1856                 }
1857         }
1858 #endif
1859
1860         while (es->s_last_orphan) {
1861                 struct inode *inode;
1862
1863                 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1864                 if (IS_ERR(inode)) {
1865                         es->s_last_orphan = 0;
1866                         break;
1867                 }
1868
1869                 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1870                 vfs_dq_init(inode);
1871                 if (inode->i_nlink) {
1872                         printk(KERN_DEBUG
1873                                 "%s: truncating inode %lu to %lld bytes\n",
1874                                 __func__, inode->i_ino, inode->i_size);
1875                         jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1876                                   inode->i_ino, inode->i_size);
1877                         ext4_truncate(inode);
1878                         nr_truncates++;
1879                 } else {
1880                         printk(KERN_DEBUG
1881                                 "%s: deleting unreferenced inode %lu\n",
1882                                 __func__, inode->i_ino);
1883                         jbd_debug(2, "deleting unreferenced inode %lu\n",
1884                                   inode->i_ino);
1885                         nr_orphans++;
1886                 }
1887                 iput(inode);  /* The delete magic happens here! */
1888         }
1889
1890 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1891
1892         if (nr_orphans)
1893                 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
1894                        sb->s_id, PLURAL(nr_orphans));
1895         if (nr_truncates)
1896                 printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n",
1897                        sb->s_id, PLURAL(nr_truncates));
1898 #ifdef CONFIG_QUOTA
1899         /* Turn quotas off */
1900         for (i = 0; i < MAXQUOTAS; i++) {
1901                 if (sb_dqopt(sb)->files[i])
1902                         vfs_quota_off(sb, i, 0);
1903         }
1904 #endif
1905         sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1906 }
1907 /*
1908  * Maximal extent format file size.
1909  * Resulting logical blkno at s_maxbytes must fit in our on-disk
1910  * extent format containers, within a sector_t, and within i_blocks
1911  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
1912  * so that won't be a limiting factor.
1913  *
1914  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
1915  */
1916 static loff_t ext4_max_size(int blkbits, int has_huge_files)
1917 {
1918         loff_t res;
1919         loff_t upper_limit = MAX_LFS_FILESIZE;
1920
1921         /* small i_blocks in vfs inode? */
1922         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1923                 /*
1924                  * CONFIG_LBD is not enabled implies the inode
1925                  * i_block represent total blocks in 512 bytes
1926                  * 32 == size of vfs inode i_blocks * 8
1927                  */
1928                 upper_limit = (1LL << 32) - 1;
1929
1930                 /* total blocks in file system block size */
1931                 upper_limit >>= (blkbits - 9);
1932                 upper_limit <<= blkbits;
1933         }
1934
1935         /* 32-bit extent-start container, ee_block */
1936         res = 1LL << 32;
1937         res <<= blkbits;
1938         res -= 1;
1939
1940         /* Sanity check against vm- & vfs- imposed limits */
1941         if (res > upper_limit)
1942                 res = upper_limit;
1943
1944         return res;
1945 }
1946
1947 /*
1948  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
1949  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
1950  * We need to be 1 filesystem block less than the 2^48 sector limit.
1951  */
1952 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1953 {
1954         loff_t res = EXT4_NDIR_BLOCKS;
1955         int meta_blocks;
1956         loff_t upper_limit;
1957         /* This is calculated to be the largest file size for a
1958          * dense, bitmapped file such that the total number of
1959          * sectors in the file, including data and all indirect blocks,
1960          * does not exceed 2^48 -1
1961          * __u32 i_blocks_lo and _u16 i_blocks_high representing the
1962          * total number of  512 bytes blocks of the file
1963          */
1964
1965         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1966                 /*
1967                  * !has_huge_files or CONFIG_LBD is not enabled
1968                  * implies the inode i_block represent total blocks in
1969                  * 512 bytes 32 == size of vfs inode i_blocks * 8
1970                  */
1971                 upper_limit = (1LL << 32) - 1;
1972
1973                 /* total blocks in file system block size */
1974                 upper_limit >>= (bits - 9);
1975
1976         } else {
1977                 /*
1978                  * We use 48 bit ext4_inode i_blocks
1979                  * With EXT4_HUGE_FILE_FL set the i_blocks
1980                  * represent total number of blocks in
1981                  * file system block size
1982                  */
1983                 upper_limit = (1LL << 48) - 1;
1984
1985         }
1986
1987         /* indirect blocks */
1988         meta_blocks = 1;
1989         /* double indirect blocks */
1990         meta_blocks += 1 + (1LL << (bits-2));
1991         /* tripple indirect blocks */
1992         meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
1993
1994         upper_limit -= meta_blocks;
1995         upper_limit <<= bits;
1996
1997         res += 1LL << (bits-2);
1998         res += 1LL << (2*(bits-2));
1999         res += 1LL << (3*(bits-2));
2000         res <<= bits;
2001         if (res > upper_limit)
2002                 res = upper_limit;
2003
2004         if (res > MAX_LFS_FILESIZE)
2005                 res = MAX_LFS_FILESIZE;
2006
2007         return res;
2008 }
2009
2010 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2011                                 ext4_fsblk_t logical_sb_block, int nr)
2012 {
2013         struct ext4_sb_info *sbi = EXT4_SB(sb);
2014         ext4_group_t bg, first_meta_bg;
2015         int has_super = 0;
2016
2017         first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2018
2019         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2020             nr < first_meta_bg)
2021                 return logical_sb_block + nr + 1;
2022         bg = sbi->s_desc_per_block * nr;
2023         if (ext4_bg_has_super(sb, bg))
2024                 has_super = 1;
2025         return (has_super + ext4_group_first_block_no(sb, bg));
2026 }
2027
2028 /**
2029  * ext4_get_stripe_size: Get the stripe size.
2030  * @sbi: In memory super block info
2031  *
2032  * If we have specified it via mount option, then
2033  * use the mount option value. If the value specified at mount time is
2034  * greater than the blocks per group use the super block value.
2035  * If the super block value is greater than blocks per group return 0.
2036  * Allocator needs it be less than blocks per group.
2037  *
2038  */
2039 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2040 {
2041         unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2042         unsigned long stripe_width =
2043                         le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2044
2045         if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2046                 return sbi->s_stripe;
2047
2048         if (stripe_width <= sbi->s_blocks_per_group)
2049                 return stripe_width;
2050
2051         if (stride <= sbi->s_blocks_per_group)
2052                 return stride;
2053
2054         return 0;
2055 }
2056
2057 /* sysfs support */
2058
2059 struct ext4_attr {
2060         struct attribute attr;
2061         ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
2062         ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
2063                          const char *, size_t);
2064         int offset;
2065 };
2066
/*
 * Parse an unsigned long written to a sysfs attribute.
 *
 * Leading and trailing white space is ignored; the number itself is
 * converted by simple_strtoul() with base 0.
 *
 * @buf:   NUL-terminated input
 * @max:   largest acceptable value
 * @value: receives the converted number
 *
 * Returns 0 on success, -EINVAL if the input contains no digits at all
 * (e.g. an empty or all-blank write, which would otherwise be silently
 * taken as 0), if anything other than white space follows the number,
 * or if the number is larger than @max.
 */
static int parse_strtoul(const char *buf,
		unsigned long max, unsigned long *value)
{
	char *endp;

	while (*buf && isspace(*buf))
		buf++;
	*value = simple_strtoul(buf, &endp, 0);
	/* Nothing was consumed: there was no number to parse. */
	if (endp == buf)
		return -EINVAL;
	while (*endp && isspace(*endp))
		endp++;
	if (*endp || *value > max)
		return -EINVAL;

	return 0;
}
2082
2083 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2084                                               struct ext4_sb_info *sbi,
2085                                               char *buf)
2086 {
2087         return snprintf(buf, PAGE_SIZE, "%llu\n",
2088                         (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
2089 }
2090
2091 static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2092                                          struct ext4_sb_info *sbi, char *buf)
2093 {
2094         struct super_block *sb = sbi->s_buddy_cache->i_sb;
2095
2096         return snprintf(buf, PAGE_SIZE, "%lu\n",
2097                         (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2098                          sbi->s_sectors_written_start) >> 1);
2099 }
2100
2101 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2102                                           struct ext4_sb_info *sbi, char *buf)
2103 {
2104         struct super_block *sb = sbi->s_buddy_cache->i_sb;
2105
2106         return snprintf(buf, PAGE_SIZE, "%llu\n",
2107                         sbi->s_kbytes_written +
2108                         ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2109                           EXT4_SB(sb)->s_sectors_written_start) >> 1));
2110 }
2111
2112 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2113                                           struct ext4_sb_info *sbi,
2114                                           const char *buf, size_t count)
2115 {
2116         unsigned long t;
2117
2118         if (parse_strtoul(buf, 0x40000000, &t))
2119                 return -EINVAL;
2120
2121         if (!is_power_of_2(t))
2122                 return -EINVAL;
2123
2124         sbi->s_inode_readahead_blks = t;
2125         return count;
2126 }
2127
2128 static ssize_t sbi_ui_show(struct ext4_attr *a,
2129                                 struct ext4_sb_info *sbi, char *buf)
2130 {
2131         unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2132
2133         return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2134 }
2135
2136 static ssize_t sbi_ui_store(struct ext4_attr *a,
2137                             struct ext4_sb_info *sbi,
2138                             const char *buf, size_t count)
2139 {
2140         unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2141         unsigned long t;
2142
2143         if (parse_strtoul(buf, 0xffffffff, &t))
2144                 return -EINVAL;
2145         *ui = t;
2146         return count;
2147 }
2148
/*
 * Helpers for declaring sysfs attributes.  Each one defines a static
 * struct ext4_attr named ext4_attr_<name>.
 */

/* Attribute whose handlers work on the ext4_sb_info member _elname. */
#define EXT4_ATTR_OFFSET(_name, _mode, _show, _store, _elname)		\
static struct ext4_attr ext4_attr_##_name = {				\
	.attr	= { .name = __stringify(_name), .mode = _mode },	\
	.show	= _show,						\
	.store	= _store,						\
	.offset	= offsetof(struct ext4_sb_info, _elname),		\
}

/* Attribute with explicitly supplied handlers and no backing offset. */
#define EXT4_ATTR(_name, _mode, _show, _store)				\
static struct ext4_attr ext4_attr_##_name =				\
	__ATTR(_name, _mode, _show, _store)

/* Read-only attribute served by <name>_show(). */
#define EXT4_RO_ATTR(_name)						\
	EXT4_ATTR(_name, 0444, _name##_show, NULL)

/* Read-write attribute served by <name>_show() and <name>_store(). */
#define EXT4_RW_ATTR(_name)						\
	EXT4_ATTR(_name, 0644, _name##_show, _name##_store)

/* Read-write attribute backed by an unsigned int member of ext4_sb_info. */
#define EXT4_RW_ATTR_SBI_UI(_name, _elname)				\
	EXT4_ATTR_OFFSET(_name, 0644, sbi_ui_show, sbi_ui_store, _elname)

/* Address of the embedded struct attribute, for the attribute array. */
#define ATTR_LIST(_name) &ext4_attr_##_name.attr
2164
2165 EXT4_RO_ATTR(delayed_allocation_blocks);
2166 EXT4_RO_ATTR(session_write_kbytes);
2167 EXT4_RO_ATTR(lifetime_write_kbytes);
2168 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2169                  inode_readahead_blks_store, s_inode_readahead_blks);
2170 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2171 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2172 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
2173 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2174 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2175 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2176
2177 static struct attribute *ext4_attrs[] = {
2178         ATTR_LIST(delayed_allocation_blocks),
2179         ATTR_LIST(session_write_kbytes),
2180         ATTR_LIST(lifetime_write_kbytes),
2181         ATTR_LIST(inode_readahead_blks),
2182         ATTR_LIST(mb_stats),
2183         ATTR_LIST(mb_max_to_scan),
2184         ATTR_LIST(mb_min_to_scan),
2185         ATTR_LIST(mb_order2_req),
2186         ATTR_LIST(mb_stream_req),
2187         ATTR_LIST(mb_group_prealloc),
2188         NULL,
2189 };
2190
2191 static ssize_t ext4_attr_show(struct kobject *kobj,
2192                               struct attribute *attr, char *buf)
2193 {
2194         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2195                                                 s_kobj);
2196         struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2197
2198         return a->show ? a->show(a, sbi, buf) : 0;
2199 }
2200
2201 static ssize_t ext4_attr_store(struct kobject *kobj,
2202                                struct attribute *attr,
2203                                const char *buf, size_t len)
2204 {
2205         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2206                                                 s_kobj);
2207         struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2208
2209         return a->store ? a->store(a, sbi, buf, len) : 0;
2210 }
2211
2212 static void ext4_sb_release(struct kobject *kobj)
2213 {
2214         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2215                                                 s_kobj);
2216         complete(&sbi->s_kobj_unregister);
2217 }
2218
2219
/* sysfs operations that dispatch reads/writes to the per-attribute handlers. */
static struct sysfs_ops ext4_attr_ops = {
        .show   = ext4_attr_show,
        .store  = ext4_attr_store,
};
2224
/*
 * kobject type for the per-superblock kobject (sbi->s_kobj): default
 * attribute list, sysfs dispatch ops and the release callback.
 */
static struct kobj_type ext4_ktype = {
        .default_attrs  = ext4_attrs,
        .sysfs_ops      = &ext4_attr_ops,
        .release        = ext4_sb_release,
};
2230
2231 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2232                                 __releases(kernel_lock)
2233                                 __acquires(kernel_lock)
2234
2235 {
2236         struct buffer_head *bh;
2237         struct ext4_super_block *es = NULL;
2238         struct ext4_sb_info *sbi;
2239         ext4_fsblk_t block;
2240         ext4_fsblk_t sb_block = get_sb_block(&data);
2241         ext4_fsblk_t logical_sb_block;
2242         unsigned long offset = 0;
2243         unsigned long journal_devnum = 0;
2244         unsigned long def_mount_opts;
2245         struct inode *root;
2246         char *cp;
2247         const char *descr;
2248         int ret = -EINVAL;
2249         int blocksize;
2250         unsigned int db_count;
2251         unsigned int i;
2252         int needs_recovery, has_huge_files;
2253         int features;
2254         __u64 blocks_count;
2255         int err;
2256         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
2257
2258         sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2259         if (!sbi)
2260                 return -ENOMEM;
2261
2262         sbi->s_blockgroup_lock =
2263                 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
2264         if (!sbi->s_blockgroup_lock) {
2265                 kfree(sbi);
2266                 return -ENOMEM;
2267         }
2268         sb->s_fs_info = sbi;
2269         sbi->s_mount_opt = 0;
2270         sbi->s_resuid = EXT4_DEF_RESUID;
2271         sbi->s_resgid = EXT4_DEF_RESGID;
2272         sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
2273         sbi->s_sb_block = sb_block;
2274         sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part,
2275                                                       sectors[1]);
2276
2277         unlock_kernel();
2278
2279         /* Cleanup superblock name */
2280         for (cp = sb->s_id; (cp = strchr(cp, '/'));)
2281                 *cp = '!';
2282
2283         blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
2284         if (!blocksize) {
2285                 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
2286                 goto out_fail;
2287         }
2288
2289         /*
2290          * The ext4 superblock will not be buffer aligned for other than 1kB
2291          * block sizes.  We need to calculate the offset from buffer start.
2292          */
2293         if (blocksize != EXT4_MIN_BLOCK_SIZE) {
2294                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2295                 offset = do_div(logical_sb_block, blocksize);
2296         } else {
2297                 logical_sb_block = sb_block;
2298         }
2299
2300         if (!(bh = sb_bread(sb, logical_sb_block))) {
2301                 printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
2302                 goto out_fail;
2303         }
2304         /*
2305          * Note: s_es must be initialized as soon as possible because
2306          *       some ext4 macro-instructions depend on its value
2307          */
2308         es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
2309         sbi->s_es = es;
2310         sb->s_magic = le16_to_cpu(es->s_magic);
2311         if (sb->s_magic != EXT4_SUPER_MAGIC)
2312                 goto cantfind_ext4;
2313         sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
2314
2315         /* Set defaults before we parse the mount options */
2316         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
2317         if (def_mount_opts & EXT4_DEFM_DEBUG)
2318                 set_opt(sbi->s_mount_opt, DEBUG);
2319         if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
2320                 set_opt(sbi->s_mount_opt, GRPID);
2321         if (def_mount_opts & EXT4_DEFM_UID16)
2322                 set_opt(sbi->s_mount_opt, NO_UID32);
2323 #ifdef CONFIG_EXT4_FS_XATTR
2324         if (def_mount_opts & EXT4_DEFM_XATTR_USER)
2325                 set_opt(sbi->s_mount_opt, XATTR_USER);
2326 #endif
2327 #ifdef CONFIG_EXT4_FS_POSIX_ACL
2328         if (def_mount_opts & EXT4_DEFM_ACL)
2329                 set_opt(sbi->s_mount_opt, POSIX_ACL);
2330 #endif
2331         if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
2332                 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
2333         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
2334                 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
2335         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
2336                 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA;
2337
2338         if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
2339                 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
2340         else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
2341                 set_opt(sbi->s_mount_opt, ERRORS_CONT);
2342         else
2343                 set_opt(sbi->s_mount_opt, ERRORS_RO);
2344
2345         sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
2346         sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
2347         sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2348         sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2349         sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2350
2351         set_opt(sbi->s_mount_opt, BARRIER);
2352
2353         /*
2354          * enable delayed allocation by default
2355          * Use -o nodelalloc to turn it off
2356          */
2357         set_opt(sbi->s_mount_opt, DELALLOC);
2358
2359
2360         if (!parse_options((char *) data, sb, &journal_devnum,
2361                            &journal_ioprio, NULL, 0))
2362                 goto failed_mount;
2363
2364         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2365                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2366
2367         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
2368             (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
2369              EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
2370              EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
2371                 printk(KERN_WARNING
2372                        "EXT4-fs warning: feature flags set on rev 0 fs, "
2373                        "running e2fsck is recommended\n");
2374
2375         /*
2376          * Check feature flags regardless of the revision level, since we
2377          * previously didn't change the revision level when setting the flags,
2378          * so there is a chance incompat flags are set on a rev 0 filesystem.
2379          */
2380         features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
2381         if (features) {
2382                 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
2383                        "unsupported optional features (%x).\n", sb->s_id,
2384                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2385                         ~EXT4_FEATURE_INCOMPAT_SUPP));
2386                 goto failed_mount;
2387         }
2388         features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
2389         if (!(sb->s_flags & MS_RDONLY) && features) {
2390                 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
2391                        "unsupported optional features (%x).\n", sb->s_id,
2392                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2393                         ~EXT4_FEATURE_RO_COMPAT_SUPP));
2394                 goto failed_mount;
2395         }
2396         has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
2397                                     EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
2398         if (has_huge_files) {
2399                 /*
2400                  * Large file size enabled file system can only be
2401                  * mount if kernel is build with CONFIG_LBD
2402                  */
2403                 if (sizeof(root->i_blocks) < sizeof(u64) &&
2404                                 !(sb->s_flags & MS_RDONLY)) {
2405                         printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
2406                                         "files cannot be mounted read-write "
2407                                         "without CONFIG_LBD.\n", sb->s_id);
2408                         goto failed_mount;
2409                 }
2410         }
2411         blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
2412
2413         if (blocksize < EXT4_MIN_BLOCK_SIZE ||
2414             blocksize > EXT4_MAX_BLOCK_SIZE) {
2415                 printk(KERN_ERR
2416                        "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n",
2417                        blocksize, sb->s_id);
2418                 goto failed_mount;
2419         }
2420
2421         if (sb->s_blocksize != blocksize) {
2422
2423                 /* Validate the filesystem blocksize */
2424                 if (!sb_set_blocksize(sb, blocksize)) {
2425                         printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
2426                                         blocksize);
2427                         goto failed_mount;
2428                 }
2429
2430                 brelse(bh);
2431                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2432                 offset = do_div(logical_sb_block, blocksize);
2433                 bh = sb_bread(sb, logical_sb_block);
2434                 if (!bh) {
2435                         printk(KERN_ERR
2436                                "EXT4-fs: Can't read superblock on 2nd try.\n");
2437                         goto failed_mount;
2438                 }
2439                 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2440                 sbi->s_es = es;
2441                 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2442                         printk(KERN_ERR
2443                                "EXT4-fs: Magic mismatch, very weird !\n");
2444                         goto failed_mount;
2445                 }
2446         }
2447
2448         sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
2449                                                       has_huge_files);
2450         sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
2451
2452         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
2453                 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
2454                 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
2455         } else {
2456                 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
2457                 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
2458                 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2459                     (!is_power_of_2(sbi->s_inode_size)) ||
2460                     (sbi->s_inode_size > blocksize)) {
2461                         printk(KERN_ERR
2462                                "EXT4-fs: unsupported inode size: %d\n",
2463                                sbi->s_inode_size);
2464                         goto failed_mount;
2465                 }
2466                 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
2467                         sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
2468         }
2469         sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
2470         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
2471                 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
2472                     sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
2473                     !is_power_of_2(sbi->s_desc_size)) {
2474                         printk(KERN_ERR
2475                                "EXT4-fs: unsupported descriptor size %lu\n",
2476                                sbi->s_desc_size);
2477                         goto failed_mount;
2478                 }
2479         } else
2480                 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
2481         sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
2482         sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
2483         if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
2484                 goto cantfind_ext4;
2485         sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
2486         if (sbi->s_inodes_per_block == 0)
2487                 goto cantfind_ext4;
2488         sbi->s_itb_per_group = sbi->s_inodes_per_group /
2489                                         sbi->s_inodes_per_block;
2490         sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
2491         sbi->s_sbh = bh;
2492         sbi->s_mount_state = le16_to_cpu(es->s_state);
2493         sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2494         sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2495         for (i = 0; i < 4; i++)
2496                 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2497         sbi->s_def_hash_version = es->s_def_hash_version;
2498         i = le32_to_cpu(es->s_flags);
2499         if (i & EXT2_FLAGS_UNSIGNED_HASH)
2500                 sbi->s_hash_unsigned = 3;
2501         else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
2502 #ifdef __CHAR_UNSIGNED__
2503                 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
2504                 sbi->s_hash_unsigned = 3;
2505 #else
2506                 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
2507 #endif
2508                 sb->s_dirt = 1;
2509         }
2510
2511         if (sbi->s_blocks_per_group > blocksize * 8) {
2512                 printk(KERN_ERR
2513                        "EXT4-fs: #blocks per group too big: %lu\n",
2514                        sbi->s_blocks_per_group);
2515                 goto failed_mount;
2516         }
2517         if (sbi->s_inodes_per_group > blocksize * 8) {
2518                 printk(KERN_ERR
2519                        "EXT4-fs: #inodes per group too big: %lu\n",
2520                        sbi->s_inodes_per_group);
2521                 goto failed_mount;
2522         }
2523
2524         if (ext4_blocks_count(es) >
2525                     (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
2526                 printk(KERN_ERR "EXT4-fs: filesystem on %s:"
2527                         " too large to mount safely\n", sb->s_id);
2528                 if (sizeof(sector_t) < 8)
2529                         printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not "
2530                                         "enabled\n");
2531                 goto failed_mount;
2532         }
2533
2534         if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
2535                 goto cantfind_ext4;
2536
2537         /* check blocks count against device size */
2538         blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2539         if (blocks_count && ext4_blocks_count(es) > blocks_count) {
2540                 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu "
2541                        "exceeds size of device (%llu blocks)\n",
2542                        ext4_blocks_count(es), blocks_count);
2543                 goto failed_mount;
2544         }
2545
2546         /*
2547          * It makes no sense for the first data block to be beyond the end
2548          * of the filesystem.
2549          */
2550         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2551                 printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
2552                        "block %u is beyond end of filesystem (%llu)\n",
2553                        le32_to_cpu(es->s_first_data_block),
2554                        ext4_blocks_count(es));
2555                 goto failed_mount;
2556         }
2557         blocks_count = (ext4_blocks_count(es) -
2558                         le32_to_cpu(es->s_first_data_block) +
2559                         EXT4_BLOCKS_PER_GROUP(sb) - 1);
2560         do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2561         if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2562                 printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
2563                        "(block count %llu, first data block %u, "
2564                        "blocks per group %lu)\n", sbi->s_groups_count,
2565                        ext4_blocks_count(es),
2566                        le32_to_cpu(es->s_first_data_block),
2567                        EXT4_BLOCKS_PER_GROUP(sb));
2568                 goto failed_mount;
2569         }
2570         sbi->s_groups_count = blocks_count;
2571         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2572                    EXT4_DESC_PER_BLOCK(sb);
2573         sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2574                                     GFP_KERNEL);
2575         if (sbi->s_group_desc == NULL) {
2576                 printk(KERN_ERR "EXT4-fs: not enough memory\n");
2577                 goto failed_mount;
2578         }
2579
2580 #ifdef CONFIG_PROC_FS
2581         if (ext4_proc_root)
2582                 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2583 #endif
2584
2585         bgl_lock_init(sbi->s_blockgroup_lock);
2586
2587         for (i = 0; i < db_count; i++) {
2588                 block = descriptor_loc(sb, logical_sb_block, i);
2589                 sbi->s_group_desc[i] = sb_bread(sb, block);
2590                 if (!sbi->s_group_desc[i]) {
2591                         printk(KERN_ERR "EXT4-fs: "
2592                                "can't read group descriptor %d\n", i);
2593                         db_count = i;
2594                         goto failed_mount2;
2595                 }
2596         }
2597         if (!ext4_check_descriptors(sb)) {
2598                 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
2599                 goto failed_mount2;
2600         }
2601         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2602                 if (!ext4_fill_flex_info(sb)) {
2603                         printk(KERN_ERR
2604                                "EXT4-fs: unable to initialize "
2605                                "flex_bg meta info!\n");
2606                         goto failed_mount2;
2607                 }
2608
2609         sbi->s_gdb_count = db_count;
2610         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
2611         spin_lock_init(&sbi->s_next_gen_lock);
2612
2613         err = percpu_counter_init(&sbi->s_freeblocks_counter,
2614                         ext4_count_free_blocks(sb));
2615         if (!err) {
2616                 err = percpu_counter_init(&sbi->s_freeinodes_counter,
2617                                 ext4_count_free_inodes(sb));
2618         }
2619         if (!err) {
2620                 err = percpu_counter_init(&sbi->s_dirs_counter,
2621                                 ext4_count_dirs(sb));
2622         }
2623         if (!err) {
2624                 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2625         }
2626         if (err) {
2627                 printk(KERN_ERR "EXT4-fs: insufficient memory\n");
2628                 goto failed_mount3;
2629         }
2630
2631         sbi->s_stripe = ext4_get_stripe_size(sbi);
2632
2633         /*
2634          * set up enough so that it can read an inode
2635          */
2636         if (!test_opt(sb, NOLOAD) &&
2637             EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
2638                 sb->s_op = &ext4_sops;
2639         else
2640                 sb->s_op = &ext4_nojournal_sops;
2641         sb->s_export_op = &ext4_export_ops;
2642         sb->s_xattr = ext4_xattr_handlers;
2643 #ifdef CONFIG_QUOTA
2644         sb->s_qcop = &ext4_qctl_operations;
2645         sb->dq_op = &ext4_quota_operations;
2646 #endif
2647         INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
2648
2649         sb->s_root = NULL;
2650
2651         needs_recovery = (es->s_last_orphan != 0 ||
2652                           EXT4_HAS_INCOMPAT_FEATURE(sb,
2653                                     EXT4_FEATURE_INCOMPAT_RECOVER));
2654
2655         /*
2656          * The first inode we look at is the journal inode.  Don't try
2657          * root first: it may be modified in the journal!
2658          */
2659         if (!test_opt(sb, NOLOAD) &&
2660             EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2661                 if (ext4_load_journal(sb, es, journal_devnum))
2662                         goto failed_mount3;
2663                 if (!(sb->s_flags & MS_RDONLY) &&
2664                     EXT4_SB(sb)->s_journal->j_failed_commit) {
2665                         printk(KERN_CRIT "EXT4-fs error (device %s): "
2666                                "ext4_fill_super: Journal transaction "
2667                                "%u is corrupt\n", sb->s_id,
2668                                EXT4_SB(sb)->s_journal->j_failed_commit);
2669                         if (test_opt(sb, ERRORS_RO)) {
2670                                 printk(KERN_CRIT
2671                                        "Mounting filesystem read-only\n");
2672                                 sb->s_flags |= MS_RDONLY;
2673                                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2674                                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2675                         }
2676                         if (test_opt(sb, ERRORS_PANIC)) {
2677                                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2678                                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2679                                 ext4_commit_super(sb, 1);
2680                                 goto failed_mount4;
2681                         }
2682                 }
2683         } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2684               EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2685                 printk(KERN_ERR "EXT4-fs: required journal recovery "
2686                        "suppressed and not mounted read-only\n");
2687                 goto failed_mount4;
2688         } else {
2689                 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
2690                 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
2691                 sbi->s_journal = NULL;
2692                 needs_recovery = 0;
2693                 goto no_journal;
2694         }
2695
2696         if (ext4_blocks_count(es) > 0xffffffffULL &&
2697             !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2698                                        JBD2_FEATURE_INCOMPAT_64BIT)) {
2699                 printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
2700                 goto failed_mount4;
2701         }
2702
2703         if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
2704                 jbd2_journal_set_features(sbi->s_journal,
2705                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2706                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2707         } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
2708                 jbd2_journal_set_features(sbi->s_journal,
2709                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
2710                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
2711                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2712         } else {
2713                 jbd2_journal_clear_features(sbi->s_journal,
2714                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2715                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2716         }
2717
2718         /* We have now updated the journal if required, so we can
2719          * validate the data journaling mode. */
2720         switch (test_opt(sb, DATA_FLAGS)) {
2721         case 0:
2722                 /* No mode set, assume a default based on the journal
2723                  * capabilities: ORDERED_DATA if the journal can
2724                  * cope, else JOURNAL_DATA
2725                  */
2726                 if (jbd2_journal_check_available_features
2727                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
2728                         set_opt(sbi->s_mount_opt, ORDERED_DATA);
2729                 else
2730                         set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2731                 break;
2732
2733         case EXT4_MOUNT_ORDERED_DATA:
2734         case EXT4_MOUNT_WRITEBACK_DATA:
2735                 if (!jbd2_journal_check_available_features
2736                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2737                         printk(KERN_ERR "EXT4-fs: Journal does not support "
2738                                "requested data journaling mode\n");
2739                         goto failed_mount4;
2740                 }
2741         default:
2742                 break;
2743         }
2744         set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
2745
2746 no_journal:
2747
2748         if (test_opt(sb, NOBH)) {
2749                 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2750                         printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
2751                                 "its supported only with writeback mode\n");
2752                         clear_opt(sbi->s_mount_opt, NOBH);
2753                 }
2754         }
2755         /*
2756          * The jbd2_journal_load will have done any necessary log recovery,
2757          * so we can safely mount the rest of the filesystem now.
2758          */
2759
2760         root = ext4_iget(sb, EXT4_ROOT_INO);
2761         if (IS_ERR(root)) {
2762                 printk(KERN_ERR "EXT4-fs: get root inode failed\n");
2763                 ret = PTR_ERR(root);
2764                 goto failed_mount4;
2765         }
2766         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2767                 iput(root);
2768                 printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n");
2769                 goto failed_mount4;
2770         }
2771         sb->s_root = d_alloc_root(root);
2772         if (!sb->s_root) {
2773                 printk(KERN_ERR "EXT4-fs: get root dentry failed\n");
2774                 iput(root);
2775                 ret = -ENOMEM;
2776                 goto failed_mount4;
2777         }
2778
2779         ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
2780
2781         /* determine the minimum size of new large inodes, if present */
2782         if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
2783                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2784                                                      EXT4_GOOD_OLD_INODE_SIZE;
2785                 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
2786                                        EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
2787                         if (sbi->s_want_extra_isize <
2788                             le16_to_cpu(es->s_want_extra_isize))
2789                                 sbi->s_want_extra_isize =
2790                                         le16_to_cpu(es->s_want_extra_isize);
2791                         if (sbi->s_want_extra_isize <
2792                             le16_to_cpu(es->s_min_extra_isize))
2793                                 sbi->s_want_extra_isize =
2794                                         le16_to_cpu(es->s_min_extra_isize);
2795                 }
2796         }
2797         /* Check if enough inode space is available */
2798         if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
2799                                                         sbi->s_inode_size) {
2800                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2801                                                        EXT4_GOOD_OLD_INODE_SIZE;
2802                 printk(KERN_INFO "EXT4-fs: required extra inode space not"
2803                         "available.\n");
2804         }
2805
2806         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2807                 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
2808                                 "requested data journaling mode\n");
2809                 clear_opt(sbi->s_mount_opt, DELALLOC);
2810         } else if (test_opt(sb, DELALLOC))
2811                 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
2812
2813         ext4_ext_init(sb);
2814         err = ext4_mb_init(sb, needs_recovery);
2815         if (err) {
2816                 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
2817                        err);
2818                 goto failed_mount4;
2819         }
2820
2821         sbi->s_kobj.kset = ext4_kset;
2822         init_completion(&sbi->s_kobj_unregister);
2823         err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
2824                                    "%s", sb->s_id);
2825         if (err) {
2826                 ext4_mb_release(sb);
2827                 ext4_ext_release(sb);
2828                 goto failed_mount4;
2829         };
2830
2831         /*
2832          * akpm: core read_super() calls in here with the superblock locked.
2833          * That deadlocks, because orphan cleanup needs to lock the superblock
2834          * in numerous places.  Here we just pop the lock - it's relatively
2835          * harmless, because we are now ready to accept write_super() requests,
2836          * and aviro says that's the only reason for hanging onto the
2837          * superblock lock.
2838          */
2839         EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
2840         ext4_orphan_cleanup(sb, es);
2841         EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2842         if (needs_recovery) {
2843                 printk(KERN_INFO "EXT4-fs: recovery complete.\n");
2844                 ext4_mark_recovery_complete(sb, es);
2845         }
2846         if (EXT4_SB(sb)->s_journal) {
2847                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2848                         descr = " journalled data mode";
2849                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2850                         descr = " ordered data mode";
2851                 else
2852                         descr = " writeback data mode";
2853         } else
2854                 descr = "out journal";
2855
2856         printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
2857                sb->s_id, descr);
2858
2859         lock_kernel();
2860         return 0;
2861
2862 cantfind_ext4:
2863         if (!silent)
2864                 printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n",
2865                        sb->s_id);
2866         goto failed_mount;
2867
2868 failed_mount4:
2869         printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
2870         if (sbi->s_journal) {
2871                 jbd2_journal_destroy(sbi->s_journal);
2872                 sbi->s_journal = NULL;
2873         }
2874 failed_mount3:
2875         if (sbi->s_flex_groups) {
2876                 if (is_vmalloc_addr(sbi->s_flex_groups))
2877                         vfree(sbi->s_flex_groups);
2878                 else
2879                         kfree(sbi->s_flex_groups);
2880         }
2881         percpu_counter_destroy(&sbi->s_freeblocks_counter);
2882         percpu_counter_destroy(&sbi->s_freeinodes_counter);
2883         percpu_counter_destroy(&sbi->s_dirs_counter);
2884         percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
2885 failed_mount2:
2886         for (i = 0; i < db_count; i++)
2887                 brelse(sbi->s_group_desc[i]);
2888         kfree(sbi->s_group_desc);
2889 failed_mount:
2890         if (sbi->s_proc) {
2891                 remove_proc_entry(sb->s_id, ext4_proc_root);
2892         }
2893 #ifdef CONFIG_QUOTA
2894         for (i = 0; i < MAXQUOTAS; i++)
2895                 kfree(sbi->s_qf_names[i]);
2896 #endif
2897         ext4_blkdev_remove(sbi);
2898         brelse(bh);
2899 out_fail:
2900         sb->s_fs_info = NULL;
2901         kfree(sbi);
2902         lock_kernel();
2903         return ret;
2904 }
2905
2906 /*
2907  * Setup any per-fs journal parameters now.  We'll do this both on
2908  * initial mount, once the journal has been initialised but before we've
2909  * done any recovery; and again on any subsequent remount.
2910  */
2911 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
2912 {
2913         struct ext4_sb_info *sbi = EXT4_SB(sb);
2914
2915         journal->j_commit_interval = sbi->s_commit_interval;
2916         journal->j_min_batch_time = sbi->s_min_batch_time;
2917         journal->j_max_batch_time = sbi->s_max_batch_time;
2918
2919         spin_lock(&journal->j_state_lock);
2920         if (test_opt(sb, BARRIER))
2921                 journal->j_flags |= JBD2_BARRIER;
2922         else
2923                 journal->j_flags &= ~JBD2_BARRIER;
2924         if (test_opt(sb, DATA_ERR_ABORT))
2925                 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
2926         else
2927                 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
2928         spin_unlock(&journal->j_state_lock);
2929 }
2930
2931 static journal_t *ext4_get_journal(struct super_block *sb,
2932                                    unsigned int journal_inum)
2933 {
2934         struct inode *journal_inode;
2935         journal_t *journal;
2936
2937         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2938
2939         /* First, test for the existence of a valid inode on disk.  Bad
2940          * things happen if we iget() an unused inode, as the subsequent
2941          * iput() will try to delete it. */
2942
2943         journal_inode = ext4_iget(sb, journal_inum);
2944         if (IS_ERR(journal_inode)) {
2945                 printk(KERN_ERR "EXT4-fs: no journal found.\n");
2946                 return NULL;
2947         }
2948         if (!journal_inode->i_nlink) {
2949                 make_bad_inode(journal_inode);
2950                 iput(journal_inode);
2951                 printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n");
2952                 return NULL;
2953         }
2954
2955         jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
2956                   journal_inode, journal_inode->i_size);
2957         if (!S_ISREG(journal_inode->i_mode)) {
2958                 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
2959                 iput(journal_inode);
2960                 return NULL;
2961         }
2962
2963         journal = jbd2_journal_init_inode(journal_inode);
2964         if (!journal) {
2965                 printk(KERN_ERR "EXT4-fs: Could not load journal inode\n");
2966                 iput(journal_inode);
2967                 return NULL;
2968         }
2969         journal->j_private = sb;
2970         ext4_init_journal_params(sb, journal);
2971         return journal;
2972 }
2973
2974 static journal_t *ext4_get_dev_journal(struct super_block *sb,
2975                                        dev_t j_dev)
2976 {
2977         struct buffer_head *bh;
2978         journal_t *journal;
2979         ext4_fsblk_t start;
2980         ext4_fsblk_t len;
2981         int hblock, blocksize;
2982         ext4_fsblk_t sb_block;
2983         unsigned long offset;
2984         struct ext4_super_block *es;
2985         struct block_device *bdev;
2986
2987         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2988
2989         bdev = ext4_blkdev_get(j_dev);
2990         if (bdev == NULL)
2991                 return NULL;
2992
2993         if (bd_claim(bdev, sb)) {
2994                 printk(KERN_ERR
2995                         "EXT4-fs: failed to claim external journal device.\n");
2996                 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2997                 return NULL;
2998         }
2999
3000         blocksize = sb->s_blocksize;
3001         hblock = bdev_hardsect_size(bdev);
3002         if (blocksize < hblock) {
3003                 printk(KERN_ERR
3004                         "EXT4-fs: blocksize too small for journal device.\n");
3005                 goto out_bdev;
3006         }
3007
3008         sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
3009         offset = EXT4_MIN_BLOCK_SIZE % blocksize;
3010         set_blocksize(bdev, blocksize);
3011         if (!(bh = __bread(bdev, sb_block, blocksize))) {
3012                 printk(KERN_ERR "EXT4-fs: couldn't read superblock of "
3013                        "external journal\n");
3014                 goto out_bdev;
3015         }
3016
3017         es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
3018         if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
3019             !(le32_to_cpu(es->s_feature_incompat) &
3020               EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
3021                 printk(KERN_ERR "EXT4-fs: external journal has "
3022                                         "bad superblock\n");
3023                 brelse(bh);
3024                 goto out_bdev;
3025         }
3026
3027         if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
3028                 printk(KERN_ERR "EXT4-fs: journal UUID does not match\n");
3029                 brelse(bh);
3030                 goto out_bdev;
3031         }
3032
3033         len = ext4_blocks_count(es);
3034         start = sb_block + 1;
3035         brelse(bh);     /* we're done with the superblock */
3036
3037         journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
3038                                         start, len, blocksize);
3039         if (!journal) {
3040                 printk(KERN_ERR "EXT4-fs: failed to create device journal\n");
3041                 goto out_bdev;
3042         }
3043         journal->j_private = sb;
3044         ll_rw_block(READ, 1, &journal->j_sb_buffer);
3045         wait_on_buffer(journal->j_sb_buffer);
3046         if (!buffer_uptodate(journal->j_sb_buffer)) {
3047                 printk(KERN_ERR "EXT4-fs: I/O error on journal device\n");
3048                 goto out_journal;
3049         }
3050         if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
3051                 printk(KERN_ERR "EXT4-fs: External journal has more than one "
3052                                         "user (unsupported) - %d\n",
3053                         be32_to_cpu(journal->j_superblock->s_nr_users));
3054                 goto out_journal;
3055         }
3056         EXT4_SB(sb)->journal_bdev = bdev;
3057         ext4_init_journal_params(sb, journal);
3058         return journal;
3059 out_journal:
3060         jbd2_journal_destroy(journal);
3061 out_bdev:
3062         ext4_blkdev_put(bdev);
3063         return NULL;
3064 }
3065
3066 static int ext4_load_journal(struct super_block *sb,
3067                              struct ext4_super_block *es,
3068                              unsigned long journal_devnum)
3069 {
3070         journal_t *journal;
3071         unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
3072         dev_t journal_dev;
3073         int err = 0;
3074         int really_read_only;
3075
3076         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3077
3078         if (journal_devnum &&
3079             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3080                 printk(KERN_INFO "EXT4-fs: external journal device major/minor "
3081                         "numbers have changed\n");
3082                 journal_dev = new_decode_dev(journal_devnum);
3083         } else
3084                 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
3085
3086         really_read_only = bdev_read_only(sb->s_bdev);
3087
3088         /*
3089          * Are we loading a blank journal or performing recovery after a
3090          * crash?  For recovery, we need to check in advance whether we
3091          * can get read-write access to the device.
3092          */
3093
3094         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3095                 if (sb->s_flags & MS_RDONLY) {
3096                         printk(KERN_INFO "EXT4-fs: INFO: recovery "
3097                                         "required on readonly filesystem.\n");
3098                         if (really_read_only) {
3099                                 printk(KERN_ERR "EXT4-fs: write access "
3100                                         "unavailable, cannot proceed.\n");
3101                                 return -EROFS;
3102                         }
3103                         printk(KERN_INFO "EXT4-fs: write access will "
3104                                "be enabled during recovery.\n");
3105                 }
3106         }
3107
3108         if (journal_inum && journal_dev) {
3109                 printk(KERN_ERR "EXT4-fs: filesystem has both journal "
3110                        "and inode journals!\n");
3111                 return -EINVAL;
3112         }
3113
3114         if (journal_inum) {
3115                 if (!(journal = ext4_get_journal(sb, journal_inum)))
3116                         return -EINVAL;
3117         } else {
3118                 if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
3119                         return -EINVAL;
3120         }
3121
3122         if (journal->j_flags & JBD2_BARRIER)
3123                 printk(KERN_INFO "EXT4-fs: barriers enabled\n");
3124         else
3125                 printk(KERN_INFO "EXT4-fs: barriers disabled\n");
3126
3127         if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
3128                 err = jbd2_journal_update_format(journal);
3129                 if (err)  {
3130                         printk(KERN_ERR "EXT4-fs: error updating journal.\n");
3131                         jbd2_journal_destroy(journal);
3132                         return err;
3133                 }
3134         }
3135
3136         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
3137                 err = jbd2_journal_wipe(journal, !really_read_only);
3138         if (!err)
3139                 err = jbd2_journal_load(journal);
3140
3141         if (err) {
3142                 printk(KERN_ERR "EXT4-fs: error loading journal.\n");
3143                 jbd2_journal_destroy(journal);
3144                 return err;
3145         }
3146
3147         EXT4_SB(sb)->s_journal = journal;
3148         ext4_clear_journal_err(sb, es);
3149
3150         if (journal_devnum &&
3151             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3152                 es->s_journal_dev = cpu_to_le32(journal_devnum);
3153
3154                 /* Make sure we flush the recovery flag to disk. */
3155                 ext4_commit_super(sb, 1);
3156         }
3157
3158         return 0;
3159 }
3160
3161 static int ext4_commit_super(struct super_block *sb, int sync)
3162 {
3163         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
3164         struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
3165         int error = 0;
3166
3167         if (!sbh)
3168                 return error;
3169         if (buffer_write_io_error(sbh)) {
3170                 /*
3171                  * Oh, dear.  A previous attempt to write the
3172                  * superblock failed.  This could happen because the
3173                  * USB device was yanked out.  Or it could happen to
3174                  * be a transient write error and maybe the block will
3175                  * be remapped.  Nothing we can do but to retry the
3176                  * write and hope for the best.
3177                  */
3178                 printk(KERN_ERR "EXT4-fs: previous I/O error to "
3179                        "superblock detected for %s.\n", sb->s_id);
3180                 clear_buffer_write_io_error(sbh);
3181                 set_buffer_uptodate(sbh);
3182         }
3183         es->s_wtime = cpu_to_le32(get_seconds());
3184         es->s_kbytes_written =
3185                 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
3186                             ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
3187                               EXT4_SB(sb)->s_sectors_written_start) >> 1));
3188         ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
3189                                         &EXT4_SB(sb)->s_freeblocks_counter));
3190         es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
3191                                         &EXT4_SB(sb)->s_freeinodes_counter));
3192         sb->s_dirt = 0;
3193         BUFFER_TRACE(sbh, "marking dirty");
3194         mark_buffer_dirty(sbh);
3195         if (sync) {
3196                 error = sync_dirty_buffer(sbh);
3197                 if (error)
3198                         return error;
3199
3200                 error = buffer_write_io_error(sbh);
3201                 if (error) {
3202                         printk(KERN_ERR "EXT4-fs: I/O error while writing "
3203                                "superblock for %s.\n", sb->s_id);
3204                         clear_buffer_write_io_error(sbh);
3205                         set_buffer_uptodate(sbh);
3206                 }
3207         }
3208         return error;
3209 }
3210
3211
3212 /*
3213  * Have we just finished recovery?  If so, and if we are mounting (or
3214  * remounting) the filesystem readonly, then we will end up with a
3215  * consistent fs on disk.  Record that fact.
3216  */
3217 static void ext4_mark_recovery_complete(struct super_block *sb,
3218                                         struct ext4_super_block *es)
3219 {
3220         journal_t *journal = EXT4_SB(sb)->s_journal;
3221
3222         if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
3223                 BUG_ON(journal != NULL);
3224                 return;
3225         }
3226         jbd2_journal_lock_updates(journal);
3227         if (jbd2_journal_flush(journal) < 0)
3228                 goto out;
3229
3230         lock_super(sb);
3231         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
3232             sb->s_flags & MS_RDONLY) {
3233                 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3234                 ext4_commit_super(sb, 1);
3235         }
3236         unlock_super(sb);
3237
3238 out:
3239         jbd2_journal_unlock_updates(journal);
3240 }
3241
3242 /*
3243  * If we are mounting (or read-write remounting) a filesystem whose journal
3244  * has recorded an error from a previous lifetime, move that error to the
3245  * main filesystem now.
3246  */
3247 static void ext4_clear_journal_err(struct super_block *sb,
3248                                    struct ext4_super_block *es)
3249 {
3250         journal_t *journal;
3251         int j_errno;
3252         const char *errstr;
3253
3254         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3255
3256         journal = EXT4_SB(sb)->s_journal;
3257
3258         /*
3259          * Now check for any error status which may have been recorded in the
3260          * journal by a prior ext4_error() or ext4_abort()
3261          */
3262
3263         j_errno = jbd2_journal_errno(journal);
3264         if (j_errno) {
3265                 char nbuf[16];
3266
3267                 errstr = ext4_decode_error(sb, j_errno, nbuf);
3268                 ext4_warning(sb, __func__, "Filesystem error recorded "
3269                              "from previous mount: %s", errstr);
3270                 ext4_warning(sb, __func__, "Marking fs in need of "
3271                              "filesystem check.");
3272
3273                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3274                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
3275                 ext4_commit_super(sb, 1);
3276
3277                 jbd2_journal_clear_err(journal);
3278         }
3279 }
3280
3281 /*
3282  * Force the running and committing transactions to commit,
3283  * and wait on the commit.
3284  */
3285 int ext4_force_commit(struct super_block *sb)
3286 {
3287         journal_t *journal;
3288         int ret = 0;
3289
3290         if (sb->s_flags & MS_RDONLY)
3291                 return 0;
3292
3293         journal = EXT4_SB(sb)->s_journal;
3294         if (journal)
3295                 ret = ext4_journal_force_commit(journal);
3296
3297         return ret;
3298 }
3299
/*
 * Write the superblock out and wait for the write to finish.  The
 * result of ext4_commit_super() is dropped because this function
 * returns void.
 */
static void ext4_write_super(struct super_block *sb)
{
	const int sync = 1;

	ext4_commit_super(sb, sync);
}
3304
3305 static int ext4_sync_fs(struct super_block *sb, int wait)
3306 {
3307         int ret = 0;
3308         tid_t target;
3309
3310         trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3311         if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
3312                 if (wait)
3313                         jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
3314         }
3315         return ret;
3316 }
3317
3318 /*
3319  * LVM calls this function before a (read-only) snapshot is created.  This
3320  * gives us a chance to flush the journal completely and mark the fs clean.
3321  */
3322 static int ext4_freeze(struct super_block *sb)
3323 {
3324         int error = 0;
3325         journal_t *journal;
3326
3327         if (sb->s_flags & MS_RDONLY)
3328                 return 0;
3329
3330         journal = EXT4_SB(sb)->s_journal;
3331
3332         /* Now we set up the journal barrier. */
3333         jbd2_journal_lock_updates(journal);
3334
3335         /*
3336          * Don't clear the needs_recovery flag if we failed to flush
3337          * the journal.
3338          */
3339         error = jbd2_journal_flush(journal);
3340         if (error < 0) {
3341         out:
3342                 jbd2_journal_unlock_updates(journal);
3343                 return error;
3344         }
3345
3346         /* Journal blocked and flushed, clear needs_recovery flag. */
3347         EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3348         error = ext4_commit_super(sb, 1);
3349         if (error)
3350                 goto out;
3351         return 0;
3352 }
3353
3354 /*
3355  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
3356  * flag here, even though the filesystem is not technically dirty yet.
3357  */
3358 static int ext4_unfreeze(struct super_block *sb)
3359 {
3360         if (sb->s_flags & MS_RDONLY)
3361                 return 0;
3362
3363         lock_super(sb);
3364         /* Reset the needs_recovery flag before the fs is unlocked. */
3365         EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3366         ext4_commit_super(sb, 1);
3367         unlock_super(sb);
3368         jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3369         return 0;
3370 }
3371
3372 static int ext4_remount(struct super_block *sb, int *flags, char *data)
3373 {
3374         struct ext4_super_block *es;
3375         struct ext4_sb_info *sbi = EXT4_SB(sb);
3376         ext4_fsblk_t n_blocks_count = 0;
3377         unsigned long old_sb_flags;
3378         struct ext4_mount_options old_opts;
3379         ext4_group_t g;
3380         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3381         int err;
3382 #ifdef CONFIG_QUOTA
3383         int i;
3384 #endif
3385
3386         /* Store the original options */
3387         old_sb_flags = sb->s_flags;
3388         old_opts.s_mount_opt = sbi->s_mount_opt;
3389         old_opts.s_resuid = sbi->s_resuid;
3390         old_opts.s_resgid = sbi->s_resgid;
3391         old_opts.s_commit_interval = sbi->s_commit_interval;
3392         old_opts.s_min_batch_time = sbi->s_min_batch_time;
3393         old_opts.s_max_batch_time = sbi->s_max_batch_time;
3394 #ifdef CONFIG_QUOTA
3395         old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
3396         for (i = 0; i < MAXQUOTAS; i++)
3397                 old_opts.s_qf_names[i] = sbi->s_qf_names[i];
3398 #endif
3399         if (sbi->s_journal && sbi->s_journal->j_task->io_context)
3400                 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
3401
3402         /*
3403          * Allow the "check" option to be passed as a remount option.
3404          */
3405         if (!parse_options(data, sb, NULL, &journal_ioprio,
3406                            &n_blocks_count, 1)) {
3407                 err = -EINVAL;
3408                 goto restore_opts;
3409         }
3410
3411         if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
3412                 ext4_abort(sb, __func__, "Abort forced by user");
3413
3414         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3415                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3416
3417         es = sbi->s_es;
3418
3419         if (sbi->s_journal) {
3420                 ext4_init_journal_params(sb, sbi->s_journal);
3421                 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3422         }
3423
3424         if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
3425                 n_blocks_count > ext4_blocks_count(es)) {
3426                 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) {
3427                         err = -EROFS;
3428                         goto restore_opts;
3429                 }
3430
3431                 if (*flags & MS_RDONLY) {
3432                         /*
3433                          * First of all, the unconditional stuff we have to do
3434                          * to disable replay of the journal when we next remount
3435                          */
3436                         sb->s_flags |= MS_RDONLY;
3437
3438                         /*
3439                          * OK, test if we are remounting a valid rw partition
3440                          * readonly, and if so set the rdonly flag and then
3441                          * mark the partition as valid again.
3442                          */
3443                         if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
3444                             (sbi->s_mount_state & EXT4_VALID_FS))
3445                                 es->s_state = cpu_to_le16(sbi->s_mount_state);
3446
3447                         /*
3448                          * We have to unlock super so that we can wait for
3449                          * transactions.
3450                          */
3451                         if (sbi->s_journal) {
3452                                 unlock_super(sb);
3453                                 ext4_mark_recovery_complete(sb, es);
3454                                 lock_super(sb);
3455                         }
3456                 } else {
3457                         int ret;
3458                         if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3459                                         ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
3460                                 printk(KERN_WARNING "EXT4-fs: %s: couldn't "
3461                                        "remount RDWR because of unsupported "
3462                                        "optional features (%x).\n", sb->s_id,
3463                                 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
3464                                         ~EXT4_FEATURE_RO_COMPAT_SUPP));
3465                                 err = -EROFS;
3466                                 goto restore_opts;
3467                         }
3468
3469                         /*
3470                          * Make sure the group descriptor checksums
3471                          * are sane.  If they aren't, refuse to
3472                          * remount r/w.
3473                          */
3474                         for (g = 0; g < sbi->s_groups_count; g++) {
3475                                 struct ext4_group_desc *gdp =
3476                                         ext4_get_group_desc(sb, g, NULL);
3477
3478                                 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3479                                         printk(KERN_ERR
3480                "EXT4-fs: ext4_remount: "
3481                 "Checksum for group %u failed (%u!=%u)\n",
3482                 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3483                                                le16_to_cpu(gdp->bg_checksum));
3484                                         err = -EINVAL;
3485                                         goto restore_opts;
3486                                 }
3487                         }
3488
3489                         /*
3490                          * If we have an unprocessed orphan list hanging
3491                          * around from a previously readonly bdev mount,
3492                          * require a full umount/remount for now.
3493                          */
3494                         if (es->s_last_orphan) {
3495                                 printk(KERN_WARNING "EXT4-fs: %s: couldn't "
3496                                        "remount RDWR because of unprocessed "
3497                                        "orphan inode list.  Please "
3498                                        "umount/remount instead.\n",
3499                                        sb->s_id);
3500                                 err = -EINVAL;
3501                                 goto restore_opts;
3502                         }
3503
3504                         /*
3505                          * Mounting a RDONLY partition read-write, so reread
3506                          * and store the current valid flag.  (It may have
3507                          * been changed by e2fsck since we originally mounted
3508                          * the partition.)
3509                          */
3510                         if (sbi->s_journal)
3511                                 ext4_clear_journal_err(sb, es);
3512                         sbi->s_mount_state = le16_to_cpu(es->s_state);
3513                         if ((err = ext4_group_extend(sb, es, n_blocks_count)))
3514                                 goto restore_opts;
3515                         if (!ext4_setup_super(sb, es, 0))
3516                                 sb->s_flags &= ~MS_RDONLY;
3517                 }
3518         }
3519         if (sbi->s_journal == NULL)
3520                 ext4_commit_super(sb, 1);
3521
3522 #ifdef CONFIG_QUOTA
3523         /* Release old quota file names */
3524         for (i = 0; i < MAXQUOTAS; i++)
3525                 if (old_opts.s_qf_names[i] &&
3526                     old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3527                         kfree(old_opts.s_qf_names[i]);
3528 #endif
3529         return 0;
3530 restore_opts:
3531         sb->s_flags = old_sb_flags;
3532         sbi->s_mount_opt = old_opts.s_mount_opt;
3533         sbi->s_resuid = old_opts.s_resuid;
3534         sbi->s_resgid = old_opts.s_resgid;
3535         sbi->s_commit_interval = old_opts.s_commit_interval;
3536         sbi->s_min_batch_time = old_opts.s_min_batch_time;
3537         sbi->s_max_batch_time = old_opts.s_max_batch_time;
3538 #ifdef CONFIG_QUOTA
3539         sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
3540         for (i = 0; i < MAXQUOTAS; i++) {
3541                 if (sbi->s_qf_names[i] &&
3542                     old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3543                         kfree(sbi->s_qf_names[i]);
3544                 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
3545         }
3546 #endif
3547         return err;
3548 }
3549
3550 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3551 {
3552         struct super_block *sb = dentry->d_sb;
3553         struct ext4_sb_info *sbi = EXT4_SB(sb);
3554         struct ext4_super_block *es = sbi->s_es;
3555         u64 fsid;
3556
3557         if (test_opt(sb, MINIX_DF)) {
3558                 sbi->s_overhead_last = 0;
3559         } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
3560                 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3561                 ext4_fsblk_t overhead = 0;
3562
3563                 /*
3564                  * Compute the overhead (FS structures).  This is constant
3565                  * for a given filesystem unless the number of block groups
3566                  * changes so we cache the previous value until it does.
3567                  */
3568
3569                 /*
3570                  * All of the blocks before first_data_block are
3571                  * overhead
3572                  */
3573                 overhead = le32_to_cpu(es->s_first_data_block);
3574
3575                 /*
3576                  * Add the overhead attributed to the superblock and
3577                  * block group descriptors.  If the sparse superblocks
3578                  * feature is turned on, then not all groups have this.
3579                  */
3580                 for (i = 0; i < ngroups; i++) {
3581                         overhead += ext4_bg_has_super(sb, i) +
3582                                 ext4_bg_num_gdb(sb, i);
3583                         cond_resched();
3584                 }
3585
3586                 /*
3587                  * Every block group has an inode bitmap, a block
3588                  * bitmap, and an inode table.
3589                  */
3590                 overhead += ngroups * (2 + sbi->s_itb_per_group);
3591                 sbi->s_overhead_last = overhead;
3592                 smp_wmb();
3593                 sbi->s_blocks_last = ext4_blocks_count(es);
3594         }
3595
3596         buf->f_type = EXT4_SUPER_MAGIC;
3597         buf->f_bsize = sb->s_blocksize;
3598         buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3599         buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3600                        percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3601         ext4_free_blocks_count_set(es, buf->f_bfree);
3602         buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3603         if (buf->f_bfree < ext4_r_blocks_count(es))
3604                 buf->f_bavail = 0;
3605         buf->f_files = le32_to_cpu(es->s_inodes_count);
3606         buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
3607         es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
3608         buf->f_namelen = EXT4_NAME_LEN;
3609         fsid = le64_to_cpup((void *)es->s_uuid) ^
3610                le64_to_cpup((void *)es->s_uuid + sizeof(u64));
3611         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
3612         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
3613         return 0;
3614 }
3615
/*
 * Helper function for writing quotas on sync - we need to start a
 * transaction before the quota file is locked for write. Otherwise there
 * are possible deadlocks:
 * Process 1                         Process 2
 * ext4_create()                     quota_sync()
 *   jbd2_journal_start()                  write_dquot()
 *   vfs_dq_init()                         down(dqio_mutex)
 *     down(dqio_mutex)                    jbd2_journal_start()
 *
 */
3625
3626 #ifdef CONFIG_QUOTA
3627
3628 static inline struct inode *dquot_to_inode(struct dquot *dquot)
3629 {
3630         return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
3631 }
3632
3633 static int ext4_write_dquot(struct dquot *dquot)
3634 {
3635         int ret, err;
3636         handle_t *handle;
3637         struct inode *inode;
3638
3639         inode = dquot_to_inode(dquot);
3640         handle = ext4_journal_start(inode,
3641                                         EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
3642         if (IS_ERR(handle))
3643                 return PTR_ERR(handle);
3644         ret = dquot_commit(dquot);
3645         err = ext4_journal_stop(handle);
3646         if (!ret)
3647                 ret = err;
3648         return ret;
3649 }
3650
3651 static int ext4_acquire_dquot(struct dquot *dquot)
3652 {
3653         int ret, err;
3654         handle_t *handle;
3655
3656         handle = ext4_journal_start(dquot_to_inode(dquot),
3657                                         EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
3658         if (IS_ERR(handle))
3659                 return PTR_ERR(handle);
3660         ret = dquot_acquire(dquot);
3661         err = ext4_journal_stop(handle);
3662         if (!ret)
3663                 ret = err;
3664         return ret;
3665 }
3666
3667 static int ext4_release_dquot(struct dquot *dquot)
3668 {
3669         int ret, err;
3670         handle_t *handle;
3671
3672         handle = ext4_journal_start(dquot_to_inode(dquot),
3673                                         EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
3674         if (IS_ERR(handle)) {
3675                 /* Release dquot anyway to avoid endless cycle in dqput() */
3676                 dquot_release(dquot);
3677                 return PTR_ERR(handle);
3678         }
3679         ret = dquot_release(dquot);
3680         err = ext4_journal_stop(handle);
3681         if (!ret)
3682                 ret = err;
3683         return ret;
3684 }
3685
3686 static int ext4_mark_dquot_dirty(struct dquot *dquot)
3687 {
3688         /* Are we journaling quotas? */
3689         if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
3690             EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
3691                 dquot_mark_dquot_dirty(dquot);
3692                 return ext4_write_dquot(dquot);
3693         } else {
3694                 return dquot_mark_dquot_dirty(dquot);
3695         }
3696 }
3697
3698 static int ext4_write_info(struct super_block *sb, int type)
3699 {
3700         int ret, err;
3701         handle_t *handle;
3702
3703         /* Data block + inode block */
3704         handle = ext4_journal_start(sb->s_root->d_inode, 2);
3705         if (IS_ERR(handle))
3706                 return PTR_ERR(handle);
3707         ret = dquot_commit_info(sb, type);
3708         err = ext4_journal_stop(handle);
3709         if (!ret)
3710                 ret = err;
3711         return ret;
3712 }
3713
3714 /*
3715  * Turn on quotas during mount time - we need to find
3716  * the quota file and such...
3717  */
3718 static int ext4_quota_on_mount(struct super_block *sb, int type)
3719 {
3720         return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
3721                         EXT4_SB(sb)->s_jquota_fmt, type);
3722 }
3723
3724 /*
3725  * Standard function to be called on quota_on
3726  */
3727 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3728                          char *name, int remount)
3729 {
3730         int err;
3731         struct path path;
3732
3733         if (!test_opt(sb, QUOTA))
3734                 return -EINVAL;
3735         /* When remounting, no checks are needed and in fact, name is NULL */
3736         if (remount)
3737                 return vfs_quota_on(sb, type, format_id, name, remount);
3738
3739         err = kern_path(name, LOOKUP_FOLLOW, &path);
3740         if (err)
3741                 return err;
3742
3743         /* Quotafile not on the same filesystem? */
3744         if (path.mnt->mnt_sb != sb) {
3745                 path_put(&path);
3746                 return -EXDEV;
3747         }
3748         /* Journaling quota? */
3749         if (EXT4_SB(sb)->s_qf_names[type]) {
3750                 /* Quotafile not in fs root? */
3751                 if (path.dentry->d_parent != sb->s_root)
3752                         printk(KERN_WARNING
3753                                 "EXT4-fs: Quota file not on filesystem root. "
3754                                 "Journaled quota will not work.\n");
3755         }
3756
3757         /*
3758          * When we journal data on quota file, we have to flush journal to see
3759          * all updates to the file when we bypass pagecache...
3760          */
3761         if (EXT4_SB(sb)->s_journal &&
3762             ext4_should_journal_data(path.dentry->d_inode)) {
3763                 /*
3764                  * We don't need to lock updates but journal_flush() could
3765                  * otherwise be livelocked...
3766                  */
3767                 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
3768                 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3769                 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3770                 if (err) {
3771                         path_put(&path);
3772                         return err;
3773                 }
3774         }
3775
3776         err = vfs_quota_on_path(sb, type, format_id, &path);
3777         path_put(&path);
3778         return err;
3779 }
3780
3781 /* Read data from quotafile - avoid pagecache and such because we cannot afford
3782  * acquiring the locks... As quota files are never truncated and quota code
3783  * itself serializes the operations (and noone else should touch the files)
3784  * we don't have to be afraid of races */
3785 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
3786                                size_t len, loff_t off)
3787 {
3788         struct inode *inode = sb_dqopt(sb)->files[type];
3789         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3790         int err = 0;
3791         int offset = off & (sb->s_blocksize - 1);
3792         int tocopy;
3793         size_t toread;
3794         struct buffer_head *bh;
3795         loff_t i_size = i_size_read(inode);
3796
3797         if (off > i_size)
3798                 return 0;
3799         if (off+len > i_size)
3800                 len = i_size-off;
3801         toread = len;
3802         while (toread > 0) {
3803                 tocopy = sb->s_blocksize - offset < toread ?
3804                                 sb->s_blocksize - offset : toread;
3805                 bh = ext4_bread(NULL, inode, blk, 0, &err);
3806                 if (err)
3807                         return err;
3808                 if (!bh)        /* A hole? */
3809                         memset(data, 0, tocopy);
3810                 else
3811                         memcpy(data, bh->b_data+offset, tocopy);
3812                 brelse(bh);
3813                 offset = 0;
3814                 toread -= tocopy;
3815                 data += tocopy;
3816                 blk++;
3817         }
3818         return len;
3819 }
3820
3821 /* Write to quotafile (we know the transaction is already started and has
3822  * enough credits) */
3823 static ssize_t ext4_quota_write(struct super_block *sb, int type,
3824                                 const char *data, size_t len, loff_t off)
3825 {
3826         struct inode *inode = sb_dqopt(sb)->files[type];
3827         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3828         int err = 0;
3829         int offset = off & (sb->s_blocksize - 1);
3830         int tocopy;
3831         int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
3832         size_t towrite = len;
3833         struct buffer_head *bh;
3834         handle_t *handle = journal_current_handle();
3835
3836         if (EXT4_SB(sb)->s_journal && !handle) {
3837                 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
3838                         " cancelled because transaction is not started.\n",
3839                         (unsigned long long)off, (unsigned long long)len);
3840                 return -EIO;
3841         }
3842         mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3843         while (towrite > 0) {
3844                 tocopy = sb->s_blocksize - offset < towrite ?
3845                                 sb->s_blocksize - offset : towrite;
3846                 bh = ext4_bread(handle, inode, blk, 1, &err);
3847                 if (!bh)
3848                         goto out;
3849                 if (journal_quota) {
3850                         err = ext4_journal_get_write_access(handle, bh);
3851                         if (err) {
3852                                 brelse(bh);
3853                                 goto out;
3854                         }
3855                 }
3856                 lock_buffer(bh);
3857                 memcpy(bh->b_data+offset, data, tocopy);
3858                 flush_dcache_page(bh->b_page);
3859                 unlock_buffer(bh);
3860                 if (journal_quota)
3861                         err = ext4_handle_dirty_metadata(handle, NULL, bh);
3862                 else {
3863                         /* Always do at least ordered writes for quotas */
3864                         err = ext4_jbd2_file_inode(handle, inode);
3865                         mark_buffer_dirty(bh);
3866                 }
3867                 brelse(bh);
3868                 if (err)
3869                         goto out;
3870                 offset = 0;
3871                 towrite -= tocopy;
3872                 data += tocopy;
3873                 blk++;
3874         }
3875 out:
3876         if (len == towrite) {
3877                 mutex_unlock(&inode->i_mutex);
3878                 return err;
3879         }
3880         if (inode->i_size < off+len-towrite) {
3881                 i_size_write(inode, off+len-towrite);
3882                 EXT4_I(inode)->i_disksize = inode->i_size;
3883         }
3884         inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3885         ext4_mark_inode_dirty(handle, inode);
3886         mutex_unlock(&inode->i_mutex);
3887         return len - towrite;
3888 }
3889
3890 #endif
3891
3892 static int ext4_get_sb(struct file_system_type *fs_type,
3893         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3894 {
3895         return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3896 }
3897
3898 static struct file_system_type ext4_fs_type = {
3899         .owner          = THIS_MODULE,
3900         .name           = "ext4",
3901         .get_sb         = ext4_get_sb,
3902         .kill_sb        = kill_block_super,
3903         .fs_flags       = FS_REQUIRES_DEV,
3904 };
3905
3906 #ifdef CONFIG_EXT4DEV_COMPAT
3907 static int ext4dev_get_sb(struct file_system_type *fs_type,
3908         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3909 {
3910         printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
3911                "to mount using ext4\n");
3912         printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
3913                "will go away by 2.6.31\n");
3914         return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3915 }
3916
3917 static struct file_system_type ext4dev_fs_type = {
3918         .owner          = THIS_MODULE,
3919         .name           = "ext4dev",
3920         .get_sb         = ext4dev_get_sb,
3921         .kill_sb        = kill_block_super,
3922         .fs_flags       = FS_REQUIRES_DEV,
3923 };
3924 MODULE_ALIAS("ext4dev");
3925 #endif
3926
3927 static int __init init_ext4_fs(void)
3928 {
3929         int err;
3930
3931         ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
3932         if (!ext4_kset)
3933                 return -ENOMEM;
3934         ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3935         err = init_ext4_mballoc();
3936         if (err)
3937                 return err;
3938
3939         err = init_ext4_xattr();
3940         if (err)
3941                 goto out2;
3942         err = init_inodecache();
3943         if (err)
3944                 goto out1;
3945         err = register_filesystem(&ext4_fs_type);
3946         if (err)
3947                 goto out;
3948 #ifdef CONFIG_EXT4DEV_COMPAT
3949         err = register_filesystem(&ext4dev_fs_type);
3950         if (err) {
3951                 unregister_filesystem(&ext4_fs_type);
3952                 goto out;
3953         }
3954 #endif
3955         return 0;
3956 out:
3957         destroy_inodecache();
3958 out1:
3959         exit_ext4_xattr();
3960 out2:
3961         exit_ext4_mballoc();
3962         return err;
3963 }
3964
3965 static void __exit exit_ext4_fs(void)
3966 {
3967         unregister_filesystem(&ext4_fs_type);
3968 #ifdef CONFIG_EXT4DEV_COMPAT
3969         unregister_filesystem(&ext4dev_fs_type);
3970 #endif
3971         destroy_inodecache();
3972         exit_ext4_xattr();
3973         exit_ext4_mballoc();
3974         remove_proc_entry("fs/ext4", NULL);
3975         kset_unregister(ext4_kset);
3976 }
3977
3978 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
3979 MODULE_DESCRIPTION("Fourth Extended Filesystem");
3980 MODULE_LICENSE("GPL");
3981 module_init(init_ext4_fs)
3982 module_exit(exit_ext4_fs)