Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 10 Jan 2012 23:51:48 +0000 (15:51 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 10 Jan 2012 23:51:48 +0000 (15:51 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 10 Jan 2012 23:51:48 +0000 (15:51 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 10 Jan 2012 23:51:48 +0000 (15:51 -0800)
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt

index 4917cf24a5e0885518cf06a12e53d4057f5c91fa..10ec4639f1522dad34d66eea61443750435fe3ae 100644 (file)
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -581,6 +581,13 @@ Table of Ext4 specific ioctls
                               behaviour may change in the future as it is
                               not necessary and has been done this way only
                               for sake of simplicity.
+
+ EXT4_IOC_RESIZE_FS          Resize the filesystem to a new size.  The number
+                             of blocks of the resized filesystem is passed in
+                             via a 64-bit integer argument.  The kernel
+                             allocates the bitmaps and inode tables, so the
+                             userspace tool just passes the new block count.
+
  ..............................................................................
  
  References
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c

index 12ccacda44e0288e13247e3e79ebd414287eb548..f9e2cd8cf711d2f43a74f5a5f4cc830604cd82d9 100644 (file)
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -23,6 +23,8 @@
  
  #include <trace/events/ext4.h>
  
+static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+                                           ext4_group_t block_group);
  /*
   * balloc.c contains the blocks allocation and deallocation routines
   */
@@ -668,7 +670,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
   * This function returns the number of file system metadata clusters at
   * the beginning of a block group, including the reserved gdt blocks.
   */
-unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
                                      ext4_group_t block_group)
  {
         struct ext4_sb_info *sbi = EXT4_SB(sb);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h

index 1554b15f91bce81f0a7b43e13bba1c738361a668..513004fc3d840ee03586a4fedcdb133d8031c642 100644 (file)
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -511,6 +511,14 @@ struct ext4_new_group_data {
         __u32 free_blocks_count;
  };
  
+/* Indexes used to index group tables in ext4_new_group_data */
+enum {
+       BLOCK_BITMAP = 0,       /* block bitmap */
+       INODE_BITMAP,           /* inode bitmap */
+       INODE_TABLE,            /* inode tables */
+       GROUP_TABLE_COUNT,
+};
+
  /*
   * Flags used by ext4_map_blocks()
   */
@@ -575,6 +583,7 @@ struct ext4_new_group_data {
   /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
  #define EXT4_IOC_ALLOC_DA_BLKS         _IO('f', 12)
  #define EXT4_IOC_MOVE_EXT              _IOWR('f', 15, struct move_extent)
+#define EXT4_IOC_RESIZE_FS             _IOW('f', 16, __u64)
  
  #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
  /*
@@ -957,12 +966,13 @@ struct ext4_inode_info {
  #define test_opt2(sb, opt)             (EXT4_SB(sb)->s_mount_opt2 & \
                                          EXT4_MOUNT2_##opt)
  
-#define ext4_set_bit                   __test_and_set_bit_le
+#define ext4_test_and_set_bit          __test_and_set_bit_le
+#define ext4_set_bit                   __set_bit_le
  #define ext4_set_bit_atomic            ext2_set_bit_atomic
-#define ext4_clear_bit                 __test_and_clear_bit_le
+#define ext4_test_and_clear_bit                __test_and_clear_bit_le
+#define ext4_clear_bit                 __clear_bit_le
  #define ext4_clear_bit_atomic          ext2_clear_bit_atomic
  #define ext4_test_bit                  test_bit_le
-#define ext4_find_first_zero_bit       find_first_zero_bit_le
  #define ext4_find_next_zero_bit                find_next_zero_bit_le
  #define ext4_find_next_bit             find_next_bit_le
  
@@ -1397,6 +1407,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
  #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE     0x0040
  #define EXT4_FEATURE_RO_COMPAT_QUOTA           0x0100
  #define EXT4_FEATURE_RO_COMPAT_BIGALLOC                0x0200
+#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM   0x0400
  
  #define EXT4_FEATURE_INCOMPAT_COMPRESSION      0x0001
  #define EXT4_FEATURE_INCOMPAT_FILETYPE         0x0002
@@ -1409,6 +1420,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
  #define EXT4_FEATURE_INCOMPAT_FLEX_BG          0x0200
  #define EXT4_FEATURE_INCOMPAT_EA_INODE         0x0400 /* EA in inode */
  #define EXT4_FEATURE_INCOMPAT_DIRDATA          0x1000 /* data in dirent */
+#define EXT4_FEATURE_INCOMPAT_INLINEDATA       0x2000 /* data in inode */
+#define EXT4_FEATURE_INCOMPAT_LARGEDIR         0x4000 /* >2GB or 3-lvl htree */
  
  #define EXT2_FEATURE_COMPAT_SUPP       EXT4_FEATURE_COMPAT_EXT_ATTR
  #define EXT2_FEATURE_INCOMPAT_SUPP     (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1790,8 +1803,6 @@ extern void ext4_init_block_bitmap(struct super_block *sb,
  extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
                                               ext4_group_t block_group,
                                               struct ext4_group_desc *gdp);
-extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
-                                           ext4_group_t block_group);
  extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
                                            ext4_group_t block_group,
                                            struct ext4_group_desc *gdp);
@@ -1880,16 +1891,9 @@ extern int ext4_alloc_da_blocks(struct inode *inode);
  extern void ext4_set_aops(struct inode *inode);
  extern int ext4_writepage_trans_blocks(struct inode *);
  extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
-extern int ext4_block_truncate_page(handle_t *handle,
-               struct address_space *mapping, loff_t from);
-extern int ext4_block_zero_page_range(handle_t *handle,
-               struct address_space *mapping, loff_t from, loff_t length);
  extern int ext4_discard_partial_page_buffers(handle_t *handle,
                 struct address_space *mapping, loff_t from,
                 loff_t length, int flags);
-extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
-               struct inode *inode, struct page *page, loff_t from,
-               loff_t length, int flags);
  extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
  extern qsize_t *ext4_get_reserved_space(struct inode *inode);
  extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1924,6 +1928,7 @@ extern int ext4_group_add(struct super_block *sb,
  extern int ext4_group_extend(struct super_block *sb,
                                 struct ext4_super_block *es,
                                 ext4_fsblk_t n_blocks_count);
+extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
  
  /* super.c */
  extern void *ext4_kvmalloc(size_t size, gfp_t flags);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c

index 841faf5fb785058477ba59b205fe0e55dd1c0f0d..74f23c292e1b3000bb7bd9cf0e953df27488b698 100644 (file)
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3280,6 +3280,9 @@ static int ext4_find_delalloc_range(struct inode *inode,
         ext4_lblk_t i, pg_lblk;
         pgoff_t index;
  
+       if (!test_opt(inode->i_sb, DELALLOC))
+               return 0;
+
         /* reverse search wont work if fs block size is less than page size */
         if (inode->i_blkbits < PAGE_CACHE_SHIFT)
                 search_hint_reverse = 0;
@@ -3452,8 +3455,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
         int err = 0;
         ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
  
-       ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"
-                 "block %llu, max_blocks %u, flags %d, allocated %u",
+       ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
+                 "block %llu, max_blocks %u, flags %x, allocated %u\n",
                   inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
                   flags, allocated);
         ext4_ext_show_leaf(inode, path);
@@ -3624,7 +3627,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
         ext4_lblk_t ex_cluster_start, ex_cluster_end;
-       ext4_lblk_t rr_cluster_start, rr_cluster_end;
+       ext4_lblk_t rr_cluster_start;
         ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
         ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
         unsigned short ee_len = ext4_ext_get_actual_len(ex);
@@ -3635,7 +3638,6 @@ static int get_implied_cluster_alloc(struct super_block *sb,
  
         /* The requested region passed into ext4_map_blocks() */
         rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
-       rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
  
         if ((rr_cluster_start == ex_cluster_end) ||
             (rr_cluster_start == ex_cluster_start)) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c

index 4637af036d9c8ae67e1a93dacc9607b3086d68f0..25d8c9781ad94ea758781f906a34412743b3cda0 100644 (file)
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -252,7 +252,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
                 fatal = ext4_journal_get_write_access(handle, bh2);
         }
         ext4_lock_group(sb, block_group);
-       cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
+       cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data);
         if (fatal || !cleared) {
                 ext4_unlock_group(sb, block_group);
                 goto out;
@@ -358,7 +358,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         ext4_group_t real_ngroups = ext4_get_groups_count(sb);
         int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
-       unsigned int freei, avefreei;
+       unsigned int freei, avefreei, grp_free;
         ext4_fsblk_t freeb, avefreec;
         unsigned int ndirs;
         int max_dirs, min_inodes;
@@ -477,8 +477,8 @@ fallback_retry:
         for (i = 0; i < ngroups; i++) {
                 grp = (parent_group + i) % ngroups;
                 desc = ext4_get_group_desc(sb, grp, NULL);
-               if (desc && ext4_free_inodes_count(sb, desc) &&
-                   ext4_free_inodes_count(sb, desc) >= avefreei) {
+               grp_free = ext4_free_inodes_count(sb, desc);
+               if (desc && grp_free && grp_free >= avefreei) {
                         *group = grp;
                         return 0;
                 }
@@ -618,7 +618,7 @@ static int ext4_claim_inode(struct super_block *sb,
          */
         down_read(&grp->alloc_sem);
         ext4_lock_group(sb, group);
-       if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
+       if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
                 /* not a free inode */
                 retval = 1;
                 goto err_ret;
@@ -885,8 +885,12 @@ got:
         if (IS_DIRSYNC(inode))
                 ext4_handle_sync(handle);
         if (insert_inode_locked(inode) < 0) {
-               err = -EINVAL;
-               goto fail_drop;
+               /*
+                * Likely a bitmap corruption causing inode to be allocated
+                * twice.
+                */
+               err = -EIO;
+               goto fail;
         }
         spin_lock(&sbi->s_next_gen_lock);
         inode->i_generation = sbi->s_next_generation++;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index aa8efa6572d6d835f5be2a4867d81441a71bb19f..feaa82fe629d067e0900744fcbb50da2768b0183 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -71,6 +71,9 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
  static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
  static int __ext4_journalled_writepage(struct page *page, unsigned int len);
  static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
+static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+               struct inode *inode, struct page *page, loff_t from,
+               loff_t length, int flags);
  
  /*
   * Test whether an inode is a fast symlink.
@@ -2759,7 +2762,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
         if (!io_end || !size)
                 goto out;
  
-       ext_debug("ext4_end_io_dio(): io_end 0x%p"
+       ext_debug("ext4_end_io_dio(): io_end 0x%p "
                   "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
                   iocb->private, io_end->inode->i_ino, iocb, offset,
                   size);
@@ -3160,7 +3163,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
   *
   * Returns zero on sucess or negative on failure.
   */
-int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
                 struct inode *inode, struct page *page, loff_t from,
                 loff_t length, int flags)
  {
@@ -3300,126 +3303,6 @@ next:
         return err;
  }
  
-/*
- * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
- * up to the end of the block which corresponds to `from'.
- * This required during truncate. We need to physically zero the tail end
- * of that block so it doesn't yield old data if the file is later grown.
- */
-int ext4_block_truncate_page(handle_t *handle,
-               struct address_space *mapping, loff_t from)
-{
-       unsigned offset = from & (PAGE_CACHE_SIZE-1);
-       unsigned length;
-       unsigned blocksize;
-       struct inode *inode = mapping->host;
-
-       blocksize = inode->i_sb->s_blocksize;
-       length = blocksize - (offset & (blocksize - 1));
-
-       return ext4_block_zero_page_range(handle, mapping, from, length);
-}
-
-/*
- * ext4_block_zero_page_range() zeros out a mapping of length 'length'
- * starting from file offset 'from'.  The range to be zero'd must
- * be contained with in one block.  If the specified range exceeds
- * the end of the block it will be shortened to end of the block
- * that cooresponds to 'from'
- */
-int ext4_block_zero_page_range(handle_t *handle,
-               struct address_space *mapping, loff_t from, loff_t length)
-{
-       ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
-       unsigned offset = from & (PAGE_CACHE_SIZE-1);
-       unsigned blocksize, max, pos;
-       ext4_lblk_t iblock;
-       struct inode *inode = mapping->host;
-       struct buffer_head *bh;
-       struct page *page;
-       int err = 0;
-
-       page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
-                                  mapping_gfp_mask(mapping) & ~__GFP_FS);
-       if (!page)
-               return -ENOMEM;
-
-       blocksize = inode->i_sb->s_blocksize;
-       max = blocksize - (offset & (blocksize - 1));
-
-       /*
-        * correct length if it does not fall between
-        * 'from' and the end of the block
-        */
-       if (length > max || length < 0)
-               length = max;
-
-       iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
-
-       if (!page_has_buffers(page))
-               create_empty_buffers(page, blocksize, 0);
-
-       /* Find the buffer that contains "offset" */
-       bh = page_buffers(page);
-       pos = blocksize;
-       while (offset >= pos) {
-               bh = bh->b_this_page;
-               iblock++;
-               pos += blocksize;
-       }
-
-       err = 0;
-       if (buffer_freed(bh)) {
-               BUFFER_TRACE(bh, "freed: skip");
-               goto unlock;
-       }
-
-       if (!buffer_mapped(bh)) {
-               BUFFER_TRACE(bh, "unmapped");
-               ext4_get_block(inode, iblock, bh, 0);
-               /* unmapped? It's a hole - nothing to do */
-               if (!buffer_mapped(bh)) {
-                       BUFFER_TRACE(bh, "still unmapped");
-                       goto unlock;
-               }
-       }
-
-       /* Ok, it's mapped. Make sure it's up-to-date */
-       if (PageUptodate(page))
-               set_buffer_uptodate(bh);
-
-       if (!buffer_uptodate(bh)) {
-               err = -EIO;
-               ll_rw_block(READ, 1, &bh);
-               wait_on_buffer(bh);
-               /* Uhhuh. Read error. Complain and punt. */
-               if (!buffer_uptodate(bh))
-                       goto unlock;
-       }
-
-       if (ext4_should_journal_data(inode)) {
-               BUFFER_TRACE(bh, "get write access");
-               err = ext4_journal_get_write_access(handle, bh);
-               if (err)
-                       goto unlock;
-       }
-
-       zero_user(page, offset, length);
-
-       BUFFER_TRACE(bh, "zeroed end of block");
-
-       err = 0;
-       if (ext4_should_journal_data(inode)) {
-               err = ext4_handle_dirty_metadata(handle, inode, bh);
-       } else
-               mark_buffer_dirty(bh);
-
-unlock:
-       unlock_page(page);
-       page_cache_release(page);
-       return err;
-}
-
  int ext4_can_truncate(struct inode *inode)
  {
         if (S_ISREG(inode->i_mode))
@@ -4646,9 +4529,19 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
                 return 0;
         if (is_journal_aborted(journal))
                 return -EROFS;
+       /* We have to allocate physical blocks for delalloc blocks
+        * before flushing the journal.  Otherwise delalloc blocks can
+        * no longer be allocated.  Moreover, a truncate on delalloc blocks
+        * could trigger a BUG by flushing delalloc blocks in the journal.
+        * There is no delalloc block in non-journal data mode.
+        */
+       if (val && test_opt(inode->i_sb, DELALLOC)) {
+               err = ext4_alloc_da_blocks(inode);
+               if (err < 0)
+                       return err;
+       }
  
         jbd2_journal_lock_updates(journal);
-       jbd2_journal_flush(journal);
  
         /*
          * OK, there are no updates running now, and all cached data is
@@ -4660,8 +4553,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
  
         if (val)
                 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
-       else
+       else {
+               jbd2_journal_flush(journal);
                 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
+       }
         ext4_set_aops(inode);
  
         jbd2_journal_unlock_updates(journal);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c

index e87a932b073bcf7db1916db8ef1ac469c676ca63..6eee25591b8159bc96d35a16f94f94c0855a35b9 100644 (file)
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -18,6 +18,8 @@
  #include "ext4_jbd2.h"
  #include "ext4.h"
  
+#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
+
  long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
  {
         struct inode *inode = filp->f_dentry->d_inode;
@@ -186,19 +188,22 @@ setversion_out:
                 if (err)
                         return err;
  
-               if (get_user(n_blocks_count, (__u32 __user *)arg))
-                       return -EFAULT;
+               if (get_user(n_blocks_count, (__u32 __user *)arg)) {
+                       err = -EFAULT;
+                       goto group_extend_out;
+               }
  
                 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
                         ext4_msg(sb, KERN_ERR,
                                  "Online resizing not supported with bigalloc");
-                       return -EOPNOTSUPP;
+                       err = -EOPNOTSUPP;
+                       goto group_extend_out;
                 }
  
                 err = mnt_want_write_file(filp);
                 if (err)
-                       return err;
+                       goto group_extend_out;
  
                 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
                 if (EXT4_SB(sb)->s_journal) {
@@ -209,8 +214,8 @@ setversion_out:
                 if (err == 0)
                         err = err2;
                 mnt_drop_write_file(filp);
+group_extend_out:
                 ext4_resize_end(sb);
-
                 return err;
         }
  
@@ -251,8 +256,7 @@ setversion_out:
                 err = ext4_move_extents(filp, donor_filp, me.orig_start,
                                         me.donor_start, me.len, &me.moved_len);
                 mnt_drop_write_file(filp);
-               if (me.moved_len > 0)
-                       file_remove_suid(donor_filp);
+               mnt_drop_write(filp->f_path.mnt);
  
                 if (copy_to_user((struct move_extent __user *)arg,
                                  &me, sizeof(me)))
@@ -271,19 +275,22 @@ mext_out:
                         return err;
  
                 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
-                               sizeof(input)))
-                       return -EFAULT;
+                               sizeof(input))) {
+                       err = -EFAULT;
+                       goto group_add_out;
+               }
  
                 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
                         ext4_msg(sb, KERN_ERR,
                                  "Online resizing not supported with bigalloc");
-                       return -EOPNOTSUPP;
+                       err = -EOPNOTSUPP;
+                       goto group_add_out;
                 }
  
                 err = mnt_want_write_file(filp);
                 if (err)
-                       return err;
+                       goto group_add_out;
  
                 err = ext4_group_add(sb, &input);
                 if (EXT4_SB(sb)->s_journal) {
@@ -294,8 +301,8 @@ mext_out:
                 if (err == 0)
                         err = err2;
                 mnt_drop_write_file(filp);
+group_add_out:
                 ext4_resize_end(sb);
-
                 return err;
         }
  
@@ -335,6 +342,60 @@ mext_out:
                 return err;
         }
  
+       case EXT4_IOC_RESIZE_FS: {
+               ext4_fsblk_t n_blocks_count;
+               struct super_block *sb = inode->i_sb;
+               int err = 0, err2 = 0;
+
+               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                              EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Online resizing not (yet) supported with bigalloc");
+                       return -EOPNOTSUPP;
+               }
+
+               if (EXT4_HAS_INCOMPAT_FEATURE(sb,
+                              EXT4_FEATURE_INCOMPAT_META_BG)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Online resizing not (yet) supported with meta_bg");
+                       return -EOPNOTSUPP;
+               }
+
+               if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
+                                  sizeof(__u64))) {
+                       return -EFAULT;
+               }
+
+               if (n_blocks_count > MAX_32_NUM &&
+                   !EXT4_HAS_INCOMPAT_FEATURE(sb,
+                                              EXT4_FEATURE_INCOMPAT_64BIT)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "File system only supports 32-bit block numbers");
+                       return -EOPNOTSUPP;
+               }
+
+               err = ext4_resize_begin(sb);
+               if (err)
+                       return err;
+
+               err = mnt_want_write(filp->f_path.mnt);
+               if (err)
+                       goto resizefs_out;
+
+               err = ext4_resize_fs(sb, n_blocks_count);
+               if (EXT4_SB(sb)->s_journal) {
+                       jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+                       err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+                       jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+               }
+               if (err == 0)
+                       err = err2;
+               mnt_drop_write(filp->f_path.mnt);
+resizefs_out:
+               ext4_resize_end(sb);
+               return err;
+       }
+
         case FITRIM:
         {
                 struct request_queue *q = bdev_get_queue(sb->s_bdev);
@@ -433,6 +494,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
         }
         case EXT4_IOC_MOVE_EXT:
         case FITRIM:
+       case EXT4_IOC_RESIZE_FS:
                 break;
         default:
                 return -ENOIOCTLCMD;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c

index e2d8be8f28bfb8555644bef5100b2f9b8c2cbe9a..cb990b21c698bd9dd1ec0e4bb8488f6e82bbe2f7 100644 (file)
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3671,7 +3671,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
         ext4_group_t group;
         ext4_grpblk_t bit;
  
-       trace_ext4_mb_release_group_pa(pa);
+       trace_ext4_mb_release_group_pa(sb, pa);
         BUG_ON(pa->pa_deleted == 0);
         ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
         BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c

index 996780ab4f4e83cdfc114e83ab8cad35242f4813..f9d948f0eb861f08de3ef7b589b401846dc627f6 100644 (file)
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -134,6 +134,172 @@ static int verify_group_input(struct super_block *sb,
         return err;
  }
  
+/*
+ * ext4_new_flex_group_data is used by the 64bit-resize interface to add one
+ * flex group at a time.
+ */
+struct ext4_new_flex_group_data {
+       struct ext4_new_group_data *groups;     /* new_group_data for groups
+                                                  in the flex group */
+       __u16 *bg_flags;                        /* block group flags of groups
+                                                  in @groups */
+       ext4_group_t count;                     /* number of groups in @groups
+                                                */
+};
+
+/*
+ * alloc_flex_gd() allocates an ext4_new_flex_group_data with room for
+ * @flexbg_size groups.
+ *
+ * Returns NULL on failure, otherwise the address of the allocated structure.
+ */
+static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
+{
+       struct ext4_new_flex_group_data *flex_gd;
+
+       flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
+       if (flex_gd == NULL)
+               goto out3;
+
+       flex_gd->count = flexbg_size;
+
+       flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) *
+                                 flexbg_size, GFP_NOFS);
+       if (flex_gd->groups == NULL)
+               goto out2;
+
+       flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS);
+       if (flex_gd->bg_flags == NULL)
+               goto out1;
+
+       return flex_gd;
+
+out1:
+       kfree(flex_gd->groups);
+out2:
+       kfree(flex_gd);
+out3:
+       return NULL;
+}
+
+static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
+{
+       kfree(flex_gd->bg_flags);
+       kfree(flex_gd->groups);
+       kfree(flex_gd);
+}
+
+/*
+ * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps
+ * and inode tables for a flex group.
+ *
+ * This function is used by 64bit-resize.  Note that this function allocates
+ * group tables starting from the 1st of the groups described by @flex_gd,
+ * which may cover only part of a flex group.
+ *
+ * @sb: super block of the fs to which the groups belong
+ */
+static void ext4_alloc_group_tables(struct super_block *sb,
+                               struct ext4_new_flex_group_data *flex_gd,
+                               int flexbg_size)
+{
+       struct ext4_new_group_data *group_data = flex_gd->groups;
+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+       ext4_fsblk_t start_blk;
+       ext4_fsblk_t last_blk;
+       ext4_group_t src_group;
+       ext4_group_t bb_index = 0;
+       ext4_group_t ib_index = 0;
+       ext4_group_t it_index = 0;
+       ext4_group_t group;
+       ext4_group_t last_group;
+       unsigned overhead;
+
+       BUG_ON(flex_gd->count == 0 || group_data == NULL);
+
+       src_group = group_data[0].group;
+       last_group  = src_group + flex_gd->count - 1;
+
+       BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) !=
+              (last_group & ~(flexbg_size - 1))));
+next_group:
+       group = group_data[0].group;
+       start_blk = ext4_group_first_block_no(sb, src_group);
+       last_blk = start_blk + group_data[src_group - group].blocks_count;
+
+       overhead = ext4_bg_has_super(sb, src_group) ?
+                  (1 + ext4_bg_num_gdb(sb, src_group) +
+                   le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
+
+       start_blk += overhead;
+
+       BUG_ON(src_group >= group_data[0].group + flex_gd->count);
+       /* We collect contiguous blocks as much as possible. */
+       src_group++;
+       for (; src_group <= last_group; src_group++)
+               if (!ext4_bg_has_super(sb, src_group))
+                       last_blk += group_data[src_group - group].blocks_count;
+               else
+                       break;
+
+       /* Allocate block bitmaps */
+       for (; bb_index < flex_gd->count; bb_index++) {
+               if (start_blk >= last_blk)
+                       goto next_group;
+               group_data[bb_index].block_bitmap = start_blk++;
+               ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
+               group -= group_data[0].group;
+               group_data[group].free_blocks_count--;
+               if (flexbg_size > 1)
+                       flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+       }
+
+       /* Allocate inode bitmaps */
+       for (; ib_index < flex_gd->count; ib_index++) {
+               if (start_blk >= last_blk)
+                       goto next_group;
+               group_data[ib_index].inode_bitmap = start_blk++;
+               ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
+               group -= group_data[0].group;
+               group_data[group].free_blocks_count--;
+               if (flexbg_size > 1)
+                       flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+       }
+
+       /* Allocate inode tables */
+       for (; it_index < flex_gd->count; it_index++) {
+               if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
+                       goto next_group;
+               group_data[it_index].inode_table = start_blk;
+               ext4_get_group_no_and_offset(sb, start_blk, &group, NULL);
+               group -= group_data[0].group;
+               group_data[group].free_blocks_count -=
+                                       EXT4_SB(sb)->s_itb_per_group;
+               if (flexbg_size > 1)
+                       flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+
+               start_blk += EXT4_SB(sb)->s_itb_per_group;
+       }
+
+       if (test_opt(sb, DEBUG)) {
+               int i;
+               group = group_data[0].group;
+
+               printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
+                      "%d groups, flexbg size is %d:\n", flex_gd->count,
+                      flexbg_size);
+
+               for (i = 0; i < flex_gd->count; i++) {
+                       printk(KERN_DEBUG "adding %s group %u: %u "
+                              "blocks (%d free)\n",
+                              ext4_bg_has_super(sb, group + i) ? "normal" :
+                              "no-super", group + i,
+                              group_data[i].blocks_count,
+                              group_data[i].free_blocks_count);
+               }
+       }
+}
+
  static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
                                   ext4_fsblk_t blk)
  {
@@ -179,131 +345,250 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh)
  }
  
  /*
- * Set up the block and inode bitmaps, and the inode table for the new group.
+ * set_flexbg_block_bitmap() marks @count blocks starting from @block as used.
+ *
+ * Helper for setup_new_flex_group_blocks(); sets group-table bits in bitmaps.
+ *
+ * @sb: super block
+ * @handle: journal handle
+ * @flex_gd: flex group data
+ */
+static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
+                       struct ext4_new_flex_group_data *flex_gd,
+                       ext4_fsblk_t block, ext4_group_t count)
+{
+       ext4_group_t count2;
+
+       ext4_debug("mark blocks [%llu/%u] used\n", block, count);
+       /*
+        * Walk the range one block group at a time.  count2 is the number of
+        * blocks handled by the current pass, so both the remaining count and
+        * the current block advance by it.
+        */
+       for (count2 = count; count > 0; count -= count2, block += count2) {
+               ext4_fsblk_t start;
+               struct buffer_head *bh;
+               ext4_group_t group;
+               int err;
+
+               ext4_get_group_no_and_offset(sb, block, &group, NULL);
+               start = ext4_group_first_block_no(sb, group);
+               /* Turn the group number into an index into flex_gd's arrays */
+               group -= flex_gd->groups[0].group;
+
+               /* Do not run past the last bit of this group's bitmap block */
+               count2 = sb->s_blocksize * 8 - (block - start);
+               if (count2 > count)
+                       count2 = count;
+
+               /* Groups flagged BLOCK_UNINIT get no bits set here */
+               if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) {
+                       BUG_ON(flex_gd->count > 1);
+                       continue;
+               }
+
+               err = extend_or_restart_transaction(handle, 1);
+               if (err)
+                       return err;
+
+               bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
+               if (!bh)
+                       return -EIO;
+
+               err = ext4_journal_get_write_access(handle, bh);
+               if (err) {
+                       /* Drop the reference obtained from sb_getblk() */
+                       brelse(bh);
+                       return err;
+               }
+               ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block,
+                          block - start, count2);
+               ext4_set_bits(bh->b_data, block - start, count2);
+
+               err = ext4_handle_dirty_metadata(handle, NULL, bh);
+               /* Release the buffer whether or not dirtying succeeded */
+               brelse(bh);
+               if (unlikely(err))
+                       return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Set up the block and inode bitmaps, and the inode table for the new groups.
   * This doesn't need to be part of the main transaction, since we are only
   * changing blocks outside the actual filesystem.  We still do journaling to
   * ensure the recovery is correct in case of a failure just after resize.
   * If any part of this fails, we simply abort the resize.
+ *
+ * setup_new_flex_group_blocks handles a flex group as follows:
+ *  1. copy super block and GDT, and initialize group tables if necessary.
+ *     In this step, we only set bits in blocks bitmaps for blocks taken by
+ *     super block and GDT.
+ *  2. allocate group tables in block bitmaps, that is, set bits in block
+ *     bitmap for blocks taken by group tables.
   */
-static int setup_new_group_blocks(struct super_block *sb,
-                                 struct ext4_new_group_data *input)
+static int setup_new_flex_group_blocks(struct super_block *sb,
+                               struct ext4_new_flex_group_data *flex_gd)
  {
+       int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group};
+       ext4_fsblk_t start;
+       ext4_fsblk_t block;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
-       ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group);
-       int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
-               le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
-       unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group);
-       struct buffer_head *bh;
+       struct ext4_super_block *es = sbi->s_es;
+       struct ext4_new_group_data *group_data = flex_gd->groups;
+       __u16 *bg_flags = flex_gd->bg_flags;
         handle_t *handle;
-       ext4_fsblk_t block;
-       ext4_grpblk_t bit;
-       int i;
-       int err = 0, err2;
+       ext4_group_t group, count;
+       struct buffer_head *bh = NULL;
+       int reserved_gdb, i, j, err = 0, err2;
+
+       BUG_ON(!flex_gd->count || !group_data ||
+              group_data[0].group != sbi->s_groups_count);
+
+       reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
  
         /* This transaction may be extended/restarted along the way */
         handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
-
         if (IS_ERR(handle))
                 return PTR_ERR(handle);
  
-       BUG_ON(input->group != sbi->s_groups_count);
+       group = group_data[0].group;
+       for (i = 0; i < flex_gd->count; i++, group++) {
+               unsigned long gdblocks;
  
-       /* Copy all of the GDT blocks into the backup in this group */
-       for (i = 0, bit = 1, block = start + 1;
-            i < gdblocks; i++, block++, bit++) {
-               struct buffer_head *gdb;
+               gdblocks = ext4_bg_num_gdb(sb, group);
+               start = ext4_group_first_block_no(sb, group);
  
-               ext4_debug("update backup group %#04llx (+%d)\n", block, bit);
-               err = extend_or_restart_transaction(handle, 1);
-               if (err)
-                       goto exit_journal;
+               /* Copy all of the GDT blocks into the backup in this group */
+               for (j = 0, block = start + 1; j < gdblocks; j++, block++) {
+                       struct buffer_head *gdb;
  
-               gdb = sb_getblk(sb, block);
-               if (!gdb) {
-                       err = -EIO;
-                       goto exit_journal;
-               }
-               if ((err = ext4_journal_get_write_access(handle, gdb))) {
+                       ext4_debug("update backup group %#04llx\n", block);
+                       err = extend_or_restart_transaction(handle, 1);
+                       if (err)
+                               goto out;
+
+                       gdb = sb_getblk(sb, block);
+                       if (!gdb) {
+                               err = -EIO;
+                               goto out;
+                       }
+
+                       err = ext4_journal_get_write_access(handle, gdb);
+                       if (err) {
+                               brelse(gdb);
+                               goto out;
+                       }
+                       memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
+                              gdb->b_size);
+                       set_buffer_uptodate(gdb);
+
+                       err = ext4_handle_dirty_metadata(handle, NULL, gdb);
+                       if (unlikely(err)) {
+                               brelse(gdb);
+                               goto out;
+                       }
                         brelse(gdb);
-                       goto exit_journal;
                 }
-               memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
-               set_buffer_uptodate(gdb);
-               err = ext4_handle_dirty_metadata(handle, NULL, gdb);
-               if (unlikely(err)) {
-                       brelse(gdb);
-                       goto exit_journal;
+
+               /* Zero out all of the reserved backup group descriptor
+                * table blocks
+                */
+               if (ext4_bg_has_super(sb, group)) {
+                       err = sb_issue_zeroout(sb, gdblocks + start + 1,
+                                       reserved_gdb, GFP_NOFS);
+                       if (err)
+                               goto out;
                 }
-               brelse(gdb);
-       }
  
-       /* Zero out all of the reserved backup group descriptor table blocks */
-       ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
-                       block, sbi->s_itb_per_group);
-       err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
-                              GFP_NOFS);
-       if (err)
-               goto exit_journal;
+               /* Initialize group tables of the group @group */
+               if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
+                       goto handle_bb;
  
-       err = extend_or_restart_transaction(handle, 2);
-       if (err)
-               goto exit_journal;
+               /* Zero out all of the inode table blocks */
+               block = group_data[i].inode_table;
+               ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
+                          block, sbi->s_itb_per_group);
+               err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
+                                      GFP_NOFS);
+               if (err)
+                       goto out;
  
-       bh = bclean(handle, sb, input->block_bitmap);
-       if (IS_ERR(bh)) {
-               err = PTR_ERR(bh);
-               goto exit_journal;
-       }
+handle_bb:
+               if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT)
+                       goto handle_ib;
  
-       if (ext4_bg_has_super(sb, input->group)) {
-               ext4_debug("mark backup group tables %#04llx (+0)\n", start);
-               ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1);
-       }
+               /* Initialize block bitmap of the @group */
+               block = group_data[i].block_bitmap;
+               err = extend_or_restart_transaction(handle, 1);
+               if (err)
+                       goto out;
  
-       ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
-                  input->block_bitmap - start);
-       ext4_set_bit(input->block_bitmap - start, bh->b_data);
-       ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap,
-                  input->inode_bitmap - start);
-       ext4_set_bit(input->inode_bitmap - start, bh->b_data);
-
-       /* Zero out all of the inode table blocks */
-       block = input->inode_table;
-       ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
-                       block, sbi->s_itb_per_group);
-       err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
-       if (err)
-               goto exit_bh;
-       ext4_set_bits(bh->b_data, input->inode_table - start,
-                     sbi->s_itb_per_group);
+               bh = bclean(handle, sb, block);
+               if (IS_ERR(bh)) {
+                       err = PTR_ERR(bh);
+                       goto out;
+               }
+               if (ext4_bg_has_super(sb, group)) {
+                       ext4_debug("mark backup superblock %#04llx (+0)\n",
+                                  start);
+                       ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb +
+                                                    1);
+               }
+               ext4_mark_bitmap_end(group_data[i].blocks_count,
+                                    sb->s_blocksize * 8, bh->b_data);
+               err = ext4_handle_dirty_metadata(handle, NULL, bh);
+               if (err)
+                       goto out;
+               brelse(bh);
  
+handle_ib:
+               if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
+                       continue;
  
-       ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
-                            bh->b_data);
-       err = ext4_handle_dirty_metadata(handle, NULL, bh);
-       if (unlikely(err)) {
-               ext4_std_error(sb, err);
-               goto exit_bh;
+               /* Initialize inode bitmap of the @group */
+               block = group_data[i].inode_bitmap;
+               err = extend_or_restart_transaction(handle, 1);
+               if (err)
+                       goto out;
+               /* Mark unused entries in inode bitmap used */
+               bh = bclean(handle, sb, block);
+               if (IS_ERR(bh)) {
+                       err = PTR_ERR(bh);
+                       goto out;
+               }
+
+               ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
+                                    sb->s_blocksize * 8, bh->b_data);
+               err = ext4_handle_dirty_metadata(handle, NULL, bh);
+               if (err)
+                       goto out;
+               brelse(bh);
         }
-       brelse(bh);
-       /* Mark unused entries in inode bitmap used */
-       ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
-                  input->inode_bitmap, input->inode_bitmap - start);
-       if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
-               err = PTR_ERR(bh);
-               goto exit_journal;
+       bh = NULL;
+
+       /* Mark group tables in block bitmap */
+       for (j = 0; j < GROUP_TABLE_COUNT; j++) {
+               count = group_table_count[j];
+               start = (&group_data[0].block_bitmap)[j];
+               block = start;
+               for (i = 1; i < flex_gd->count; i++) {
+                       block += group_table_count[j];
+                       if (block == (&group_data[i].block_bitmap)[j]) {
+                               count += group_table_count[j];
+                               continue;
+                       }
+                       err = set_flexbg_block_bitmap(sb, handle,
+                                               flex_gd, start, count);
+                       if (err)
+                               goto out;
+                       count = group_table_count[j];
+                       start = group_data[i].block_bitmap;
+                       block = start;
+               }
+
+               if (count) {
+                       err = set_flexbg_block_bitmap(sb, handle,
+                                               flex_gd, start, count);
+                       if (err)
+                               goto out;
+               }
         }
  
-       ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
-                            bh->b_data);
-       err = ext4_handle_dirty_metadata(handle, NULL, bh);
-       if (unlikely(err))
-               ext4_std_error(sb, err);
-exit_bh:
+out:
         brelse(bh);
-
-exit_journal:
-       if ((err2 = ext4_journal_stop(handle)) && !err)
+       err2 = ext4_journal_stop(handle);
+       if (err2 && !err)
                 err = err2;
  
         return err;
@@ -351,10 +636,10 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
   * groups in current filesystem that have BACKUPS, or -ve error code.
   */
  static int verify_reserved_gdb(struct super_block *sb,
+                              ext4_group_t end,
                                struct buffer_head *primary)
  {
         const ext4_fsblk_t blk = primary->b_blocknr;
-       const ext4_group_t end = EXT4_SB(sb)->s_groups_count;
         unsigned three = 1;
         unsigned five = 5;
         unsigned seven = 7;
@@ -429,7 +714,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
         if (!gdb_bh)
                 return -EIO;
  
-       gdbackups = verify_reserved_gdb(sb, gdb_bh);
+       gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
         if (gdbackups < 0) {
                 err = gdbackups;
                 goto exit_bh;
@@ -592,7 +877,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
                         err = -EIO;
                         goto exit_bh;
                 }
-               if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) {
+               gdbackups = verify_reserved_gdb(sb, group, primary[res]);
+               if (gdbackups < 0) {
                         brelse(primary[res]);
                         err = gdbackups;
                         goto exit_bh;
@@ -735,6 +1021,348 @@ exit_err:
         }
  }
  
+/*
+ * ext4_add_new_descs() prepares the group descriptor blocks for @count new
+ * groups, the first of which is @group.
+ *
+ * @handle: journal handle
+ * @sb: super block
+ * @group: the group no. of the first group desc to be added
+ * @resize_inode: the resize inode
+ * @count: number of group descriptors to be added
+ *
+ * Returns 0 on success, or the first error reported by a helper.
+ */
+static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
+                             ext4_group_t group, struct inode *resize_inode,
+                             ext4_group_t count)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_super_block *es = sbi->s_es;
+       int n, err;
+
+       for (n = 0; n < count; n++, group++) {
+               int reserved_gdb = 0;
+               int gdb_off, gdb_num;
+
+               if (ext4_bg_has_super(sb, group))
+                       reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
+
+               gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
+               gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
+
+               /*
+                * Either reserved blocks are added to a backup group, or
+                * reserved blocks are consumed for the first group of a new
+                * descriptor block -- never both, as that would mean more
+                * complex code, and sane people don't use non-sparse
+                * filesystems anymore.
+                * NOTE(review): nothing in this function enforces that;
+                * presumably the callers have checked it -- confirm.
+                */
+               if (!gdb_off) {
+                       /* First descriptor of a block: a new GDT block */
+                       err = add_new_gdb(handle, resize_inode, group);
+               } else {
+                       /* Descriptor lands in an already existing GDT block */
+                       err = ext4_journal_get_write_access(handle,
+                                               sbi->s_group_desc[gdb_num]);
+
+                       if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
+                               err = reserve_backup_gdb(handle, resize_inode,
+                                                        group);
+               }
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
+/*
+ * ext4_setup_new_descs() fills in the group descriptors of every group
+ * described by @flex_gd and registers each group with mballoc.
+ *
+ * Returns 0 on success, or the first error reported by a helper.
+ */
+static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
+                               struct ext4_new_flex_group_data *flex_gd)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       int idx, err;
+
+       for (idx = 0; idx < flex_gd->count; idx++) {
+               struct ext4_new_group_data *gd = &flex_gd->groups[idx];
+               ext4_group_t group = gd->group;
+               int gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
+               int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
+               struct buffer_head *gdb_bh;
+               struct ext4_group_desc *gdp;
+
+               /*
+                * No get_write_access() here: the comment this replaces says
+                * ext4_add_new_descs() already took it on gdb_bh.
+                */
+               gdb_bh = sbi->s_group_desc[gdb_num];
+
+               /* Locate this group's slot inside the descriptor block */
+               gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data +
+                                                gdb_off * EXT4_DESC_SIZE(sb));
+
+               /* Start from a clean descriptor, then fill in every field */
+               memset(gdp, 0, EXT4_DESC_SIZE(sb));
+               ext4_block_bitmap_set(sb, gdp, gd->block_bitmap);
+               ext4_inode_bitmap_set(sb, gdp, gd->inode_bitmap);
+               ext4_inode_table_set(sb, gdp, gd->inode_table);
+               ext4_free_group_clusters_set(sb, gdp,
+                                            EXT4_B2C(sbi, gd->free_blocks_count));
+               ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
+               gdp->bg_flags = cpu_to_le16(flex_gd->bg_flags[idx]);
+               /* The checksum goes last so it covers the fields set above */
+               gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+
+               err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
+               if (unlikely(err)) {
+                       ext4_std_error(sb, err);
+                       return err;
+               }
+
+               /*
+                * With the descriptor in place, mb_alloc can allocate its
+                * per-group memory based on it.
+                */
+               err = ext4_mb_add_groupinfo(sb, group, gdp);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
+/*
+ * ext4_update_super() updates the super block so that the newly added
+ * groups can be seen by the filesystem.
+ *
+ * It adds the new blocks and inodes to the on-disk totals, raises the
+ * in-memory group count, grows the reserved block count by the same
+ * percentage the filesystem had before, and updates the free-space
+ * counters (global and, with flex_bg, per flex group).
+ *
+ * @sb: super block
+ * @flex_gd: new added groups
+ */
+static void ext4_update_super(struct super_block *sb,
+                            struct ext4_new_flex_group_data *flex_gd)
+{
+       ext4_fsblk_t blocks_count = 0;
+       ext4_fsblk_t free_blocks = 0;
+       ext4_fsblk_t reserved_blocks = 0;
+       struct ext4_new_group_data *group_data = flex_gd->groups;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_super_block *es = sbi->s_es;
+       int i;
+
+       BUG_ON(flex_gd->count == 0 || group_data == NULL);
+       /*
+        * Make the new blocks and inodes valid next.  We do this before
+        * increasing the group count so that once the group is enabled,
+        * all of its blocks and inodes are already valid.
+        *
+        * We always allocate group-by-group, then block-by-block or
+        * inode-by-inode within a group, so enabling these
+        * blocks/inodes before the group is live won't actually let us
+        * allocate the new space yet.
+        */
+       for (i = 0; i < flex_gd->count; i++) {
+               blocks_count += group_data[i].blocks_count;
+               free_blocks += group_data[i].free_blocks_count;
+       }
+
+       /*
+        * Reserve the same percentage of the new blocks as is reserved in
+        * the existing filesystem.  The total block count is a 64-bit
+        * quantity, so the first division must be a full 64-by-64 one:
+        * do_div() only accepts a 32-bit divisor and would silently
+        * truncate it on filesystems with 2^32 or more blocks.
+        */
+       reserved_blocks = ext4_r_blocks_count(es) * 100;
+       reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es));
+       reserved_blocks *= blocks_count;
+       do_div(reserved_blocks, 100);
+
+       ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
+       le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
+                    flex_gd->count);
+
+       /*
+        * We need to protect s_groups_count against other CPUs seeing
+        * inconsistent state in the superblock.
+        *
+        * The precise rules we use are:
+        *
+        * * Writers must perform a smp_wmb() after updating all
+        *   dependent data and before modifying the groups count
+        *
+        * * Readers must perform an smp_rmb() after reading the groups
+        *   count and before reading any dependent data.
+        *
+        * NB. These rules can be relaxed when checking the group count
+        * while freeing data, as we can only allocate from a block
+        * group after serialising against the group count, and we can
+        * only then free after serialising in turn against that
+        * allocation.
+        */
+       smp_wmb();
+
+       /* Update the global fs size fields */
+       sbi->s_groups_count += flex_gd->count;
+
+       /* Update the reserved block counts only once the new group is
+        * active. */
+       ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
+                               reserved_blocks);
+
+       /* Update the free space counts */
+       percpu_counter_add(&sbi->s_freeclusters_counter,
+                          EXT4_B2C(sbi, free_blocks));
+       percpu_counter_add(&sbi->s_freeinodes_counter,
+                          EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
+
+       /*
+        * With flex_bg in use, credit the totals to the flex group that
+        * contains the first of the new groups.
+        */
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb,
+                                     EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
+           sbi->s_log_groups_per_flex) {
+               ext4_group_t flex_group;
+               flex_group = ext4_flex_group(sbi, group_data[0].group);
+               atomic_add(EXT4_B2C(sbi, free_blocks),
+                          &sbi->s_flex_groups[flex_group].free_clusters);
+               atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
+                          &sbi->s_flex_groups[flex_group].free_inodes);
+       }
+
+       if (test_opt(sb, DEBUG))
+               printk(KERN_DEBUG "EXT4-fs: added group %u:"
+                      "%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
+                      blocks_count, free_blocks, reserved_blocks);
+}
+
+/* Add a flex group to an fs. Ensure we handle all possible error conditions
+ * _before_ we start modifying the filesystem, because we cannot abort the
+ * transaction and not have it write the data to disk.
+ *
+ * @sb: super block
+ * @resize_inode: the resize inode, handed on to ext4_add_new_descs()
+ * @flex_gd: the groups to add; the first one must be the group that
+ *           directly follows the current last group
+ *
+ * Returns 0 on success or a negative error code.  On success the backup
+ * copies of the super block and of the touched group descriptor blocks
+ * are refreshed as well.
+ */
+static int ext4_flex_group_add(struct super_block *sb,
+                              struct inode *resize_inode,
+                              struct ext4_new_flex_group_data *flex_gd)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_super_block *es = sbi->s_es;
+       ext4_fsblk_t o_blocks_count;
+       ext4_grpblk_t last;
+       ext4_group_t group;
+       handle_t *handle;
+       unsigned reserved_gdb;
+       int err = 0, err2 = 0, credit;
+
+       BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags);
+
+       reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
+       o_blocks_count = ext4_blocks_count(es);
+       /* The current filesystem must end exactly on a group boundary */
+       ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
+       BUG_ON(last);
+
+       err = setup_new_flex_group_blocks(sb, flex_gd);
+       if (err)
+               goto exit;
+       /*
+        * We will always be modifying at least the superblock and  GDT
+        * block.  If we are adding a group past the last current GDT block,
+        * we will also modify the inode and the dindirect block.  If we
+        * are adding a group with superblock/GDT backups  we will also
+        * modify each of the reserved GDT dindirect blocks.
+        */
+       credit = flex_gd->count * 4 + reserved_gdb;
+       handle = ext4_journal_start_sb(sb, credit);
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+               goto exit;
+       }
+
+       err = ext4_journal_get_write_access(handle, sbi->s_sbh);
+       if (err)
+               goto exit_journal;
+
+       group = flex_gd->groups[0].group;
+       BUG_ON(group != EXT4_SB(sb)->s_groups_count);
+       err = ext4_add_new_descs(handle, sb, group,
+                               resize_inode, flex_gd->count);
+       if (err)
+               goto exit_journal;
+
+       err = ext4_setup_new_descs(handle, sb, flex_gd);
+       if (err)
+               goto exit_journal;
+
+       ext4_update_super(sb, flex_gd);
+
+       err = ext4_handle_dirty_super(handle, sb);
+
+exit_journal:
+       err2 = ext4_journal_stop(handle);
+       if (!err)
+               err = err2;
+
+       if (!err) {
+               int i;
+               update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
+                              sizeof(struct ext4_super_block));
+               for (i = 0; i < flex_gd->count; i++, group++) {
+                       struct buffer_head *gdb_bh;
+                       int gdb_num;
+                       /*
+                        * s_group_desc[] is indexed by descriptor block, so
+                        * divide by the number of descriptors per block (the
+                        * same mapping ext4_setup_new_descs() uses), not by
+                        * the number of blocks per group.
+                        */
+                       gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
+                       gdb_bh = sbi->s_group_desc[gdb_num];
+                       update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
+                                      gdb_bh->b_size);
+               }
+       }
+exit:
+       return err;
+}
+
+/*
+ * ext4_setup_next_flex_gd() fills @flex_gd with the next batch of groups to
+ * add while growing the filesystem to @n_blocks_count blocks.
+ *
+ * The batch starts at the first group past the current end of the
+ * filesystem and stops at the group obtained by OR-ing that group number
+ * with (@flexbg_size - 1), or at the group holding the last requested
+ * block, whichever comes first.
+ *
+ * Returns 0 if the filesystem already has @n_blocks_count blocks (nothing
+ * is filled in), 1 otherwise.
+ */
+static int ext4_setup_next_flex_gd(struct super_block *sb,
+                                   struct ext4_new_flex_group_data *flex_gd,
+                                   ext4_fsblk_t n_blocks_count,
+                                   unsigned long flexbg_size)
+{
+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+       struct ext4_new_group_data *group_data = flex_gd->groups;
+       ext4_grpblk_t blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb);
+       ext4_fsblk_t o_blocks_count = ext4_blocks_count(es);
+       ext4_group_t first, target, batch_last;
+       ext4_grpblk_t offset;
+       __u16 init_flags;
+       unsigned long idx;
+
+       if (o_blocks_count == n_blocks_count)
+               return 0;
+
+       /* The current filesystem must end exactly on a group boundary */
+       ext4_get_group_no_and_offset(sb, o_blocks_count, &first, &offset);
+       BUG_ON(offset);
+       /* From here on, offset is that of the last requested block */
+       ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &target, &offset);
+
+       batch_last = first | (flexbg_size - 1);
+       if (batch_last > target)
+               batch_last = target;
+
+       flex_gd->count = batch_last - first + 1;
+
+       /* Every group of the batch starts out with the same flags */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                      EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+               init_flags = EXT4_BG_BLOCK_UNINIT | EXT4_BG_INODE_UNINIT;
+       else
+               init_flags = EXT4_BG_INODE_ZEROED;
+
+       for (idx = 0; idx < flex_gd->count; idx++) {
+               ext4_group_t grp = first + idx;
+               int overhead = 0;
+
+               /* Superblock copy, GDT blocks and reserved GDT blocks */
+               if (ext4_bg_has_super(sb, grp))
+                       overhead = 1 + ext4_bg_num_gdb(sb, grp) +
+                                  le16_to_cpu(es->s_reserved_gdt_blocks);
+
+               group_data[idx].group = grp;
+               group_data[idx].blocks_count = blocks_per_group;
+               group_data[idx].free_blocks_count = blocks_per_group - overhead;
+               flex_gd->bg_flags[idx] = init_flags;
+       }
+
+       /* idx now equals flex_gd->count; idx - 1 is the batch's last entry */
+       if (batch_last == target) {
+               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                       EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+                       /* We need to initialize block bitmap of last group. */
+                       flex_gd->bg_flags[idx - 1] &= ~EXT4_BG_BLOCK_UNINIT;
+
+               /* A partial final group holds fewer blocks than a full one */
+               if (offset != blocks_per_group - 1) {
+                       group_data[idx - 1].blocks_count = offset + 1;
+                       group_data[idx - 1].free_blocks_count -=
+                                       blocks_per_group - offset - 1;
+               }
+       }
+
+       return 1;
+}
+
  /* Add group descriptor data to an existing or new group descriptor block.
   * Ensure we handle all possible error conditions _before_ we start modifying
   * the filesystem, because we cannot abort the transaction and not have it
@@ -750,16 +1378,15 @@ exit_err:
   */
  int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
  {
+       struct ext4_new_flex_group_data flex_gd;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         struct ext4_super_block *es = sbi->s_es;
         int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
                 le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
-       struct buffer_head *primary = NULL;
-       struct ext4_group_desc *gdp;
         struct inode *inode = NULL;
-       handle_t *handle;
         int gdb_off, gdb_num;
-       int err, err2;
+       int err;
+       __u16 bg_flags = 0;
  
         gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
         gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb);
@@ -798,175 +1425,69 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
         }
  
  
-       if ((err = verify_group_input(sb, input)))
-               goto exit_put;
+       err = verify_group_input(sb, input);
+       if (err)
+               goto out;
  
-       if ((err = setup_new_group_blocks(sb, input)))
-               goto exit_put;
+       flex_gd.count = 1;
+       flex_gd.groups = input;
+       flex_gd.bg_flags = &bg_flags;
+       err = ext4_flex_group_add(sb, inode, &flex_gd);
+out:
+       iput(inode);
+       return err;
+} /* ext4_group_add */
  
-       /*
-        * We will always be modifying at least the superblock and a GDT
-        * block.  If we are adding a group past the last current GDT block,
-        * we will also modify the inode and the dindirect block.  If we
-        * are adding a group with superblock/GDT backups  we will also
-        * modify each of the reserved GDT dindirect blocks.
+/*
+ * extend a group; no checks are done here, the caller must have done them.
+ */
+static int ext4_group_extend_no_check(struct super_block *sb,
+                                     ext4_fsblk_t o_blocks_count, ext4_grpblk_t add)
+{
+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+       handle_t *handle;
+       int err = 0, err2;
+
+       /* We will update the superblock, one block bitmap, and
+        * one group descriptor via ext4_group_add_blocks().
          */
-       handle = ext4_journal_start_sb(sb,
-                                      ext4_bg_has_super(sb, input->group) ?
-                                      3 + reserved_gdb : 4);
+       handle = ext4_journal_start_sb(sb, 3);
         if (IS_ERR(handle)) {
                 err = PTR_ERR(handle);
-               goto exit_put;
+               ext4_warning(sb, "error %d on journal start", err);
+               return err;
         }
  
-       if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
-               goto exit_journal;
-
-        /*
-         * We will only either add reserved group blocks to a backup group
-         * or remove reserved blocks for the first group in a new group block.
-         * Doing both would be mean more complex code, and sane people don't
-         * use non-sparse filesystems anymore.  This is already checked above.
-         */
-       if (gdb_off) {
-               primary = sbi->s_group_desc[gdb_num];
-               if ((err = ext4_journal_get_write_access(handle, primary)))
-                       goto exit_journal;
-
-               if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) {
-                       err = reserve_backup_gdb(handle, inode, input->group);
-                       if (err)
-                               goto exit_journal;
-               }
-       } else {
-               /*
-                * Note that we can access new group descriptor block safely
-                * only if add_new_gdb() succeeds.
-                */
-               err = add_new_gdb(handle, inode, input->group);
-               if (err)
-                       goto exit_journal;
-               primary = sbi->s_group_desc[gdb_num];
+       err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+       if (err) {
+               ext4_warning(sb, "error %d on journal write access", err);
+               goto errout;
         }
  
-        /*
-         * OK, now we've set up the new group.  Time to make it active.
-         *
-         * so we have to be safe wrt. concurrent accesses the group
-         * data.  So we need to be careful to set all of the relevant
-         * group descriptor data etc. *before* we enable the group.
-         *
-         * The key field here is sbi->s_groups_count: as long as
-         * that retains its old value, nobody is going to access the new
-         * group.
-         *
-         * So first we update all the descriptor metadata for the new
-         * group; then we update the total disk blocks count; then we
-         * update the groups count to enable the group; then finally we
-         * update the free space counts so that the system can start
-         * using the new disk blocks.
-         */
-
-       /* Update group descriptor block for new group */
-       gdp = (struct ext4_group_desc *)((char *)primary->b_data +
-                                        gdb_off * EXT4_DESC_SIZE(sb));
-
-       memset(gdp, 0, EXT4_DESC_SIZE(sb));
-       ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
-       ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
-       ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
-       ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count);
-       ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
-       gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
-       gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
-
-       /*
-        * We can allocate memory for mb_alloc based on the new group
-        * descriptor
-        */
-       err = ext4_mb_add_groupinfo(sb, input->group, gdp);
+       ext4_blocks_count_set(es, o_blocks_count + add);
+       ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
+                  o_blocks_count + add);
+       /* We add the blocks to the bitmap and set the group need init bit */
+       err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
         if (err)
-               goto exit_journal;
-
-       /*
-        * Make the new blocks and inodes valid next.  We do this before
-        * increasing the group count so that once the group is enabled,
-        * all of its blocks and inodes are already valid.
-        *
-        * We always allocate group-by-group, then block-by-block or
-        * inode-by-inode within a group, so enabling these
-        * blocks/inodes before the group is live won't actually let us
-        * allocate the new space yet.
-        */
-       ext4_blocks_count_set(es, ext4_blocks_count(es) +
-               input->blocks_count);
-       le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
-
-       /*
-        * We need to protect s_groups_count against other CPUs seeing
-        * inconsistent state in the superblock.
-        *
-        * The precise rules we use are:
-        *
-        * * Writers must perform a smp_wmb() after updating all dependent
-        *   data and before modifying the groups count
-        *
-        * * Readers must perform an smp_rmb() after reading the groups count
-        *   and before reading any dependent data.
-        *
-        * NB. These rules can be relaxed when checking the group count
-        * while freeing data, as we can only allocate from a block
-        * group after serialising against the group count, and we can
-        * only then free after serialising in turn against that
-        * allocation.
-        */
-       smp_wmb();
-
-       /* Update the global fs size fields */
-       sbi->s_groups_count++;
-
-       err = ext4_handle_dirty_metadata(handle, NULL, primary);
-       if (unlikely(err)) {
-               ext4_std_error(sb, err);
-               goto exit_journal;
-       }
-
-       /* Update the reserved block counts only once the new group is
-        * active. */
-       ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
-               input->reserved_blocks);
-
-       /* Update the free space counts */
-       percpu_counter_add(&sbi->s_freeclusters_counter,
-                          EXT4_B2C(sbi, input->free_blocks_count));
-       percpu_counter_add(&sbi->s_freeinodes_counter,
-                          EXT4_INODES_PER_GROUP(sb));
-
-       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
-           sbi->s_log_groups_per_flex) {
-               ext4_group_t flex_group;
-               flex_group = ext4_flex_group(sbi, input->group);
-               atomic_add(EXT4_B2C(sbi, input->free_blocks_count),
-                          &sbi->s_flex_groups[flex_group].free_clusters);
-               atomic_add(EXT4_INODES_PER_GROUP(sb),
-                          &sbi->s_flex_groups[flex_group].free_inodes);
-       }
-
+               goto errout;
         ext4_handle_dirty_super(handle, sb);
-
-exit_journal:
-       if ((err2 = ext4_journal_stop(handle)) && !err)
+       ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
+                  o_blocks_count + add);
+errout:
+       err2 = ext4_journal_stop(handle);
+       if (err2 && !err)
                 err = err2;
-       if (!err && primary) {
-               update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
+
+       if (!err) {
+               if (test_opt(sb, DEBUG))
+                       printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
+                              "blocks\n", ext4_blocks_count(es));
+               update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
                                sizeof(struct ext4_super_block));
-               update_backups(sb, primary->b_blocknr, primary->b_data,
-                              primary->b_size);
         }
-exit_put:
-       iput(inode);
         return err;
-} /* ext4_group_add */
+}
  
  /*
   * Extend the filesystem to the new number of blocks specified.  This entry
@@ -985,8 +1506,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
         ext4_grpblk_t last;
         ext4_grpblk_t add;
         struct buffer_head *bh;
-       handle_t *handle;
-       int err, err2;
+       int err;
         ext4_group_t group;
  
         o_blocks_count = ext4_blocks_count(es);
@@ -1042,42 +1562,138 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
         }
         brelse(bh);
  
-       /* We will update the superblock, one block bitmap, and
-        * one group descriptor via ext4_free_blocks().
-        */
-       handle = ext4_journal_start_sb(sb, 3);
-       if (IS_ERR(handle)) {
-               err = PTR_ERR(handle);
-               ext4_warning(sb, "error %d on journal start", err);
-               goto exit_put;
+       err = ext4_group_extend_no_check(sb, o_blocks_count, add);
+       return err;
+} /* ext4_group_extend */
+
+/*
+ * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
+ *
+ * @sb: super block of the fs to be resized
+ * @n_blocks_count: the number of blocks in the resized fs
+ *
+ * Returns 0 on success (also when the fs already has the requested size)
+ * or a negative errno.  On-line shrinking is rejected with -EINVAL.
+ */
+int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
+{
+       struct ext4_new_flex_group_data *flex_gd = NULL;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_super_block *es = sbi->s_es;
+       struct buffer_head *bh;
+       struct inode *resize_inode;
+       ext4_fsblk_t o_blocks_count;
+       ext4_group_t o_group;
+       ext4_group_t n_group;
+       ext4_grpblk_t offset;
+       unsigned long n_desc_blocks;
+       unsigned long o_desc_blocks;
+       unsigned long desc_blocks;
+       int err = 0, flexbg_size = 1;
+
+       o_blocks_count = ext4_blocks_count(es);
+
+       if (test_opt(sb, DEBUG))
+               printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu "
+                      "upto %llu blocks\n", o_blocks_count, n_blocks_count);
+
+       if (n_blocks_count < o_blocks_count) {
+               /* On-line shrinking not supported */
+               ext4_warning(sb, "can't shrink FS - resize aborted");
+               return -EINVAL;
         }
  
-       if ((err = ext4_journal_get_write_access(handle,
-                                                EXT4_SB(sb)->s_sbh))) {
-               ext4_warning(sb, "error %d on journal write access", err);
-               ext4_journal_stop(handle);
-               goto exit_put;
+       if (n_blocks_count == o_blocks_count)
+               /* Nothing to do */
+               return 0;
+
+       /*
+        * n_group: group of the last block of the resized fs.
+        * o_group/offset: position of the first block past the current end;
+        * offset != 0 is taken below to mean the last group is not full.
+        */
+       ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
+       ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset);
+
+       n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
+                       EXT4_DESC_PER_BLOCK(sb);
+       o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
+                       EXT4_DESC_PER_BLOCK(sb);
+       desc_blocks = n_desc_blocks - o_desc_blocks;
+
+       if (desc_blocks &&
+           (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) ||
+            le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) {
+               ext4_warning(sb, "No reserved GDT blocks, can't resize");
+               return -EPERM;
         }
-       ext4_blocks_count_set(es, o_blocks_count + add);
-       ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
-                  o_blocks_count + add);
-       /* We add the blocks to the bitmap and set the group need init bit */
-       err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
-       ext4_handle_dirty_super(handle, sb);
-       ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
-                  o_blocks_count + add);
-       err2 = ext4_journal_stop(handle);
-       if (!err && err2)
-               err = err2;
  
-       if (err)
-               goto exit_put;
+       resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
+       if (IS_ERR(resize_inode)) {
+               ext4_warning(sb, "Error opening resize inode");
+               return PTR_ERR(resize_inode);
+       }
  
+       /* See if the device is actually as big as what was requested */
+       bh = sb_bread(sb, n_blocks_count - 1);
+       if (!bh) {
+               ext4_warning(sb, "can't read last block, resize aborted");
+               /* leave via out: so the resize inode is released */
+               err = -ENOSPC;
+               goto out;
+       }
+       brelse(bh);
+
+       if (offset != 0) {
+               /* extend the last group */
+               ext4_grpblk_t add;
+
+               /*
+                * Do not grow past the requested size: if the new end
+                * is still inside the current last group, add only the
+                * difference instead of filling the whole group.
+                */
+               if (n_group == o_group)
+                       add = n_blocks_count - o_blocks_count;
+               else
+                       add = EXT4_BLOCKS_PER_GROUP(sb) - offset;
+               err = ext4_group_extend_no_check(sb, o_blocks_count, add);
+               if (err)
+                       goto out;
+       }
+
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
+           es->s_log_groups_per_flex)
+               flexbg_size = 1 << es->s_log_groups_per_flex;
+
+       o_blocks_count = ext4_blocks_count(es);
+       if (o_blocks_count == n_blocks_count)
+               goto out;
+
+       flex_gd = alloc_flex_gd(flexbg_size);
+       if (flex_gd == NULL) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       /* Add flex groups. Note that a regular group is a
+        * flex group with 1 group.
+        */
+       while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
+                                             flexbg_size)) {
+               ext4_alloc_group_tables(sb, flex_gd, flexbg_size);
+               err = ext4_flex_group_add(sb, resize_inode, flex_gd);
+               if (unlikely(err))
+                       break;
+       }
+
+out:
+       if (flex_gd)
+               free_flex_gd(flex_gd);
+
+       iput(resize_inode);
         if (test_opt(sb, DEBUG))
-               printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
-                      ext4_blocks_count(es));
-       update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
-                      sizeof(struct ext4_super_block));
-exit_put:
+               printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu "
+                      "upto %llu blocks\n", o_blocks_count, n_blocks_count);
         return err;
-} /* ext4_group_extend */
+}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index ed3ce82e2de4ce8ef303075028db8651a1571e17..502c61fd739262306f7665b355cbdeba4d1e1c43 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1095,7 +1095,7 @@ static int ext4_show_options(struct seq_file *seq, struct dentry *root)
         }
         if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
                 seq_printf(seq, ",max_batch_time=%u",
-                          (unsigned) sbi->s_min_batch_time);
+                          (unsigned) sbi->s_max_batch_time);
         }
  
         /*
@@ -2005,17 +2005,16 @@ static int ext4_fill_flex_info(struct super_block *sb)
         struct ext4_group_desc *gdp = NULL;
         ext4_group_t flex_group_count;
         ext4_group_t flex_group;
-       int groups_per_flex = 0;
+       unsigned int groups_per_flex = 0;
         size_t size;
         int i;
  
         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
-       groups_per_flex = 1 << sbi->s_log_groups_per_flex;
-
-       if (groups_per_flex < 2) {
+       if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
                 sbi->s_log_groups_per_flex = 0;
                 return 1;
         }
+       groups_per_flex = 1 << sbi->s_log_groups_per_flex;
  
         /* We allocate both existing and potentially added groups */
         flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
@@ -3506,7 +3505,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
          * of the filesystem.
          */
         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
-                ext4_msg(sb, KERN_WARNING, "bad geometry: first data"
+               ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
                          "block %u is beyond end of filesystem (%llu)",
                          le32_to_cpu(es->s_first_data_block),
                          ext4_blocks_count(es));
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c

index b60f9f81e33c4371985d83621020ed69f88de779..d2a200624af59a61ef29b88e4320609ddf2c02d0 100644 (file)
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -47,8 +47,9 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
                               name, value, size, flags);
  }
  
-int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array,
-                   void *fs_info)
+static int
+ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array,
+               void *fs_info)
  {
         const struct xattr *xattr;
         handle_t *handle = fs_info;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c

index 68d704db787f108350f9cc47fd506628685399c7..5069b84751509e65bb9689c153ac87e36c26b843 100644 (file)
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -429,6 +429,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
  
         jbd_debug(3, "JBD2: commit phase 1\n");
  
+       /*
+        * Clear the revoked flags to reflect that there are no revoked
+        * buffers in the next transaction, which is about to be started.
+        */
+       jbd2_clear_buffer_revoked_flags(journal);
+
         /*
          * Switch to a new revoke table.
          */
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c

index 69fd93588118027961923d26685b8edecfe3c268..30b2867d6cc950cb7eeceeb528f18d073405117b 100644 (file)
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -47,6 +47,10 @@
   *   overwriting the new data.  We don't even need to clear the revoke
   *   bit here.
   *
+ * We cache revoke status of a buffer in the current transaction in b_states
+ * bits.  As the name says, revokevalid flag indicates that the cached revoke
+ * status of a buffer is valid and we can rely on the cached status.
+ *
   * Revoke information on buffers is a tri-state value:
   *
   * RevokeValid clear:  no cached revoke status, need to look it up
@@ -478,6 +482,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
         return did_revoke;
  }
  
+/*
+ * jbd2_clear_buffer_revoked_flags walks every record in the journal's
+ * current revoke table (journal->j_revoke) and clears the revoked flag on
+ * any buffer found for it, as the next transaction starts with none revoked.
+ */
+void jbd2_clear_buffer_revoked_flags(journal_t *journal)
+{
+       struct jbd2_revoke_table_s *revoke = journal->j_revoke;
+       int i = 0;
+
+       for (i = 0; i < revoke->hash_size; i++) {
+               struct list_head *hash_list;
+               struct list_head *list_entry;
+               hash_list = &revoke->hash_table[i];
+
+               list_for_each(list_entry, hash_list) {
+                       struct jbd2_revoke_record_s *record;
+                       struct buffer_head *bh;
+                       record = (struct jbd2_revoke_record_s *)list_entry;
+                       bh = __find_get_block(journal->j_fs_dev,
+                                             record->blocknr,
+                                             journal->j_blocksize);
+                       if (bh) {
+                               clear_buffer_revoked(bh);
+                               __brelse(bh);
+                       }
+               }
+       }
+}
+
  /* journal_switch_revoke table select j_revoke for next transaction
   * we do not want to suspend any processing until all revokes are
   * written -bzzz
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c

index a0e41a4c080e9b2d3a51f5b4c6404cb8b8bc9c2b..35ae096bed5dca819181c1d7bfa94194fa3b0c01 100644 (file)
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -517,12 +517,13 @@ void jbd2_journal_lock_updates(journal_t *journal)
                         break;
  
                 spin_lock(&transaction->t_handle_lock);
+               prepare_to_wait(&journal->j_wait_updates, &wait,
+                               TASK_UNINTERRUPTIBLE);
                 if (!atomic_read(&transaction->t_updates)) {
                         spin_unlock(&transaction->t_handle_lock);
+                       finish_wait(&journal->j_wait_updates, &wait);
                         break;
                 }
-               prepare_to_wait(&journal->j_wait_updates, &wait,
-                               TASK_UNINTERRUPTIBLE);
                 spin_unlock(&transaction->t_handle_lock);
                 write_unlock(&journal->j_state_lock);
                 schedule();
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h

index 2092ea21e469eeeaa415a885ead161fed0ac7d38..5557baefed60b5f5bb3dd0984bbbe7f2e0a4c8fc 100644 (file)
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1151,6 +1151,7 @@ extern int        jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
  extern int     jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
  extern void    jbd2_journal_clear_revoke(journal_t *);
  extern void    jbd2_journal_switch_revoke_table(journal_t *journal);
+extern void    jbd2_clear_buffer_revoked_flags(journal_t *journal);
  
  /*
   * The log thread user interface:
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h

index 748ff7cbe5557989e6fd40fcacb70e5b8760956f..319538bf17d219d1bb3c11525c94cecdf669b978 100644 (file)
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -573,9 +573,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
  );
  
  TRACE_EVENT(ext4_mb_release_group_pa,
-       TP_PROTO(struct ext4_prealloc_space *pa),
+       TP_PROTO(struct super_block *sb, struct ext4_prealloc_space *pa),
  
-       TP_ARGS(pa),
+       TP_ARGS(sb, pa),
  
         TP_STRUCT__entry(
                 __field(        dev_t,  dev                     )
@@ -585,7 +585,7 @@ TRACE_EVENT(ext4_mb_release_group_pa,
         ),
  
         TP_fast_assign(
-               __entry->dev            = pa->pa_inode->i_sb->s_dev;
+               __entry->dev            = sb->s_dev;
                 __entry->pa_pstart      = pa->pa_pstart;
                 __entry->pa_len         = pa->pa_len;
         ),
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 10 Jan 2012 23:51:48 +0000 (15:51 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 10 Jan 2012 23:51:48 +0000 (15:51 -0800)
Documentation/filesystems/ext4.txt		patch \| blob \| blame \| history
fs/ext4/balloc.c		patch \| blob \| blame \| history
fs/ext4/ext4.h		patch \| blob \| blame \| history
fs/ext4/extents.c		patch \| blob \| blame \| history
fs/ext4/ialloc.c		patch \| blob \| blame \| history
fs/ext4/inode.c		patch \| blob \| blame \| history
fs/ext4/ioctl.c		patch \| blob \| blame \| history
fs/ext4/mballoc.c		patch \| blob \| blame \| history
fs/ext4/resize.c		patch \| blob \| blame \| history
fs/ext4/super.c		patch \| blob \| blame \| history
fs/ext4/xattr_security.c		patch \| blob \| blame \| history
fs/jbd2/commit.c		patch \| blob \| blame \| history
fs/jbd2/revoke.c		patch \| blob \| blame \| history
fs/jbd2/transaction.c		patch \| blob \| blame \| history
include/linux/jbd2.h		patch \| blob \| blame \| history
include/trace/events/ext4.h		patch \| blob \| blame \| history