]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/commitdiff
Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 21 Apr 2014 03:43:47 +0000 (20:43 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 21 Apr 2014 03:43:47 +0000 (20:43 -0700)
Pull ext4 fixes from Ted Ts'o:
 "These are regression and bug fixes for ext4.

  We had a number of new features in ext4 during this merge window
  (ZERO_RANGE and COLLAPSE_RANGE fallocate modes, renameat, etc.) so
  there were many more regression and bug fixes this time around.  It
  didn't help that xfstests hadn't been fully updated to fully stress
  test COLLAPSE_RANGE until after -rc1"

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (31 commits)
  ext4: disable COLLAPSE_RANGE for bigalloc
  ext4: fix COLLAPSE_RANGE failure with 1KB block size
  ext4: use EINVAL if not a regular file in ext4_collapse_range()
  ext4: enforce we are operating on a regular file in ext4_zero_range()
  ext4: fix extent merging in ext4_ext_shift_path_extents()
  ext4: discard preallocations after removing space
  ext4: no need to truncate pagecache twice in collapse range
  ext4: fix removing status extents in ext4_collapse_range()
  ext4: use filemap_write_and_wait_range() correctly in collapse range
  ext4: use truncate_pagecache() in collapse range
  ext4: remove temporary shim used to merge COLLAPSE_RANGE and ZERO_RANGE
  ext4: fix ext4_count_free_clusters() with EXT4FS_DEBUG and bigalloc enabled
  ext4: always check ext4_ext_find_extent result
  ext4: fix error handling in ext4_ext_shift_extents
  ext4: silence sparse check warning for function ext4_trim_extent
  ext4: COLLAPSE_RANGE only works on extent-based files
  ext4: fix byte order problems introduced by the COLLAPSE_RANGE patches
  ext4: use i_size_read in ext4_unaligned_aio()
  fs: disallow all fallocate operation on active swapfile
  fs: move falloc collapse range check into the filesystem methods
  ...

14 files changed:
fs/ceph/file.c
fs/ext4/balloc.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/extents_status.c
fs/ext4/file.c
fs/ext4/inode.c
fs/ext4/mballoc.c
fs/ext4/page-io.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/open.c
fs/xfs/xfs_file.c
include/trace/events/ext4.h

index 39da1c2efa5030216d18bc6bb3020a78afb4c5f6..88a6df4cbe6d8a52bd083a756ac452b798c33708 100644 (file)
@@ -1221,9 +1221,6 @@ static long ceph_fallocate(struct file *file, int mode,
        if (!S_ISREG(inode->i_mode))
                return -EOPNOTSUPP;
 
-       if (IS_SWAPFILE(inode))
-               return -ETXTBSY;
-
        mutex_lock(&inode->i_mutex);
 
        if (ceph_snap(inode) != CEPH_NOSNAP) {
index 6ea7b1436bbc201e872d6ee18f7321b2e099f156..5c56785007e0e36fec78e6535aa210e09247a983 100644 (file)
@@ -667,7 +667,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
                        continue;
 
                x = ext4_count_free(bitmap_bh->b_data,
-                                   EXT4_BLOCKS_PER_GROUP(sb) / 8);
+                                   EXT4_CLUSTERS_PER_GROUP(sb) / 8);
                printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
                        i, ext4_free_group_clusters(sb, gdp), x);
                bitmap_count += x;
index f1c65dc7cc0ad268a9fccc7b6f1aeaf078d84a0a..66946aa621270716c580a2617bceecbcadb6bda7 100644 (file)
@@ -2466,23 +2466,6 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
        up_write(&EXT4_I(inode)->i_data_sem);
 }
 
-/*
- * Update i_disksize after writeback has been started. Races with truncate
- * are avoided by checking i_size under i_data_sem.
- */
-static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
-{
-       loff_t i_size;
-
-       down_write(&EXT4_I(inode)->i_data_sem);
-       i_size = i_size_read(inode);
-       if (newsize > i_size)
-               newsize = i_size;
-       if (newsize > EXT4_I(inode)->i_disksize)
-               EXT4_I(inode)->i_disksize = newsize;
-       up_write(&EXT4_I(inode)->i_data_sem);
-}
-
 struct ext4_group_info {
        unsigned long   bb_state;
        struct rb_root  bb_free_root;
index 82df3ce9874ab7f3a65abc10e2bd2238b1ae2af3..01b0c208f62507e12f50ddd4fd3669972797f823 100644 (file)
@@ -3313,6 +3313,11 @@ static int ext4_split_extent(handle_t *handle,
                return PTR_ERR(path);
        depth = ext_depth(inode);
        ex = path[depth].p_ext;
+       if (!ex) {
+               EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+                                (unsigned long) map->m_lblk);
+               return -EIO;
+       }
        uninitialized = ext4_ext_is_uninitialized(ex);
        split_flag1 = 0;
 
@@ -3694,6 +3699,12 @@ static int ext4_convert_initialized_extents(handle_t *handle,
                }
                depth = ext_depth(inode);
                ex = path[depth].p_ext;
+               if (!ex) {
+                       EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+                                        (unsigned long) map->m_lblk);
+                       err = -EIO;
+                       goto out;
+               }
        }
 
        err = ext4_ext_get_access(handle, inode, path + depth);
@@ -4730,6 +4741,9 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
        trace_ext4_zero_range(inode, offset, len, mode);
 
+       if (!S_ISREG(inode->i_mode))
+               return -EINVAL;
+
        /*
         * Write out all dirty pages to avoid race conditions
         * Then release them.
@@ -4878,9 +4892,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (mode & FALLOC_FL_PUNCH_HOLE)
                return ext4_punch_hole(inode, offset, len);
 
-       if (mode & FALLOC_FL_COLLAPSE_RANGE)
-               return ext4_collapse_range(inode, offset, len);
-
        ret = ext4_convert_inline_data(inode);
        if (ret)
                return ret;
@@ -4892,6 +4903,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
 
+       if (mode & FALLOC_FL_COLLAPSE_RANGE)
+               return ext4_collapse_range(inode, offset, len);
+
        if (mode & FALLOC_FL_ZERO_RANGE)
                return ext4_zero_range(file, offset, len, mode);
 
@@ -5229,18 +5243,19 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                        if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
                                update = 1;
 
-                       *start = ex_last->ee_block +
+                       *start = le32_to_cpu(ex_last->ee_block) +
                                ext4_ext_get_actual_len(ex_last);
 
                        while (ex_start <= ex_last) {
-                               ex_start->ee_block -= shift;
-                               if (ex_start >
-                                       EXT_FIRST_EXTENT(path[depth].p_hdr)) {
-                                       if (ext4_ext_try_to_merge_right(inode,
-                                               path, ex_start - 1))
-                                               ex_last--;
-                               }
-                               ex_start++;
+                               le32_add_cpu(&ex_start->ee_block, -shift);
+                               /* Try to merge to the left. */
+                               if ((ex_start >
+                                    EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
+                                   ext4_ext_try_to_merge_right(inode,
+                                                       path, ex_start - 1))
+                                       ex_last--;
+                               else
+                                       ex_start++;
                        }
                        err = ext4_ext_dirty(handle, inode, path + depth);
                        if (err)
@@ -5255,7 +5270,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                if (err)
                        goto out;
 
-               path[depth].p_idx->ei_block -= shift;
+               le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
                err = ext4_ext_dirty(handle, inode, path + depth);
                if (err)
                        goto out;
@@ -5300,7 +5315,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
                return ret;
        }
 
-       stop_block = extent->ee_block + ext4_ext_get_actual_len(extent);
+       stop_block = le32_to_cpu(extent->ee_block) +
+                       ext4_ext_get_actual_len(extent);
        ext4_ext_drop_refs(path);
        kfree(path);
 
@@ -5313,10 +5329,18 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
         * enough to accomodate the shift.
         */
        path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
+       if (IS_ERR(path))
+               return PTR_ERR(path);
        depth = path->p_depth;
        extent =  path[depth].p_ext;
-       ex_start = extent->ee_block;
-       ex_end = extent->ee_block + ext4_ext_get_actual_len(extent);
+       if (extent) {
+               ex_start = le32_to_cpu(extent->ee_block);
+               ex_end = le32_to_cpu(extent->ee_block) +
+                       ext4_ext_get_actual_len(extent);
+       } else {
+               ex_start = 0;
+               ex_end = 0;
+       }
        ext4_ext_drop_refs(path);
        kfree(path);
 
@@ -5331,7 +5355,13 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
                        return PTR_ERR(path);
                depth = path->p_depth;
                extent = path[depth].p_ext;
-               current_block = extent->ee_block;
+               if (!extent) {
+                       EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+                                        (unsigned long) start);
+                       return -EIO;
+               }
+
+               current_block = le32_to_cpu(extent->ee_block);
                if (start > current_block) {
                        /* Hole, move to the next extent */
                        ret = mext_next_extent(inode, path, &extent);
@@ -5365,17 +5395,18 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        ext4_lblk_t punch_start, punch_stop;
        handle_t *handle;
        unsigned int credits;
-       loff_t new_size;
+       loff_t new_size, ioffset;
        int ret;
 
-       BUG_ON(offset + len > i_size_read(inode));
-
        /* Collapse range works only on fs block size aligned offsets. */
        if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
            len & (EXT4_BLOCK_SIZE(sb) - 1))
                return -EINVAL;
 
        if (!S_ISREG(inode->i_mode))
+               return -EINVAL;
+
+       if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
                return -EOPNOTSUPP;
 
        trace_ext4_collapse_range(inode, offset, len);
@@ -5383,22 +5414,34 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
        punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
 
+       /* Call ext4_force_commit to flush all data in case of data=journal. */
+       if (ext4_should_journal_data(inode)) {
+               ret = ext4_force_commit(inode->i_sb);
+               if (ret)
+                       return ret;
+       }
+
+       /*
+        * Need to round down offset to be aligned with page size boundary
+        * for page size > block size.
+        */
+       ioffset = round_down(offset, PAGE_SIZE);
+
        /* Write out all dirty pages */
-       ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1);
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+                                          LLONG_MAX);
        if (ret)
                return ret;
 
        /* Take mutex lock */
        mutex_lock(&inode->i_mutex);
 
-       /* It's not possible punch hole on append only file */
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
-               ret = -EPERM;
-               goto out_mutex;
-       }
-
-       if (IS_SWAPFILE(inode)) {
-               ret = -ETXTBSY;
+       /*
+        * There is no need to overlap collapse range with EOF, in which case
+        * it is effectively a truncate operation
+        */
+       if (offset + len >= i_size_read(inode)) {
+               ret = -EINVAL;
                goto out_mutex;
        }
 
@@ -5408,7 +5451,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_mutex;
        }
 
-       truncate_pagecache_range(inode, offset, -1);
+       truncate_pagecache(inode, ioffset);
 
        /* Wait for existing dio to complete */
        ext4_inode_block_unlocked_dio(inode);
@@ -5425,7 +5468,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        ext4_discard_preallocations(inode);
 
        ret = ext4_es_remove_extent(inode, punch_start,
-                                   EXT_MAX_BLOCKS - punch_start - 1);
+                                   EXT_MAX_BLOCKS - punch_start);
        if (ret) {
                up_write(&EXT4_I(inode)->i_data_sem);
                goto out_stop;
@@ -5436,6 +5479,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                up_write(&EXT4_I(inode)->i_data_sem);
                goto out_stop;
        }
+       ext4_discard_preallocations(inode);
 
        ret = ext4_ext_shift_extents(inode, handle, punch_stop,
                                     punch_stop - punch_start);
@@ -5445,10 +5489,9 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        }
 
        new_size = i_size_read(inode) - len;
-       truncate_setsize(inode, new_size);
+       i_size_write(inode, new_size);
        EXT4_I(inode)->i_disksize = new_size;
 
-       ext4_discard_preallocations(inode);
        up_write(&EXT4_I(inode)->i_data_sem);
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
index 0a014a7194b28cac95e56f21b59f3776fcf8c9fc..0ebc21204b5184841405f890fa11dd5ae11ef54c 100644 (file)
@@ -810,7 +810,7 @@ retry:
 
                        newes.es_lblk = end + 1;
                        newes.es_len = len2;
-                       block = 0x7FDEADBEEF;
+                       block = 0x7FDEADBEEFULL;
                        if (ext4_es_is_written(&orig_es) ||
                            ext4_es_is_unwritten(&orig_es))
                                block = ext4_es_pblock(&orig_es) +
index ca7502d89fdee07b96585c768854375b207daaf6..063fc1538355972d912553ad6c8e419390f057de 100644 (file)
@@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
        size_t count = iov_length(iov, nr_segs);
        loff_t final_size = pos + count;
 
-       if (pos >= inode->i_size)
+       if (pos >= i_size_read(inode))
                return 0;
 
        if ((pos & blockmask) || (final_size & blockmask))
index 5b0d2c7d54080dea4080909fe8ec6a74ecf19b56..d7b7462a0e13e11e7131f2b148d1323a3de5c996 100644 (file)
@@ -522,6 +522,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
        if (unlikely(map->m_len > INT_MAX))
                map->m_len = INT_MAX;
 
+       /* We can handle the block number less than EXT_MAX_BLOCKS */
+       if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS))
+               return -EIO;
+
        /* Lookup extent status tree firstly */
        if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
                ext4_es_lru_add(inode);
@@ -2243,13 +2247,23 @@ static int mpage_map_and_submit_extent(handle_t *handle,
                        return err;
        } while (map->m_len);
 
-       /* Update on-disk size after IO is submitted */
+       /*
+        * Update on-disk size after IO is submitted.  Races with
+        * truncate are avoided by checking i_size under i_data_sem.
+        */
        disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
        if (disksize > EXT4_I(inode)->i_disksize) {
                int err2;
-
-               ext4_wb_update_i_disksize(inode, disksize);
+               loff_t i_size;
+
+               down_write(&EXT4_I(inode)->i_data_sem);
+               i_size = i_size_read(inode);
+               if (disksize > i_size)
+                       disksize = i_size;
+               if (disksize > EXT4_I(inode)->i_disksize)
+                       EXT4_I(inode)->i_disksize = disksize;
                err2 = ext4_mark_inode_dirty(handle, inode);
+               up_write(&EXT4_I(inode)->i_data_sem);
                if (err2)
                        ext4_error(inode->i_sb,
                                   "Failed to mark inode %lu dirty",
@@ -3527,15 +3541,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        }
 
        mutex_lock(&inode->i_mutex);
-       /* It's not possible punch hole on append only file */
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
-               ret = -EPERM;
-               goto out_mutex;
-       }
-       if (IS_SWAPFILE(inode)) {
-               ret = -ETXTBSY;
-               goto out_mutex;
-       }
 
        /* No need to punch hole beyond i_size */
        if (offset >= inode->i_size)
@@ -3616,7 +3621,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
                ret = ext4_free_hole_blocks(handle, inode, first_block,
                                            stop_block);
 
-       ext4_discard_preallocations(inode);
        up_write(&EXT4_I(inode)->i_data_sem);
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
@@ -4423,21 +4427,20 @@ out_brelse:
  *
  * We are called from a few places:
  *
- * - Within generic_file_write() for O_SYNC files.
+ * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
  *   Here, there will be no transaction running. We wait for any running
  *   transaction to commit.
  *
- * - Within sys_sync(), kupdate and such.
- *   We wait on commit, if tol to.
+ * - Within flush work (sys_sync(), kupdate and such).
+ *   We wait on commit, if told to.
  *
- * - Within prune_icache() (PF_MEMALLOC == true)
- *   Here we simply return.  We can't afford to block kswapd on the
- *   journal commit.
+ * - Within iput_final() -> write_inode_now()
+ *   We wait on commit, if told to.
  *
  * In all cases it is actually safe for us to return without doing anything,
  * because the inode has been copied into a raw inode buffer in
- * ext4_mark_inode_dirty().  This is a correctness thing for O_SYNC and for
- * knfsd.
+ * ext4_mark_inode_dirty().  This is a correctness thing for WB_SYNC_ALL
+ * writeback.
  *
  * Note that we are absolutely dependent upon all inode dirtiers doing the
  * right thing: they *must* call mark_inode_dirty() after dirtying info in
@@ -4449,15 +4452,15 @@ out_brelse:
  *     stuff();
  *     inode->i_size = expr;
  *
- * is in error because a kswapd-driven write_inode() could occur while
- * `stuff()' is running, and the new i_size will be lost.  Plus the inode
- * will no longer be on the superblock's dirty inode list.
+ * is in error because write_inode() could occur while `stuff()' is running,
+ * and the new i_size will be lost.  Plus the inode will no longer be on the
+ * superblock's dirty inode list.
  */
 int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
        int err;
 
-       if (current->flags & PF_MEMALLOC)
+       if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
                return 0;
 
        if (EXT4_SB(inode->i_sb)->s_journal) {
index a888cac76e9c55c34002f930a7bc8a8df53376bf..c8238a26818cd9ef7567d0552a60a461bfd1f76e 100644 (file)
@@ -989,7 +989,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
        poff = block % blocks_per_page;
        page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
        if (!page)
-               return -EIO;
+               return -ENOMEM;
        BUG_ON(page->mapping != inode->i_mapping);
        e4b->bd_bitmap_page = page;
        e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
@@ -1003,7 +1003,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
        pnum = block / blocks_per_page;
        page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
        if (!page)
-               return -EIO;
+               return -ENOMEM;
        BUG_ON(page->mapping != inode->i_mapping);
        e4b->bd_buddy_page = page;
        return 0;
@@ -1168,7 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                        unlock_page(page);
                }
        }
-       if (page == NULL || !PageUptodate(page)) {
+       if (page == NULL) {
+               ret = -ENOMEM;
+               goto err;
+       }
+       if (!PageUptodate(page)) {
                ret = -EIO;
                goto err;
        }
@@ -1197,7 +1201,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                        unlock_page(page);
                }
        }
-       if (page == NULL || !PageUptodate(page)) {
+       if (page == NULL) {
+               ret = -ENOMEM;
+               goto err;
+       }
+       if (!PageUptodate(page)) {
                ret = -EIO;
                goto err;
        }
@@ -5008,6 +5016,8 @@ error_return:
  */
 static int ext4_trim_extent(struct super_block *sb, int start, int count,
                             ext4_group_t group, struct ext4_buddy *e4b)
+__releases(bitlock)
+__acquires(bitlock)
 {
        struct ext4_free_extent ex;
        int ret = 0;
index ab95508e3d4018eab92647c6d2308e98524080d1..c18d95b5054081c75e0c7a2fab975976838f9b02 100644 (file)
@@ -308,13 +308,14 @@ static void ext4_end_bio(struct bio *bio, int error)
        if (error) {
                struct inode *inode = io_end->inode;
 
-               ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
+               ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
                             "(offset %llu size %ld starting block %llu)",
-                            inode->i_ino,
+                            error, inode->i_ino,
                             (unsigned long long) io_end->offset,
                             (long) io_end->size,
                             (unsigned long long)
                             bi_sector >> (inode->i_blkbits - 9));
+               mapping_set_error(inode->i_mapping, error);
        }
 
        if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
index f3c667091618d8b26e09964dafe2f673a4c6cbd3..6f9e6fadac04e1c8af1d4a98d6258cdbc2f45dea 100644 (file)
@@ -3869,19 +3869,38 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        goto failed_mount2;
                }
        }
+
+       /*
+        * set up enough so that it can read an inode,
+        * and create new inode for buddy allocator
+        */
+       sbi->s_gdb_count = db_count;
+       if (!test_opt(sb, NOLOAD) &&
+           EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
+               sb->s_op = &ext4_sops;
+       else
+               sb->s_op = &ext4_nojournal_sops;
+
+       ext4_ext_init(sb);
+       err = ext4_mb_init(sb);
+       if (err) {
+               ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
+                        err);
+               goto failed_mount2;
+       }
+
        if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
-               goto failed_mount2;
+               goto failed_mount2a;
        }
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
                if (!ext4_fill_flex_info(sb)) {
                        ext4_msg(sb, KERN_ERR,
                               "unable to initialize "
                               "flex_bg meta info!");
-                       goto failed_mount2;
+                       goto failed_mount2a;
                }
 
-       sbi->s_gdb_count = db_count;
        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
        spin_lock_init(&sbi->s_next_gen_lock);
 
@@ -3916,14 +3935,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_stripe = ext4_get_stripe_size(sbi);
        sbi->s_extent_max_zeroout_kb = 32;
 
-       /*
-        * set up enough so that it can read an inode
-        */
-       if (!test_opt(sb, NOLOAD) &&
-           EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
-               sb->s_op = &ext4_sops;
-       else
-               sb->s_op = &ext4_nojournal_sops;
        sb->s_export_op = &ext4_export_ops;
        sb->s_xattr = ext4_xattr_handlers;
 #ifdef CONFIG_QUOTA
@@ -4113,21 +4124,13 @@ no_journal:
        if (err) {
                ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
                         "reserved pool", ext4_calculate_resv_clusters(sb));
-               goto failed_mount4a;
+               goto failed_mount5;
        }
 
        err = ext4_setup_system_zone(sb);
        if (err) {
                ext4_msg(sb, KERN_ERR, "failed to initialize system "
                         "zone (%d)", err);
-               goto failed_mount4a;
-       }
-
-       ext4_ext_init(sb);
-       err = ext4_mb_init(sb);
-       if (err) {
-               ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
-                        err);
                goto failed_mount5;
        }
 
@@ -4204,11 +4207,8 @@ failed_mount8:
 failed_mount7:
        ext4_unregister_li_request(sb);
 failed_mount6:
-       ext4_mb_release(sb);
-failed_mount5:
-       ext4_ext_release(sb);
        ext4_release_system_zone(sb);
-failed_mount4a:
+failed_mount5:
        dput(sb->s_root);
        sb->s_root = NULL;
 failed_mount4:
@@ -4232,11 +4232,14 @@ failed_mount3:
        percpu_counter_destroy(&sbi->s_extent_cache_cnt);
        if (sbi->s_mmp_tsk)
                kthread_stop(sbi->s_mmp_tsk);
+failed_mount2a:
+       ext4_mb_release(sb);
 failed_mount2:
        for (i = 0; i < db_count; i++)
                brelse(sbi->s_group_desc[i]);
        ext4_kvfree(sbi->s_group_desc);
 failed_mount:
+       ext4_ext_release(sb);
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
        if (sbi->s_proc) {
index 1f5cf5880718d28c8ca7893f7165807f78101c6b..4eec399ec807bc6733d1a90b8c3d0d205eb795c1 100644 (file)
@@ -520,8 +520,8 @@ static void ext4_xattr_update_super_block(handle_t *handle,
 }
 
 /*
- * Release the xattr block BH: If the reference count is > 1, decrement
- * it; otherwise free the block.
+ * Release the xattr block BH: If the reference count is > 1, decrement it;
+ * otherwise free the block.
  */
 static void
 ext4_xattr_release_block(handle_t *handle, struct inode *inode,
@@ -542,16 +542,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
                if (ce)
                        mb_cache_entry_free(ce);
                get_bh(bh);
+               unlock_buffer(bh);
                ext4_free_blocks(handle, inode, bh, 0, 1,
                                 EXT4_FREE_BLOCKS_METADATA |
                                 EXT4_FREE_BLOCKS_FORGET);
-               unlock_buffer(bh);
        } else {
                le32_add_cpu(&BHDR(bh)->h_refcount, -1);
                if (ce)
                        mb_cache_entry_release(ce);
+               /*
+                * Beware of this ugliness: Releasing of xattr block references
+                * from different inodes can race and so we have to protect
+                * from a race where someone else frees the block (and releases
+                * its journal_head) before we are done dirtying the buffer. In
+                * nojournal mode this race is harmless and we actually cannot
+                * call ext4_handle_dirty_xattr_block() with locked buffer as
+                * that function can call sync_dirty_buffer() so for that case
+                * we handle the dirtying after unlocking the buffer.
+                */
+               if (ext4_handle_valid(handle))
+                       error = ext4_handle_dirty_xattr_block(handle, inode,
+                                                             bh);
                unlock_buffer(bh);
-               error = ext4_handle_dirty_xattr_block(handle, inode, bh);
+               if (!ext4_handle_valid(handle))
+                       error = ext4_handle_dirty_xattr_block(handle, inode,
+                                                             bh);
                if (IS_SYNC(inode))
                        ext4_handle_sync(handle);
                dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
index 3d30eb1fc95e383e50e91605d3526161bcfdebde..9d64679cec73b00fc4685e23d69374ca122fed09 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -254,16 +254,21 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                return -EBADF;
 
        /*
-        * It's not possible to punch hole or perform collapse range
-        * on append only file
+        * We can only allow pure fallocate on append only files
         */
-       if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)
-           && IS_APPEND(inode))
+       if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
                return -EPERM;
 
        if (IS_IMMUTABLE(inode))
                return -EPERM;
 
+       /*
+        * We can not allow to do any fallocate operation on an active
+        * swapfile
+        */
+       if (IS_SWAPFILE(inode))
+               ret = -ETXTBSY;
+
        /*
         * Revalidate the write permissions, in case security policy has
         * changed since the files were opened.
@@ -286,14 +291,6 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
                return -EFBIG;
 
-       /*
-        * There is no need to overlap collapse range with EOF, in which case
-        * it is effectively a truncate operation
-        */
-       if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
-           (offset + len >= i_size_read(inode)))
-               return -EINVAL;
-
        if (!file->f_op->fallocate)
                return -EOPNOTSUPP;
 
index 82afdcb33183951350df18d3ce05b3aeecdf3e76..951a2321ee010f35c1d3395c09d0830d74197cfd 100644 (file)
@@ -841,7 +841,15 @@ xfs_file_fallocate(
                        goto out_unlock;
                }
 
-               ASSERT(offset + len < i_size_read(inode));
+               /*
+                * There is no need to overlap collapse range with EOF,
+                * in which case it is effectively a truncate operation
+                */
+               if (offset + len >= i_size_read(inode)) {
+                       error = -EINVAL;
+                       goto out_unlock;
+               }
+
                new_size = i_size_read(inode) - len;
 
                error = xfs_collapse_file_space(ip, offset, len);
index 010ea89eeb0e407a85a052e6b8905dedb6ac5991..6a1a0245474feee8f32fe040e56f0044f53a20f4 100644 (file)
@@ -16,15 +16,6 @@ struct mpage_da_data;
 struct ext4_map_blocks;
 struct extent_status;
 
-/* shim until we merge in the xfs_collapse_range branch */
-#ifndef FALLOC_FL_COLLAPSE_RANGE
-#define FALLOC_FL_COLLAPSE_RANGE       0x08
-#endif
-
-#ifndef FALLOC_FL_ZERO_RANGE
-#define FALLOC_FL_ZERO_RANGE           0x10
-#endif
-
 #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
 
 #define show_mballoc_flags(flags) __print_flags(flags, "|",    \