diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9c064727ed62978e95f27f2fe73c5e946ce672b0..88d57af1b516c5bbfd7b2f1bc471a9d76382c3bc 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,6 +37,7 @@
 #include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/bitops.h>
+#include <linux/iomap.h>
 
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -71,10 +72,9 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
                        csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
                                           csum_size);
                        offset += csum_size;
-                       csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
-                                          EXT4_INODE_SIZE(inode->i_sb) -
-                                          offset);
                }
+               csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
+                                  EXT4_INODE_SIZE(inode->i_sb) - offset);
        }
 
        return csum;
@@ -261,8 +261,15 @@ void ext4_evict_inode(struct inode *inode)
                             "couldn't mark inode dirty (err %d)", err);
                goto stop_handle;
        }
-       if (inode->i_blocks)
-               ext4_truncate(inode);
+       if (inode->i_blocks) {
+               err = ext4_truncate(inode);
+               if (err) {
+                       ext4_error(inode->i_sb,
+                                  "couldn't truncate inode %lu (err %d)",
+                                  inode->i_ino, err);
+                       goto stop_handle;
+               }
+       }
 
        /*
         * ext4_ext_truncate() doesn't reserve any slop when it
@@ -654,12 +661,8 @@ found:
                if (flags & EXT4_GET_BLOCKS_ZERO &&
                    map->m_flags & EXT4_MAP_MAPPED &&
                    map->m_flags & EXT4_MAP_NEW) {
-                       ext4_lblk_t i;
-
-                       for (i = 0; i < map->m_len; i++) {
-                               unmap_underlying_metadata(inode->i_sb->s_bdev,
-                                                         map->m_pblk + i);
-                       }
+                       clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
+                                          map->m_len);
                        ret = ext4_issue_zeroout(inode, map->m_lblk,
                                                 map->m_pblk, map->m_len);
                        if (ret) {
@@ -767,6 +770,9 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
                ext4_update_bh_state(bh, map.m_flags);
                bh->b_size = inode->i_sb->s_blocksize * map.m_len;
                ret = 0;
+       } else if (ret == 0) {
+               /* hole case, need to fill in bh->b_size */
+               bh->b_size = inode->i_sb->s_blocksize * map.m_len;
        }
        return ret;
 }
@@ -1127,8 +1133,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
                        if (err)
                                break;
                        if (buffer_new(bh)) {
-                               unmap_underlying_metadata(bh->b_bdev,
-                                                         bh->b_blocknr);
+                               clean_bdev_bh_alias(bh);
                                if (PageUptodate(page)) {
                                        clear_buffer_new(bh);
                                        set_buffer_uptodate(bh);
@@ -1166,7 +1171,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
        if (unlikely(err))
                page_zero_new_buffers(page, from, to);
        else if (decrypt)
-               err = fscrypt_decrypt_page(page);
+               err = fscrypt_decrypt_page(page->mapping->host, page,
+                               PAGE_SIZE, 0, page->index);
        return err;
 }
 #endif
@@ -2360,11 +2366,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
 
        BUG_ON(map->m_len == 0);
        if (map->m_flags & EXT4_MAP_NEW) {
-               struct block_device *bdev = inode->i_sb->s_bdev;
-               int i;
-
-               for (i = 0; i < map->m_len; i++)
-                       unmap_underlying_metadata(bdev, map->m_pblk + i);
+               clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
+                                  map->m_len);
        }
        return 0;
 }
@@ -2891,7 +2894,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 
        index = pos >> PAGE_SHIFT;
 
-       if (ext4_nonda_switch(inode->i_sb)) {
+       if (ext4_nonda_switch(inode->i_sb) ||
+           S_ISLNK(inode->i_mode)) {
                *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
                return ext4_write_begin(file, mapping, pos,
                                        len, flags, pagep, fsdata);
@@ -3268,53 +3272,159 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 }
 
 #ifdef CONFIG_FS_DAX
-/*
- * Get block function for DAX IO and mmap faults. It takes care of converting
- * unwritten extents to written ones and initializes new / converted blocks
- * to zeros.
- */
-int ext4_dax_get_block(struct inode *inode, sector_t iblock,
-                      struct buffer_head *bh_result, int create)
+static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+                           unsigned flags, struct iomap *iomap)
 {
+       unsigned int blkbits = inode->i_blkbits;
+       unsigned long first_block = offset >> blkbits;
+       unsigned long last_block = (offset + length - 1) >> blkbits;
+       struct ext4_map_blocks map;
        int ret;
 
-       ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create);
-       if (!create)
-               return _ext4_get_block(inode, iblock, bh_result, 0);
+       if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
+               return -ERANGE;
 
-       ret = ext4_get_block_trans(inode, iblock, bh_result,
-                                  EXT4_GET_BLOCKS_PRE_IO |
-                                  EXT4_GET_BLOCKS_CREATE_ZERO);
-       if (ret < 0)
-               return ret;
+       map.m_lblk = first_block;
+       map.m_len = last_block - first_block + 1;
 
-       if (buffer_unwritten(bh_result)) {
+       if (!(flags & IOMAP_WRITE)) {
+               ret = ext4_map_blocks(NULL, inode, &map, 0);
+       } else {
+               int dio_credits;
+               handle_t *handle;
+               int retries = 0;
+
+               /* Trim mapping request to maximum we can map at once for DIO */
+               if (map.m_len > DIO_MAX_BLOCKS)
+                       map.m_len = DIO_MAX_BLOCKS;
+               dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
+retry:
                /*
-                * We are protected by i_mmap_sem or i_mutex so we know block
-                * cannot go away from under us even though we dropped
-                * i_data_sem. Convert extent to written and write zeros there.
+                * Either we allocate blocks and then we don't get unwritten
+                * extent so we have reserved enough credits, or the blocks
+                * are already allocated and unwritten and in that case
+                * extent conversion fits in the credits as well.
                 */
-               ret = ext4_get_block_trans(inode, iblock, bh_result,
-                                          EXT4_GET_BLOCKS_CONVERT |
-                                          EXT4_GET_BLOCKS_CREATE_ZERO);
-               if (ret < 0)
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+                                           dio_credits);
+               if (IS_ERR(handle))
+                       return PTR_ERR(handle);
+
+               ret = ext4_map_blocks(handle, inode, &map,
+                                     EXT4_GET_BLOCKS_CREATE_ZERO);
+               if (ret < 0) {
+                       ext4_journal_stop(handle);
+                       if (ret == -ENOSPC &&
+                           ext4_should_retry_alloc(inode->i_sb, &retries))
+                               goto retry;
                        return ret;
+               }
+
+               /*
+                * If we added blocks beyond i_size, we need to make sure they
+                * will get truncated if we crash before updating i_size in
+                * ext4_iomap_end(). For faults we don't need to do that (and
+                * even cannot because for orphan list operations inode_lock is
+                * required) - if we happen to instantiate a block beyond i_size,
+                * it is because we race with truncate which has already added
+                * the inode to the orphan list.
+                */
+               if (!(flags & IOMAP_FAULT) && first_block + map.m_len >
+                   (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) {
+                       int err;
+
+                       err = ext4_orphan_add(handle, inode);
+                       if (err < 0) {
+                               ext4_journal_stop(handle);
+                               return err;
+                       }
+               }
+               ext4_journal_stop(handle);
        }
-       /*
-        * At least for now we have to clear BH_New so that DAX code
-        * doesn't attempt to zero blocks again in a racy way.
-        */
-       clear_buffer_new(bh_result);
+
+       iomap->flags = 0;
+       iomap->bdev = inode->i_sb->s_bdev;
+       iomap->offset = first_block << blkbits;
+
+       if (ret == 0) {
+               iomap->type = IOMAP_HOLE;
+               iomap->blkno = IOMAP_NULL_BLOCK;
+               iomap->length = (u64)map.m_len << blkbits;
+       } else {
+               if (map.m_flags & EXT4_MAP_MAPPED) {
+                       iomap->type = IOMAP_MAPPED;
+               } else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+                       iomap->type = IOMAP_UNWRITTEN;
+               } else {
+                       WARN_ON_ONCE(1);
+                       return -EIO;
+               }
+               iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
+               iomap->length = (u64)map.m_len << blkbits;
+       }
+
+       if (map.m_flags & EXT4_MAP_NEW)
+               iomap->flags |= IOMAP_F_NEW;
        return 0;
 }
-#else
-/* Just define empty function, it will never get called. */
-int ext4_dax_get_block(struct inode *inode, sector_t iblock,
-                      struct buffer_head *bh_result, int create)
+
+static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+                         ssize_t written, unsigned flags, struct iomap *iomap)
 {
-       BUG();
-       return 0;
+       int ret = 0;
+       handle_t *handle;
+       int blkbits = inode->i_blkbits;
+       bool truncate = false;
+
+       if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
+               return 0;
+
+       handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+       if (IS_ERR(handle)) {
+               ret = PTR_ERR(handle);
+               goto orphan_del;
+       }
+       if (ext4_update_inode_size(inode, offset + written))
+               ext4_mark_inode_dirty(handle, inode);
+       /*
+        * We may need to truncate allocated but not written blocks beyond EOF.
+        */
+       if (iomap->offset + iomap->length > 
+           ALIGN(inode->i_size, 1 << blkbits)) {
+               ext4_lblk_t written_blk, end_blk;
+
+               written_blk = (offset + written) >> blkbits;
+               end_blk = (offset + length) >> blkbits;
+               if (written_blk < end_blk && ext4_can_truncate(inode))
+                       truncate = true;
+       }
+       /*
+        * Remove inode from orphan list if we were extending an inode and
+        * everything went fine.
+        */
+       if (!truncate && inode->i_nlink &&
+           !list_empty(&EXT4_I(inode)->i_orphan))
+               ext4_orphan_del(handle, inode);
+       ext4_journal_stop(handle);
+       if (truncate) {
+               ext4_truncate_failed_write(inode);
+orphan_del:
+               /*
+                * If truncate failed early the inode might still be on the
+                * orphan list; we need to make sure the inode is removed from
+                * the orphan list in that case.
+                */
+               if (inode->i_nlink)
+                       ext4_orphan_del(NULL, inode);
+       }
+       return ret;
 }
+
+struct iomap_ops ext4_iomap_ops = {
+       .iomap_begin            = ext4_iomap_begin,
+       .iomap_end              = ext4_iomap_end,
+};
+
 #endif
 
 static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
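The ext4_iomap_begin()/ext4_iomap_end() callbacks added above are not called directly by ext4; they are exported through ext4_iomap_ops and walked by the generic iomap/DAX helpers. As a rough sketch (assumed companion wiring in fs/ext4/file.c, which is not part of this diff; the function name below is illustrative only), a DAX write path would consume the ops table like this:

/*
 * Sketch only: assumes the 4.10-era dax_iomap_rw() helper and the usual
 * <linux/dax.h>/<linux/fs.h> declarations; write checks, file_update_time()
 * and overwrite handling are elided.
 */
static ssize_t ext4_dax_write_iter_sketch(struct kiocb *iocb,
					  struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	inode_lock(inode);
	/* Walks the range, calling ->iomap_begin()/->iomap_end() per mapping */
	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
	inode_unlock(inode);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}

The same ops table is reused further down in this diff, where ext4_block_zero_page_range() switches DAX zeroing from dax_zero_page_range() to iomap_zero_range(..., &ext4_iomap_ops).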
@@ -3436,19 +3546,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
        iocb->private = NULL;
        if (overwrite)
                get_block_func = ext4_dio_get_block_overwrite;
-       else if (IS_DAX(inode)) {
-               /*
-                * We can avoid zeroing for aligned DAX writes beyond EOF. Other
-                * writes need zeroing either because they can race with page
-                * faults or because they use partial blocks.
-                */
-               if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size &&
-                   ext4_aligned_io(inode, offset, count))
-                       get_block_func = ext4_dio_get_block;
-               else
-                       get_block_func = ext4_dax_get_block;
-               dio_flags = DIO_LOCKING;
-       } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
+       else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
                   round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
                get_block_func = ext4_dio_get_block;
                dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
@@ -3462,14 +3560,9 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
        BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
 #endif
-       if (IS_DAX(inode)) {
-               ret = dax_do_io(iocb, inode, iter, get_block_func,
-                               ext4_end_io_dio, dio_flags);
-       } else
-               ret = __blockdev_direct_IO(iocb, inode,
-                                          inode->i_sb->s_bdev, iter,
-                                          get_block_func,
-                                          ext4_end_io_dio, NULL, dio_flags);
+       ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
+                                  get_block_func, ext4_end_io_dio, NULL,
+                                  dio_flags);
 
        if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
                                                EXT4_STATE_DIO_UNWRITTEN)) {
@@ -3538,6 +3631,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
 {
        struct address_space *mapping = iocb->ki_filp->f_mapping;
        struct inode *inode = mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
        /*
@@ -3546,19 +3640,12 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
         * we are protected against page writeback as well.
         */
        inode_lock_shared(inode);
-       if (IS_DAX(inode)) {
-               ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
-       } else {
-               size_t count = iov_iter_count(iter);
-
-               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
-                                                  iocb->ki_pos + count);
-               if (ret)
-                       goto out_unlock;
-               ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
-                                          iter, ext4_dio_get_block,
-                                          NULL, NULL, 0);
-       }
+       ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
+                                          iocb->ki_pos + count);
+       if (ret)
+               goto out_unlock;
+       ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+                                  iter, ext4_dio_get_block, NULL, NULL, 0);
 out_unlock:
        inode_unlock_shared(inode);
        return ret;
@@ -3587,6 +3674,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
        if (ext4_has_inline_data(inode))
                return 0;
 
+       /* DAX uses iomap path now */
+       if (WARN_ON_ONCE(IS_DAX(inode)))
+               return 0;
+
        trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
        if (iov_iter_rw(iter) == READ)
                ret = ext4_direct_IO_read(iocb, iter);
@@ -3615,6 +3706,13 @@ static int ext4_journalled_set_page_dirty(struct page *page)
        return __set_page_dirty_nobuffers(page);
 }
 
+static int ext4_set_page_dirty(struct page *page)
+{
+       WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page));
+       WARN_ON_ONCE(!page_has_buffers(page));
+       return __set_page_dirty_buffers(page);
+}
+
 static const struct address_space_operations ext4_aops = {
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
@@ -3622,6 +3720,7 @@ static const struct address_space_operations ext4_aops = {
        .writepages             = ext4_writepages,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_write_end,
+       .set_page_dirty         = ext4_set_page_dirty,
        .bmap                   = ext4_bmap,
        .invalidatepage         = ext4_invalidatepage,
        .releasepage            = ext4_releasepage,
@@ -3654,6 +3753,7 @@ static const struct address_space_operations ext4_da_aops = {
        .writepages             = ext4_writepages,
        .write_begin            = ext4_da_write_begin,
        .write_end              = ext4_da_write_end,
+       .set_page_dirty         = ext4_set_page_dirty,
        .bmap                   = ext4_bmap,
        .invalidatepage         = ext4_da_invalidatepage,
        .releasepage            = ext4_releasepage,
@@ -3743,7 +3843,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
                        /* We expect the key to be set. */
                        BUG_ON(!fscrypt_has_encryption_key(inode));
                        BUG_ON(blocksize != PAGE_SIZE);
-                       WARN_ON_ONCE(fscrypt_decrypt_page(page));
+                       WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
+                                               page, PAGE_SIZE, 0, page->index));
                }
        }
        if (ext4_should_journal_data(inode)) {
@@ -3792,8 +3893,10 @@ static int ext4_block_zero_page_range(handle_t *handle,
        if (length > max || length < 0)
                length = max;
 
-       if (IS_DAX(inode))
-               return dax_zero_page_range(inode, from, length, ext4_get_block);
+       if (IS_DAX(inode)) {
+               return iomap_zero_range(inode, from, length, NULL,
+                                       &ext4_iomap_ops);
+       }
        return __ext4_block_zero_page_range(handle, mapping, from, length);
 }
 
@@ -4026,7 +4129,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
 
-       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+       inode->i_mtime = inode->i_ctime = current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 out_stop:
        ext4_journal_stop(handle);
@@ -4091,10 +4194,11 @@ int ext4_inode_attach_jinode(struct inode *inode)
  * that's fine - as long as they are linked from the inode, the post-crash
  * ext4_truncate() run will find them and release them.
  */
-void ext4_truncate(struct inode *inode)
+int ext4_truncate(struct inode *inode)
 {
        struct ext4_inode_info *ei = EXT4_I(inode);
        unsigned int credits;
+       int err = 0;
        handle_t *handle;
        struct address_space *mapping = inode->i_mapping;
 
@@ -4108,7 +4212,7 @@ void ext4_truncate(struct inode *inode)
        trace_ext4_truncate_enter(inode);
 
        if (!ext4_can_truncate(inode))
-               return;
+               return 0;
 
        ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
 
@@ -4120,13 +4224,13 @@ void ext4_truncate(struct inode *inode)
 
                ext4_inline_data_truncate(inode, &has_inline);
                if (has_inline)
-                       return;
+                       return 0;
        }
 
        /* If we zero-out tail of the page, we have to create jinode for jbd2 */
        if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
                if (ext4_inode_attach_jinode(inode) < 0)
-                       return;
+                       return 0;
        }
 
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4135,10 +4239,8 @@ void ext4_truncate(struct inode *inode)
                credits = ext4_blocks_for_truncate(inode);
 
        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
-       if (IS_ERR(handle)) {
-               ext4_std_error(inode->i_sb, PTR_ERR(handle));
-               return;
-       }
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
 
        if (inode->i_size & (inode->i_sb->s_blocksize - 1))
                ext4_block_truncate_page(handle, mapping, inode->i_size);
@@ -4152,7 +4254,8 @@ void ext4_truncate(struct inode *inode)
         * Implication: the file must always be in a sane, consistent
         * truncatable state while each transaction commits.
         */
-       if (ext4_orphan_add(handle, inode))
+       err = ext4_orphan_add(handle, inode);
+       if (err)
                goto out_stop;
 
        down_write(&EXT4_I(inode)->i_data_sem);
@@ -4160,11 +4263,13 @@ void ext4_truncate(struct inode *inode)
        ext4_discard_preallocations(inode);
 
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-               ext4_ext_truncate(handle, inode);
+               err = ext4_ext_truncate(handle, inode);
        else
                ext4_ind_truncate(handle, inode);
 
        up_write(&ei->i_data_sem);
+       if (err)
+               goto out_stop;
 
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
@@ -4180,11 +4285,12 @@ out_stop:
        if (inode->i_nlink)
                ext4_orphan_del(handle, inode);
 
-       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+       inode->i_mtime = inode->i_ctime = current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);
 
        trace_ext4_truncate_exit(inode);
+       return err;
 }
 
 /*
@@ -4352,7 +4458,9 @@ void ext4_set_inode_flags(struct inode *inode)
                new_fl |= S_NOATIME;
        if (flags & EXT4_DIRSYNC_FL)
                new_fl |= S_DIRSYNC;
-       if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
+       if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) &&
+           !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) &&
+           !ext4_encrypted_inode(inode))
                new_fl |= S_DAX;
        inode_set_flags(inode, new_fl,
                        S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
@@ -4411,7 +4519,9 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
 {
        __le32 *magic = (void *)raw_inode +
                        EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
-       if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
+       if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
+           EXT4_INODE_SIZE(inode->i_sb) &&
+           *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
                ext4_set_inode_state(inode, EXT4_STATE_XATTR);
                ext4_find_inline_data_nolock(inode);
        } else
@@ -4434,6 +4544,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        struct inode *inode;
        journal_t *journal = EXT4_SB(sb)->s_journal;
        long ret;
+       loff_t size;
        int block;
        uid_t i_uid;
        gid_t i_gid;
@@ -4456,10 +4567,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
-                   EXT4_INODE_SIZE(inode->i_sb)) {
-                       EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)",
-                               EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize,
-                               EXT4_INODE_SIZE(inode->i_sb));
+                       EXT4_INODE_SIZE(inode->i_sb) ||
+                   (ei->i_extra_isize & 3)) {
+                       EXT4_ERROR_INODE(inode,
+                                        "bad extra_isize %u (inode size %u)",
+                                        ei->i_extra_isize,
+                                        EXT4_INODE_SIZE(inode->i_sb));
                        ret = -EFSCORRUPTED;
                        goto bad_inode;
                }
@@ -4534,6 +4647,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                ei->i_file_acl |=
                        ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
        inode->i_size = ext4_isize(raw_inode);
+       if ((size = i_size_read(inode)) < 0) {
+               EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
+               ret = -EFSCORRUPTED;
+               goto bad_inode;
+       }
        ei->i_disksize = inode->i_size;
 #ifdef CONFIG_QUOTA
        ei->i_reserved_quota = 0;
@@ -4577,6 +4695,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                if (ei->i_extra_isize == 0) {
                        /* The extra space is currently unused. Use it. */
+                       BUILD_BUG_ON(sizeof(struct ext4_inode) & 3);
                        ei->i_extra_isize = sizeof(struct ext4_inode) -
                                            EXT4_GOOD_OLD_INODE_SIZE;
                } else {
@@ -5154,7 +5273,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                         * update c/mtime in shrink case below
                         */
                        if (!shrink) {
-                               inode->i_mtime = ext4_current_time(inode);
+                               inode->i_mtime = current_time(inode);
                                inode->i_ctime = inode->i_mtime;
                        }
                        down_write(&EXT4_I(inode)->i_data_sem);
@@ -5199,12 +5318,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                 * in data=journal mode to make pages freeable.
                 */
                truncate_pagecache(inode, inode->i_size);
-               if (shrink)
-                       ext4_truncate(inode);
+               if (shrink) {
+                       rc = ext4_truncate(inode);
+                       if (rc)
+                               error = rc;
+               }
                up_write(&EXT4_I(inode)->i_mmap_sem);
        }
 
-       if (!rc) {
+       if (!error) {
                setattr_copy(inode, attr);
                mark_inode_dirty(inode);
        }
@@ -5216,7 +5338,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
        if (orphan && inode->i_nlink)
                ext4_orphan_del(NULL, inode);
 
-       if (!rc && (ia_valid & ATTR_MODE))
+       if (!error && (ia_valid & ATTR_MODE))
                rc = posix_acl_chmod(inode, inode->i_mode);
 
 err_out:
@@ -5455,18 +5577,20 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
        err = ext4_reserve_inode_write(handle, inode, &iloc);
        if (err)
                return err;
-       if (ext4_handle_valid(handle) &&
-           EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
+       if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
            !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
                /*
-                * We need extra buffer credits since we may write into EA block
+                * In nojournal mode, we can immediately attempt to expand
+                * the inode.  When journaled, we first need to obtain extra
+                * buffer credits since we may write into the EA block
                 * with this same handle. If journal_extend fails, then it will
                 * only result in a minor loss of functionality for that inode.
                 * If this is felt to be critical, then e2fsck should be run to
                 * force a large enough s_min_extra_isize.
                 */
-               if ((jbd2_journal_extend(handle,
-                            EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
+               if (!ext4_handle_valid(handle) ||
+                   jbd2_journal_extend(handle,
+                            EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) {
                        ret = ext4_expand_extra_isize(inode,
                                                      sbi->s_want_extra_isize,
                                                      iloc, handle);
@@ -5620,6 +5744,11 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
                ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        }
        ext4_set_aops(inode);
+       /*
+        * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated.
+        * E.g. S_DAX may get cleared / set.
+        */
+       ext4_set_inode_flags(inode);
 
        jbd2_journal_unlock_updates(journal);
        percpu_up_write(&sbi->s_journal_flag_rwsem);