ext4: add shutdown bit and check for it

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c6ea25a190f88b09354e93cb7245586af2dbc87a..bc282f9d0969355b865c20c1488776029bb20111 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,6 +37,7 @@
 #include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/bitops.h>
+#include <linux/iomap.h>
 
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -71,10 +72,9 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
                        csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
                                           csum_size);
                        offset += csum_size;
-                       csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
-                                          EXT4_INODE_SIZE(inode->i_sb) -
-                                          offset);
                }
+               csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
+                                  EXT4_INODE_SIZE(inode->i_sb) - offset);
        }
 
        return csum;
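
Aside, not part of the patch: the hunk above moves the tail ext4_chksum() call out of the inner branch so that the bytes after the checksum field are always folded in. The general pattern, checksumming a record while substituting zeros for its embedded checksum field, looks roughly like this standalone sketch; the toy checksum and helper names are illustrative, not the ext4 ones.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy stand-in for ext4_chksum(); illustrative only. */
static uint32_t toy_csum(uint32_t csum, const uint8_t *p, size_t len)
{
	while (len--)
		csum = (csum << 5) + csum + *p++;
	return csum;
}

/*
 * Checksum a size-byte record whose checksum field occupies
 * [csum_off, csum_off + csum_len): feed zeros in place of the field so
 * the stored value cannot influence its own checksum, then continue
 * unconditionally with the bytes after it.
 */
static uint32_t record_csum(const uint8_t *rec, size_t size,
			    size_t csum_off, size_t csum_len)
{
	const uint8_t zeros[4] = { 0 };
	uint32_t csum = 0;

	csum = toy_csum(csum, rec, csum_off);
	csum = toy_csum(csum, zeros, csum_len);
	csum = toy_csum(csum, rec + csum_off + csum_len,
			size - csum_off - csum_len);
	return csum;
}

int main(void)
{
	uint8_t inode[256];

	memset(inode, 0xab, sizeof(inode));
	/* e.g. a 2-byte checksum field at offset 124 of a 256-byte record */
	printf("csum=%08x\n",
	       (unsigned)record_csum(inode, sizeof(inode), 124, 2));
	return 0;
}
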
@@ -261,8 +261,15 @@ void ext4_evict_inode(struct inode *inode)
                             "couldn't mark inode dirty (err %d)", err);
                goto stop_handle;
        }
-       if (inode->i_blocks)
-               ext4_truncate(inode);
+       if (inode->i_blocks) {
+               err = ext4_truncate(inode);
+               if (err) {
+                       ext4_error(inode->i_sb,
+                                  "couldn't truncate inode %lu (err %d)",
+                                  inode->i_ino, err);
+                       goto stop_handle;
+               }
+       }
 
        /*
         * ext4_ext_truncate() doesn't reserve any slop when it
@@ -647,11 +654,15 @@ found:
                /*
                 * We have to zeroout blocks before inserting them into extent
                 * status tree. Otherwise someone could look them up there and
-                * use them before they are really zeroed.
+                * use them before they are really zeroed. We also have to
+                * unmap metadata before zeroing as otherwise writeback can
+                * overwrite zeros with stale data from the block device.
                 */
                if (flags & EXT4_GET_BLOCKS_ZERO &&
                    map->m_flags & EXT4_MAP_MAPPED &&
                    map->m_flags & EXT4_MAP_NEW) {
+                       clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
+                                          map->m_len);
                        ret = ext4_issue_zeroout(inode, map->m_lblk,
                                                 map->m_pblk, map->m_len);
                        if (ret) {
@@ -759,6 +770,9 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
                ext4_update_bh_state(bh, map.m_flags);
                bh->b_size = inode->i_sb->s_blocksize * map.m_len;
                ret = 0;
+       } else if (ret == 0) {
+               /* hole case, need to fill in bh->b_size */
+               bh->b_size = inode->i_sb->s_blocksize * map.m_len;
        }
        return ret;
 }
@@ -1119,8 +1133,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
                        if (err)
                                break;
                        if (buffer_new(bh)) {
-                               unmap_underlying_metadata(bh->b_bdev,
-                                                         bh->b_blocknr);
+                               clean_bdev_bh_alias(bh);
                                if (PageUptodate(page)) {
                                        clear_buffer_new(bh);
                                        set_buffer_uptodate(bh);
@@ -1158,7 +1171,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
        if (unlikely(err))
                page_zero_new_buffers(page, from, to);
        else if (decrypt)
-               err = fscrypt_decrypt_page(page);
+               err = fscrypt_decrypt_page(page->mapping->host, page,
+                               PAGE_SIZE, 0, page->index);
        return err;
 }
 #endif
@@ -1175,6 +1189,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
        pgoff_t index;
        unsigned from, to;
 
+       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+               return -EIO;
+
        trace_ext4_write_begin(inode, pos, len, flags);
        /*
         * Reserve one block more for addition to orphan list in case
@@ -1316,8 +1333,11 @@ static int ext4_write_end(struct file *file,
        if (ext4_has_inline_data(inode)) {
                ret = ext4_write_inline_data_end(inode, pos, len,
                                                 copied, page);
-               if (ret < 0)
+               if (ret < 0) {
+                       unlock_page(page);
+                       put_page(page);
                        goto errout;
+               }
                copied = ret;
        } else
                copied = block_write_end(file, mapping, pos,
@@ -1371,7 +1391,9 @@ errout:
  * set the buffer to be dirty, since in data=journalled mode we need
  * to call ext4_handle_dirty_metadata() instead.
  */
-static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
+static void ext4_journalled_zero_new_buffers(handle_t *handle,
+                                           struct page *page,
+                                           unsigned from, unsigned to)
 {
        unsigned int block_start = 0, block_end;
        struct buffer_head *head, *bh;
@@ -1388,7 +1410,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
                                        size = min(to, block_end) - start;
 
                                        zero_user(page, start, size);
-                                       set_buffer_uptodate(bh);
+                                       write_end_fn(handle, bh);
                                }
                                clear_buffer_new(bh);
                        }
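
Illustrative aside, outside the patch: the loop that ext4_journalled_zero_new_buffers() runs over page_buffers() zeroes, for each new buffer, only the part that overlaps the written byte range [from, to). A standalone sketch of that interval arithmetic with made-up numbers (1 KiB buffers in a 4 KiB page, and assuming every buffer counts as "new"):

#include <stdio.h>

#define PAGE_SIZE 4096u

int main(void)
{
	unsigned int blocksize = 1024;		/* several buffers per page */
	unsigned int from = 700, to = 2500;	/* byte range the write touched */

	for (unsigned int block_start = 0; block_start < PAGE_SIZE;
	     block_start += blocksize) {
		unsigned int block_end = block_start + blocksize;

		/* does this buffer overlap the written range? */
		if (to > block_start && from < block_end) {
			unsigned int start = from > block_start ? from : block_start;
			unsigned int size = (to < block_end ? to : block_end) - start;

			/* in the real code: zero_user(page, start, size)
			 * followed by write_end_fn(handle, bh) */
			printf("zero bytes [%u, %u) of the page\n", start, start + size);
		}
	}
	return 0;
}
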
@@ -1417,18 +1439,25 @@ static int ext4_journalled_write_end(struct file *file,
 
        BUG_ON(!ext4_handle_valid(handle));
 
-       if (ext4_has_inline_data(inode))
-               copied = ext4_write_inline_data_end(inode, pos, len,
-                                                   copied, page);
-       else {
-               if (copied < len) {
-                       if (!PageUptodate(page))
-                               copied = 0;
-                       zero_new_buffers(page, from+copied, to);
+       if (ext4_has_inline_data(inode)) {
+               ret = ext4_write_inline_data_end(inode, pos, len,
+                                                copied, page);
+               if (ret < 0) {
+                       unlock_page(page);
+                       put_page(page);
+                       goto errout;
                }
-
+               copied = ret;
+       } else if (unlikely(copied < len) && !PageUptodate(page)) {
+               copied = 0;
+               ext4_journalled_zero_new_buffers(handle, page, from, to);
+       } else {
+               if (unlikely(copied < len))
+                       ext4_journalled_zero_new_buffers(handle, page,
+                                                        from + copied, to);
                ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
-                                            to, &partial, write_end_fn);
+                                            from + copied, &partial,
+                                            write_end_fn);
                if (!partial)
                        SetPageUptodate(page);
        }
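
Aside (not part of the patch): in ext4_journalled_write_end() the in-page bounds come from the usual write_end arithmetic computed near the top of the function (from is pos masked to the page, to is from + len; that part sits outside the hunk). When the copy from userspace comes up short, the branches above hand ext4_journalled_zero_new_buffers() the tail that never arrived. A small sketch with illustrative numbers:

#include <stdio.h>

#define PAGE_SIZE 4096u

int main(void)
{
	unsigned long long pos = 8192 + 100;	/* file position of the write */
	unsigned int len = 1000;		/* bytes we intended to copy */
	unsigned int copied = 300;		/* bytes that actually arrived */

	unsigned int from = pos & (PAGE_SIZE - 1);	/* start within the page */
	unsigned int to = from + len;			/* end within the page */

	if (copied < len)
		/* zero (and, in data=journal mode, journal) the missing tail */
		printf("zero new buffers in [%u, %u)\n", from + copied, to);
	return 0;
}
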
@@ -1454,6 +1483,7 @@ static int ext4_journalled_write_end(struct file *file,
                 */
                ext4_orphan_add(handle, inode);
 
+errout:
        ret2 = ext4_journal_stop(handle);
        if (!ret)
                ret = ret2;
@@ -1649,6 +1679,8 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
                        BUG_ON(!PageLocked(page));
                        BUG_ON(PageWriteback(page));
                        if (invalidate) {
+                               if (page_mapped(page))
+                                       clear_page_dirty_for_io(page);
                                block_invalidatepage(page, 0, PAGE_SIZE);
                                ClearPageUptodate(page);
                        }
@@ -2018,6 +2050,12 @@ static int ext4_writepage(struct page *page,
        struct ext4_io_submit io_submit;
        bool keep_towrite = false;
 
+       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
+               ext4_invalidatepage(page, 0, PAGE_SIZE);
+               unlock_page(page);
+               return -EIO;
+       }
+
        trace_ext4_writepage(page);
        size = i_size_read(inode);
        if (page->index == size >> PAGE_SHIFT)
@@ -2350,11 +2388,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
 
        BUG_ON(map->m_len == 0);
        if (map->m_flags & EXT4_MAP_NEW) {
-               struct block_device *bdev = inode->i_sb->s_bdev;
-               int i;
-
-               for (i = 0; i < map->m_len; i++)
-                       unmap_underlying_metadata(bdev, map->m_pblk + i);
+               clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
+                                  map->m_len);
        }
        return 0;
 }
@@ -2396,7 +2431,8 @@ static int mpage_map_and_submit_extent(handle_t *handle,
                if (err < 0) {
                        struct super_block *sb = inode->i_sb;
 
-                       if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+                       if (ext4_forced_shutdown(EXT4_SB(sb)) ||
+                           EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
                                goto invalidate_dirty_pages;
                        /*
                         * Let the upper layers retry transient errors.
@@ -2451,8 +2487,8 @@ update_disksize:
                        disksize = i_size;
                if (disksize > EXT4_I(inode)->i_disksize)
                        EXT4_I(inode)->i_disksize = disksize;
-               err2 = ext4_mark_inode_dirty(handle, inode);
                up_write(&EXT4_I(inode)->i_data_sem);
+               err2 = ext4_mark_inode_dirty(handle, inode);
                if (err2)
                        ext4_error(inode->i_sb,
                                   "Failed to mark inode %lu dirty",
@@ -2618,6 +2654,9 @@ static int ext4_writepages(struct address_space *mapping,
        struct blk_plug plug;
        bool give_up_on_write = false;
 
+       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+               return -EIO;
+
        percpu_down_read(&sbi->s_journal_flag_rwsem);
        trace_ext4_writepages(inode, wbc);
 
@@ -2654,7 +2693,8 @@ static int ext4_writepages(struct address_space *mapping,
         * *never* be called, so if that ever happens, we would want
         * the stack trace.
         */
-       if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+       if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
+                    sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
                ret = -EROFS;
                goto out_writepages;
        }
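
Aside, not part of the patch: the ext4_forced_shutdown() tests added in ext4_write_begin(), ext4_writepage(), ext4_writepages() and the other entry points all follow one shape: check a sticky per-filesystem shutdown flag on entry and fail fast with -EIO once it is set. A minimal userspace sketch of that pattern; the names below are invented stand-ins, not the ext4 flag machinery.

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

struct toy_sb { atomic_bool shutdown; };	/* stand-in for the superblock flag */

static int toy_forced_shutdown(struct toy_sb *sb)
{
	return atomic_load(&sb->shutdown);
}

static int toy_write_begin(struct toy_sb *sb)
{
	if (toy_forced_shutdown(sb))
		return -EIO;		/* fail fast, touch no state */
	return 0;			/* ... normal write path ... */
}

int main(void)
{
	struct toy_sb sb;

	atomic_init(&sb.shutdown, 0);
	printf("before shutdown: %d\n", toy_write_begin(&sb));
	atomic_store(&sb.shutdown, 1);	/* e.g. the shutdown ioctl fired */
	printf("after shutdown:  %d\n", toy_write_begin(&sb));
	return 0;
}
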
@@ -2879,9 +2919,13 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
        struct inode *inode = mapping->host;
        handle_t *handle;
 
+       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+               return -EIO;
+
        index = pos >> PAGE_SHIFT;
 
-       if (ext4_nonda_switch(inode->i_sb)) {
+       if (ext4_nonda_switch(inode->i_sb) ||
+           S_ISLNK(inode->i_mode)) {
                *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
                return ext4_write_begin(file, mapping, pos,
                                        len, flags, pagep, fsdata);
@@ -3258,53 +3302,159 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 }
 
 #ifdef CONFIG_FS_DAX
-/*
- * Get block function for DAX IO and mmap faults. It takes care of converting
- * unwritten extents to written ones and initializes new / converted blocks
- * to zeros.
- */
-int ext4_dax_get_block(struct inode *inode, sector_t iblock,
-                      struct buffer_head *bh_result, int create)
+static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+                           unsigned flags, struct iomap *iomap)
 {
+       unsigned int blkbits = inode->i_blkbits;
+       unsigned long first_block = offset >> blkbits;
+       unsigned long last_block = (offset + length - 1) >> blkbits;
+       struct ext4_map_blocks map;
        int ret;
 
-       ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create);
-       if (!create)
-               return _ext4_get_block(inode, iblock, bh_result, 0);
+       if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
+               return -ERANGE;
 
-       ret = ext4_get_block_trans(inode, iblock, bh_result,
-                                  EXT4_GET_BLOCKS_PRE_IO |
-                                  EXT4_GET_BLOCKS_CREATE_ZERO);
-       if (ret < 0)
-               return ret;
+       map.m_lblk = first_block;
+       map.m_len = last_block - first_block + 1;
+
+       if (!(flags & IOMAP_WRITE)) {
+               ret = ext4_map_blocks(NULL, inode, &map, 0);
+       } else {
+               int dio_credits;
+               handle_t *handle;
+               int retries = 0;
 
-       if (buffer_unwritten(bh_result)) {
+               /* Trim mapping request to maximum we can map at once for DIO */
+               if (map.m_len > DIO_MAX_BLOCKS)
+                       map.m_len = DIO_MAX_BLOCKS;
+               dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
+retry:
                /*
-                * We are protected by i_mmap_sem or i_mutex so we know block
-                * cannot go away from under us even though we dropped
-                * i_data_sem. Convert extent to written and write zeros there.
+                * Either we allocate blocks and then we don't get unwritten
+                * extent so we have reserved enough credits, or the blocks
+                * are already allocated and unwritten and in that case
+                * extent conversion fits in the credits as well.
                 */
-               ret = ext4_get_block_trans(inode, iblock, bh_result,
-                                          EXT4_GET_BLOCKS_CONVERT |
-                                          EXT4_GET_BLOCKS_CREATE_ZERO);
-               if (ret < 0)
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+                                           dio_credits);
+               if (IS_ERR(handle))
+                       return PTR_ERR(handle);
+
+               ret = ext4_map_blocks(handle, inode, &map,
+                                     EXT4_GET_BLOCKS_CREATE_ZERO);
+               if (ret < 0) {
+                       ext4_journal_stop(handle);
+                       if (ret == -ENOSPC &&
+                           ext4_should_retry_alloc(inode->i_sb, &retries))
+                               goto retry;
                        return ret;
+               }
+
+               /*
+                * If we added blocks beyond i_size, we need to make sure they
+                * will get truncated if we crash before updating i_size in
+                * ext4_iomap_end(). For faults we don't need to do that (and
+                * even cannot because for orphan list operations inode_lock is
+                * required) - if we happen to instantiate block beyond i_size,
+                * it is because we race with truncate which has already added
+                * the inode to the orphan list.
+                */
+               if (!(flags & IOMAP_FAULT) && first_block + map.m_len >
+                   (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) {
+                       int err;
+
+                       err = ext4_orphan_add(handle, inode);
+                       if (err < 0) {
+                               ext4_journal_stop(handle);
+                               return err;
+                       }
+               }
+               ext4_journal_stop(handle);
        }
-       /*
-        * At least for now we have to clear BH_New so that DAX code
-        * doesn't attempt to zero blocks again in a racy way.
-        */
-       clear_buffer_new(bh_result);
+
+       iomap->flags = 0;
+       iomap->bdev = inode->i_sb->s_bdev;
+       iomap->offset = first_block << blkbits;
+
+       if (ret == 0) {
+               iomap->type = IOMAP_HOLE;
+               iomap->blkno = IOMAP_NULL_BLOCK;
+               iomap->length = (u64)map.m_len << blkbits;
+       } else {
+               if (map.m_flags & EXT4_MAP_MAPPED) {
+                       iomap->type = IOMAP_MAPPED;
+               } else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+                       iomap->type = IOMAP_UNWRITTEN;
+               } else {
+                       WARN_ON_ONCE(1);
+                       return -EIO;
+               }
+               iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
+               iomap->length = (u64)map.m_len << blkbits;
+       }
+
+       if (map.m_flags & EXT4_MAP_NEW)
+               iomap->flags |= IOMAP_F_NEW;
        return 0;
 }
-#else
-/* Just define empty function, it will never get called. */
-int ext4_dax_get_block(struct inode *inode, sector_t iblock,
-                      struct buffer_head *bh_result, int create)
+
+static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+                         ssize_t written, unsigned flags, struct iomap *iomap)
 {
-       BUG();
-       return 0;
+       int ret = 0;
+       handle_t *handle;
+       int blkbits = inode->i_blkbits;
+       bool truncate = false;
+
+       if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
+               return 0;
+
+       handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+       if (IS_ERR(handle)) {
+               ret = PTR_ERR(handle);
+               goto orphan_del;
+       }
+       if (ext4_update_inode_size(inode, offset + written))
+               ext4_mark_inode_dirty(handle, inode);
+       /*
+        * We may need to truncate allocated but not written blocks beyond EOF.
+        */
+       if (iomap->offset + iomap->length > 
+           ALIGN(inode->i_size, 1 << blkbits)) {
+               ext4_lblk_t written_blk, end_blk;
+
+               written_blk = (offset + written) >> blkbits;
+               end_blk = (offset + length) >> blkbits;
+               if (written_blk < end_blk && ext4_can_truncate(inode))
+                       truncate = true;
+       }
+       /*
+        * Remove inode from orphan list if we were extending an inode and
+        * everything went fine.
+        */
+       if (!truncate && inode->i_nlink &&
+           !list_empty(&EXT4_I(inode)->i_orphan))
+               ext4_orphan_del(handle, inode);
+       ext4_journal_stop(handle);
+       if (truncate) {
+               ext4_truncate_failed_write(inode);
+orphan_del:
+               /*
+                * If truncate failed early the inode might still be on the
+                * orphan list; we need to make sure the inode is removed from
+                * the orphan list in that case.
+                */
+               if (inode->i_nlink)
+                       ext4_orphan_del(NULL, inode);
+       }
+       return ret;
 }
+
+struct iomap_ops ext4_iomap_ops = {
+       .iomap_begin            = ext4_iomap_begin,
+       .iomap_end              = ext4_iomap_end,
+};
+
 #endif
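
Aside, not part of the patch: ext4_iomap_begin() and ext4_iomap_end() above juggle three units: byte offsets, filesystem blocks (a shift by i_blkbits) and 512-byte sectors for iomap->blkno. A standalone sketch of that arithmetic with illustrative values (12-bit block shift, i.e. 4 KiB blocks); the variable names mirror the code above, but nothing here is the kernel API itself.

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	unsigned int blkbits = 12;			/* 4096-byte blocks */
	uint64_t offset = 10000, length = 20000;	/* byte range given to iomap_begin */

	/* ext4_iomap_begin(): byte range -> inclusive block range */
	uint64_t first_block = offset >> blkbits;
	uint64_t last_block = (offset + length - 1) >> blkbits;
	uint64_t m_len = last_block - first_block + 1;

	/* mapping result -> iomap fields: 512-byte sectors, byte offset/length */
	uint64_t m_pblk = 123456;			/* pretend physical block */
	uint64_t blkno = m_pblk << (blkbits - 9);
	uint64_t iomap_offset = first_block << blkbits;
	uint64_t iomap_length = m_len << blkbits;

	printf("blocks [%llu, %llu] (%llu blocks), blkno %llu, %llu bytes\n",
	       (unsigned long long)first_block, (unsigned long long)last_block,
	       (unsigned long long)m_len, (unsigned long long)blkno,
	       (unsigned long long)iomap_length);

	/* ext4_iomap_end(): were blocks mapped beyond the block-aligned EOF
	 * that a short or failed write now leaves unwritten? */
	uint64_t i_size = 14096, written = 4096;
	if (iomap_offset + iomap_length > ALIGN_UP(i_size, 1u << blkbits)) {
		uint64_t written_blk = (offset + written) >> blkbits;
		uint64_t end_blk = (offset + length) >> blkbits;

		if (written_blk < end_blk)
			printf("would truncate the blocks past what was written\n");
	}
	return 0;
}
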
 
 static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
@@ -3426,19 +3576,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
        iocb->private = NULL;
        if (overwrite)
                get_block_func = ext4_dio_get_block_overwrite;
-       else if (IS_DAX(inode)) {
-               /*
-                * We can avoid zeroing for aligned DAX writes beyond EOF. Other
-                * writes need zeroing either because they can race with page
-                * faults or because they use partial blocks.
-                */
-               if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size &&
-                   ext4_aligned_io(inode, offset, count))
-                       get_block_func = ext4_dio_get_block;
-               else
-                       get_block_func = ext4_dax_get_block;
-               dio_flags = DIO_LOCKING;
-       } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
+       else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
                   round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
                get_block_func = ext4_dio_get_block;
                dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
@@ -3452,14 +3590,9 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
        BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
 #endif
-       if (IS_DAX(inode)) {
-               ret = dax_do_io(iocb, inode, iter, get_block_func,
-                               ext4_end_io_dio, dio_flags);
-       } else
-               ret = __blockdev_direct_IO(iocb, inode,
-                                          inode->i_sb->s_bdev, iter,
-                                          get_block_func,
-                                          ext4_end_io_dio, NULL, dio_flags);
+       ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
+                                  get_block_func, ext4_end_io_dio, NULL,
+                                  dio_flags);
 
        if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
                                                EXT4_STATE_DIO_UNWRITTEN)) {
@@ -3526,35 +3659,25 @@ out:
 
 static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
 {
-       int unlocked = 0;
-       struct inode *inode = iocb->ki_filp->f_mapping->host;
+       struct address_space *mapping = iocb->ki_filp->f_mapping;
+       struct inode *inode = mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
-       if (ext4_should_dioread_nolock(inode)) {
-               /*
-                * Nolock dioread optimization may be dynamically disabled
-                * via ext4_inode_block_unlocked_dio(). Check inode's state
-                * while holding extra i_dio_count ref.
-                */
-               inode_dio_begin(inode);
-               smp_mb();
-               if (unlikely(ext4_test_inode_state(inode,
-                                                   EXT4_STATE_DIOREAD_LOCK)))
-                       inode_dio_end(inode);
-               else
-                       unlocked = 1;
-       }
-       if (IS_DAX(inode)) {
-               ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block,
-                               NULL, unlocked ? 0 : DIO_LOCKING);
-       } else {
-               ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
-                                          iter, ext4_dio_get_block,
-                                          NULL, NULL,
-                                          unlocked ? 0 : DIO_LOCKING);
-       }
-       if (unlocked)
-               inode_dio_end(inode);
+       /*
+        * Shared inode_lock is enough for us - it protects against concurrent
+        * writes & truncates and since we take care of writing back page cache,
+        * we are protected against page writeback as well.
+        */
+       inode_lock_shared(inode);
+       ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
+                                          iocb->ki_pos + count);
+       if (ret)
+               goto out_unlock;
+       ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+                                  iter, ext4_dio_get_block, NULL, NULL, 0);
+out_unlock:
+       inode_unlock_shared(inode);
        return ret;
 }
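
Aside, not part of the patch: the comment above carries the whole locking argument for the new read path: shared inode_lock excludes writers and truncate, and flushing the page cache first removes the writeback hazard. A minimal pthread sketch of that shared/exclusive split; the function names are placeholders, not the VFS API.

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t inode_rwlock = PTHREAD_RWLOCK_INITIALIZER;

static void dio_read(void)
{
	/* stands in for inode_lock_shared(): many readers may hold it at once */
	pthread_rwlock_rdlock(&inode_rwlock);
	/* a filemap_write_and_wait_range() equivalent would run here,
	 * then the block-device read itself */
	printf("reading under the shared lock\n");
	pthread_rwlock_unlock(&inode_rwlock);
}

static void truncate_file(void)
{
	/* stands in for inode_lock(): truncate and writes go exclusive */
	pthread_rwlock_wrlock(&inode_rwlock);
	printf("truncating under the exclusive lock\n");
	pthread_rwlock_unlock(&inode_rwlock);
}

int main(void)
{
	dio_read();
	truncate_file();
	return 0;
}
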
 
@@ -3581,6 +3704,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
        if (ext4_has_inline_data(inode))
                return 0;
 
+       /* DAX uses iomap path now */
+       if (WARN_ON_ONCE(IS_DAX(inode)))
+               return 0;
+
        trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
        if (iov_iter_rw(iter) == READ)
                ret = ext4_direct_IO_read(iocb, iter);
@@ -3609,6 +3736,13 @@ static int ext4_journalled_set_page_dirty(struct page *page)
        return __set_page_dirty_nobuffers(page);
 }
 
+static int ext4_set_page_dirty(struct page *page)
+{
+       WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page));
+       WARN_ON_ONCE(!page_has_buffers(page));
+       return __set_page_dirty_buffers(page);
+}
+
 static const struct address_space_operations ext4_aops = {
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
@@ -3616,6 +3750,7 @@ static const struct address_space_operations ext4_aops = {
        .writepages             = ext4_writepages,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_write_end,
+       .set_page_dirty         = ext4_set_page_dirty,
        .bmap                   = ext4_bmap,
        .invalidatepage         = ext4_invalidatepage,
        .releasepage            = ext4_releasepage,
@@ -3648,6 +3783,7 @@ static const struct address_space_operations ext4_da_aops = {
        .writepages             = ext4_writepages,
        .write_begin            = ext4_da_write_begin,
        .write_end              = ext4_da_write_end,
+       .set_page_dirty         = ext4_set_page_dirty,
        .bmap                   = ext4_bmap,
        .invalidatepage         = ext4_da_invalidatepage,
        .releasepage            = ext4_releasepage,
@@ -3737,7 +3873,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
                        /* We expect the key to be set. */
                        BUG_ON(!fscrypt_has_encryption_key(inode));
                        BUG_ON(blocksize != PAGE_SIZE);
-                       WARN_ON_ONCE(fscrypt_decrypt_page(page));
+                       WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
+                                               page, PAGE_SIZE, 0, page->index));
                }
        }
        if (ext4_should_journal_data(inode)) {
@@ -3786,8 +3923,10 @@ static int ext4_block_zero_page_range(handle_t *handle,
        if (length > max || length < 0)
                length = max;
 
-       if (IS_DAX(inode))
-               return dax_zero_page_range(inode, from, length, ext4_get_block);
+       if (IS_DAX(inode)) {
+               return iomap_zero_range(inode, from, length, NULL,
+                                       &ext4_iomap_ops);
+       }
        return __ext4_block_zero_page_range(handle, mapping, from, length);
 }
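
Aside, not part of the patch: this helper only ever zeroes within a single filesystem block; max, which the function computes just above the hunk, is (as far as the surrounding code shows) the distance from `from` to the end of its block, and the clamp shown above caps the request to it. DAX inodes now route the same job through iomap_zero_range(). A small sketch of the clamp with illustrative numbers:

#include <stdio.h>

int main(void)
{
	unsigned int blocksize = 4096;
	unsigned long long from = 13000;	/* byte offset where zeroing starts */
	long long length = 9000;		/* requested bytes to zero */

	/* never cross a block boundary: cap at the end of from's block */
	unsigned int in_block = from & (blocksize - 1);
	long long max = blocksize - in_block;

	if (length > max || length < 0)
		length = max;
	printf("zero %lld bytes starting %u bytes into the block\n",
	       length, in_block);
	return 0;
}
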
 
@@ -3890,7 +4029,7 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
 }
 
 /*
- * ext4_punch_hole: punches a hole in a file by releaseing the blocks
+ * ext4_punch_hole: punches a hole in a file by releasing the blocks
  * associated with the given offset and length
  *
  * @inode:  File inode
@@ -3919,7 +4058,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
         * Write out all dirty pages to avoid race conditions
         * Then release them.
         */
-       if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+       if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
                ret = filemap_write_and_wait_range(mapping, offset,
                                                   offset + length - 1);
                if (ret)
@@ -4020,7 +4159,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
 
-       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+       inode->i_mtime = inode->i_ctime = current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 out_stop:
        ext4_journal_stop(handle);
@@ -4085,10 +4224,11 @@ int ext4_inode_attach_jinode(struct inode *inode)
  * that's fine - as long as they are linked from the inode, the post-crash
  * ext4_truncate() run will find them and release them.
  */
-void ext4_truncate(struct inode *inode)
+int ext4_truncate(struct inode *inode)
 {
        struct ext4_inode_info *ei = EXT4_I(inode);
        unsigned int credits;
+       int err = 0;
        handle_t *handle;
        struct address_space *mapping = inode->i_mapping;
 
@@ -4102,7 +4242,7 @@ void ext4_truncate(struct inode *inode)
        trace_ext4_truncate_enter(inode);
 
        if (!ext4_can_truncate(inode))
-               return;
+               return 0;
 
        ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
 
@@ -4112,15 +4252,17 @@ void ext4_truncate(struct inode *inode)
        if (ext4_has_inline_data(inode)) {
                int has_inline = 1;
 
-               ext4_inline_data_truncate(inode, &has_inline);
+               err = ext4_inline_data_truncate(inode, &has_inline);
+               if (err)
+                       return err;
                if (has_inline)
-                       return;
+                       return 0;
        }
 
        /* If we zero-out tail of the page, we have to create jinode for jbd2 */
        if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
                if (ext4_inode_attach_jinode(inode) < 0)
-                       return;
+                       return 0;
        }
 
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4129,10 +4271,8 @@ void ext4_truncate(struct inode *inode)
                credits = ext4_blocks_for_truncate(inode);
 
        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
-       if (IS_ERR(handle)) {
-               ext4_std_error(inode->i_sb, PTR_ERR(handle));
-               return;
-       }
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
 
        if (inode->i_size & (inode->i_sb->s_blocksize - 1))
                ext4_block_truncate_page(handle, mapping, inode->i_size);
@@ -4146,7 +4286,8 @@ void ext4_truncate(struct inode *inode)
         * Implication: the file must always be in a sane, consistent
         * truncatable state while each transaction commits.
         */
-       if (ext4_orphan_add(handle, inode))
+       err = ext4_orphan_add(handle, inode);
+       if (err)
                goto out_stop;
 
        down_write(&EXT4_I(inode)->i_data_sem);
@@ -4154,11 +4295,13 @@ void ext4_truncate(struct inode *inode)
        ext4_discard_preallocations(inode);
 
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-               ext4_ext_truncate(handle, inode);
+               err = ext4_ext_truncate(handle, inode);
        else
                ext4_ind_truncate(handle, inode);
 
        up_write(&ei->i_data_sem);
+       if (err)
+               goto out_stop;
 
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
@@ -4174,11 +4317,12 @@ out_stop:
        if (inode->i_nlink)
                ext4_orphan_del(handle, inode);
 
-       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+       inode->i_mtime = inode->i_ctime = current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);
 
        trace_ext4_truncate_exit(inode);
+       return err;
 }
 
 /*
@@ -4346,7 +4490,9 @@ void ext4_set_inode_flags(struct inode *inode)
                new_fl |= S_NOATIME;
        if (flags & EXT4_DIRSYNC_FL)
                new_fl |= S_DIRSYNC;
-       if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
+       if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) &&
+           !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) &&
+           !ext4_encrypted_inode(inode))
                new_fl |= S_DAX;
        inode_set_flags(inode, new_fl,
                        S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
@@ -4405,7 +4551,9 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
 {
        __le32 *magic = (void *)raw_inode +
                        EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
-       if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
+       if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
+           EXT4_INODE_SIZE(inode->i_sb) &&
+           *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
                ext4_set_inode_state(inode, EXT4_STATE_XATTR);
                ext4_find_inline_data_nolock(inode);
        } else
@@ -4414,7 +4562,7 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
 
 int ext4_get_projid(struct inode *inode, kprojid_t *projid)
 {
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
+       if (!ext4_has_feature_project(inode->i_sb))
                return -EOPNOTSUPP;
        *projid = EXT4_I(inode)->i_projid;
        return 0;
@@ -4428,6 +4576,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        struct inode *inode;
        journal_t *journal = EXT4_SB(sb)->s_journal;
        long ret;
+       loff_t size;
        int block;
        uid_t i_uid;
        gid_t i_gid;
@@ -4450,10 +4599,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
-                   EXT4_INODE_SIZE(inode->i_sb)) {
-                       EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)",
-                               EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize,
-                               EXT4_INODE_SIZE(inode->i_sb));
+                       EXT4_INODE_SIZE(inode->i_sb) ||
+                   (ei->i_extra_isize & 3)) {
+                       EXT4_ERROR_INODE(inode,
+                                        "bad extra_isize %u (inode size %u)",
+                                        ei->i_extra_isize,
+                                        EXT4_INODE_SIZE(inode->i_sb));
                        ret = -EFSCORRUPTED;
                        goto bad_inode;
                }
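
Aside, not part of the patch: the tightened check rejects an i_extra_isize that either runs past the on-disk inode or is not a multiple of 4 (the "& 3" test), since the fields behind it, and the xattr magic probed by ext4_iget_extra_inode(), sit on 32-bit boundaries. A small sketch of the same validation with illustrative sizes (the 128-byte legacy inode area inside a 256-byte on-disk inode):

#include <stdio.h>

#define GOOD_OLD_INODE_SIZE 128u	/* fixed legacy part of the on-disk inode */

static int extra_isize_ok(unsigned int extra_isize, unsigned int inode_size)
{
	if (GOOD_OLD_INODE_SIZE + extra_isize > inode_size)
		return 0;		/* would run past the on-disk inode */
	if (extra_isize & 3)
		return 0;		/* fields must stay 32-bit aligned */
	return 1;
}

int main(void)
{
	unsigned int inode_size = 256;

	printf("extra_isize  32: %s\n", extra_isize_ok(32, inode_size) ? "ok" : "bad");
	printf("extra_isize  33: %s\n", extra_isize_ok(33, inode_size) ? "ok" : "bad");
	printf("extra_isize 160: %s\n", extra_isize_ok(160, inode_size) ? "ok" : "bad");
	return 0;
}
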
@@ -4481,7 +4632,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
        i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
        i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+       if (ext4_has_feature_project(sb) &&
            EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
            EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
                i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
@@ -4528,6 +4679,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                ei->i_file_acl |=
                        ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
        inode->i_size = ext4_isize(raw_inode);
+       if ((size = i_size_read(inode)) < 0) {
+               EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
+               ret = -EFSCORRUPTED;
+               goto bad_inode;
+       }
        ei->i_disksize = inode->i_size;
 #ifdef CONFIG_QUOTA
        ei->i_reserved_quota = 0;
@@ -4571,6 +4727,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                if (ei->i_extra_isize == 0) {
                        /* The extra space is currently unused. Use it. */
+                       BUILD_BUG_ON(sizeof(struct ext4_inode) & 3);
                        ei->i_extra_isize = sizeof(struct ext4_inode) -
                                            EXT4_GOOD_OLD_INODE_SIZE;
                } else {
@@ -4814,14 +4971,14 @@ static int ext4_do_update_inode(handle_t *handle,
  * Fix up interoperability with old kernels. Otherwise, old inodes get
  * re-used with the upper 16 bits of the uid/gid intact
  */
-               if (!ei->i_dtime) {
+               if (ei->i_dtime && list_empty(&ei->i_orphan)) {
+                       raw_inode->i_uid_high = 0;
+                       raw_inode->i_gid_high = 0;
+               } else {
                        raw_inode->i_uid_high =
                                cpu_to_le16(high_16_bits(i_uid));
                        raw_inode->i_gid_high =
                                cpu_to_le16(high_16_bits(i_gid));
-               } else {
-                       raw_inode->i_uid_high = 0;
-                       raw_inode->i_gid_high = 0;
                }
        } else {
                raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
@@ -4885,8 +5042,7 @@ static int ext4_do_update_inode(handle_t *handle,
                }
        }
 
-       BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-                       EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+       BUG_ON(!ext4_has_feature_project(inode->i_sb) &&
               i_projid != EXT4_DEF_PROJID);
 
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
@@ -5073,7 +5229,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
        int orphan = 0;
        const unsigned int ia_valid = attr->ia_valid;
 
-       error = inode_change_ok(inode, attr);
+       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+               return -EIO;
+
+       error = setattr_prepare(dentry, attr);
        if (error)
                return error;
 
@@ -5149,7 +5308,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                         * update c/mtime in shrink case below
                         */
                        if (!shrink) {
-                               inode->i_mtime = ext4_current_time(inode);
+                               inode->i_mtime = current_time(inode);
                                inode->i_ctime = inode->i_mtime;
                        }
                        down_write(&EXT4_I(inode)->i_data_sem);
@@ -5194,12 +5353,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                 * in data=journal mode to make pages freeable.
                 */
                truncate_pagecache(inode, inode->i_size);
-               if (shrink)
-                       ext4_truncate(inode);
+               if (shrink) {
+                       rc = ext4_truncate(inode);
+                       if (rc)
+                               error = rc;
+               }
                up_write(&EXT4_I(inode)->i_mmap_sem);
        }
 
-       if (!rc) {
+       if (!error) {
                setattr_copy(inode, attr);
                mark_inode_dirty(inode);
        }
@@ -5211,7 +5373,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
        if (orphan && inode->i_nlink)
                ext4_orphan_del(NULL, inode);
 
-       if (!rc && (ia_valid & ATTR_MODE))
+       if (!error && (ia_valid & ATTR_MODE))
                rc = posix_acl_chmod(inode, inode->i_mode);
 
 err_out:
@@ -5356,6 +5518,9 @@ int ext4_mark_iloc_dirty(handle_t *handle,
 {
        int err = 0;
 
+       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+               return -EIO;
+
        if (IS_I_VERSION(inode))
                inode_inc_iversion(inode);
 
@@ -5379,6 +5544,9 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
 {
        int err;
 
+       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+               return -EIO;
+
        err = ext4_get_inode_loc(inode, iloc);
        if (!err) {
                BUFFER_TRACE(iloc->bh, "get_write_access");
@@ -5450,18 +5618,20 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
        err = ext4_reserve_inode_write(handle, inode, &iloc);
        if (err)
                return err;
-       if (ext4_handle_valid(handle) &&
-           EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
+       if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
            !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
                /*
-                * We need extra buffer credits since we may write into EA block
+                * In nojournal mode, we can immediately attempt to expand
+                * the inode.  When journaled, we first need to obtain extra
+                * buffer credits since we may write into the EA block
                 * with this same handle. If journal_extend fails, then it will
                 * only result in a minor loss of functionality for that inode.
                 * If this is felt to be critical, then e2fsck should be run to
                 * force a large enough s_min_extra_isize.
                 */
-               if ((jbd2_journal_extend(handle,
-                            EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
+               if (!ext4_handle_valid(handle) ||
+                   jbd2_journal_extend(handle,
+                            EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) {
                        ret = ext4_expand_extra_isize(inode,
                                                      sbi->s_want_extra_isize,
                                                      iloc, handle);
@@ -5615,6 +5785,11 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
                ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        }
        ext4_set_aops(inode);
+       /*
+        * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated.
+        * E.g. S_DAX may get cleared / set.
+        */
+       ext4_set_inode_flags(inode);
 
        jbd2_journal_unlock_updates(journal);
        percpu_up_write(&sbi->s_journal_flag_rwsem);