Merge branch 'for-4.10/fs-unmap' of git://git.kernel.dk/linux-block

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 15 Dec 2016 01:09:00 +0000 (17:09 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 15 Dec 2016 01:09:00 +0000 (17:09 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 15 Dec 2016 01:09:00 +0000 (17:09 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 15 Dec 2016 01:09:00 +0000 (17:09 -0800)
diff --combined fs/buffer.c

index a3bfd57c2697e5093d4563623fc9bdee9e8900a1,1104ce8b45369b9dbf43fb1d7fa9e46be9da50c6..d21771fcf7d345ab4299cb7fa25881ffcc61ef52
--- 1/fs/buffer.c
--- 2/fs/buffer.c
+++ b/fs/buffer.c
@@@ -43,6 -43,7 +43,7 @@@
   #include <linux/bitops.h>
   #include <linux/mpage.h>
   #include <linux/bit_spinlock.h>
+ #include <linux/pagevec.h>
   #include <trace/events/block.h>
   
   static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
@@@ -1604,37 -1605,80 +1605,80 @@@ void create_empty_buffers(struct page *
   }
   EXPORT_SYMBOL(create_empty_buffers);
   
- /*
-  * We are taking a block for data and we don't want any output from any
-  * buffer-cache aliases starting from return from that function and
-  * until the moment when something will explicitly mark the buffer
-  * dirty (hopefully that will not happen until we will free that block ;-)
-  * We don't even need to mark it not-uptodate - nobody can expect
-  * anything from a newly allocated buffer anyway. We used to used
-  * unmap_buffer() for such invalidation, but that was wrong. We definitely
-  * don't want to mark the alias unmapped, for example - it would confuse
-  * anyone who might pick it with bread() afterwards...
+ /**
+  * clean_bdev_aliases: clean a range of buffers in block device
+  * @bdev: Block device to clean buffers in
+  * @block: Start of a range of blocks to clean
+  * @len: Number of blocks to clean
    *
-  * Also..  Note that bforget() doesn't lock the buffer.  So there can
-  * be writeout I/O going on against recently-freed buffers.  We don't
-  * wait on that I/O in bforget() - it's more efficient to wait on the I/O
-  * only if we really need to.  That happens here.
+  * We are taking a range of blocks for data and we don't want writeback of any
+  * buffer-cache aliases starting from return from this function and until the
+  * moment when something will explicitly mark the buffer dirty (hopefully that
+  * will not happen until we will free that block ;-) We don't even need to mark
+  * it not-uptodate - nobody can expect anything from a newly allocated buffer
+  * anyway. We used to use unmap_buffer() for such invalidation, but that was
+  * wrong. We definitely don't want to mark the alias unmapped, for example - it
+  * would confuse anyone who might pick it with bread() afterwards...
+  *
+  * Also..  Note that bforget() doesn't lock the buffer.  So there can be
+  * writeout I/O going on against recently-freed buffers.  We don't wait on that
+  * I/O in bforget() - it's more efficient to wait on the I/O only if we really
+  * need to.  That happens here.
    */
- void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
+ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
   {
-       struct buffer_head *old_bh;
+       struct inode *bd_inode = bdev->bd_inode;
+       struct address_space *bd_mapping = bd_inode->i_mapping;
+       struct pagevec pvec;
+       pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
+       pgoff_t end;
+       int i;
+       struct buffer_head *bh;
+       struct buffer_head *head;
   
-       might_sleep();
+       end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
+       pagevec_init(&pvec, 0);
+       while (index <= end && pagevec_lookup(&pvec, bd_mapping, index,
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       struct page *page = pvec.pages[i];
   
-       old_bh = __find_get_block_slow(bdev, block);
-       if (old_bh) {
-               clear_buffer_dirty(old_bh);
-               wait_on_buffer(old_bh);
-               clear_buffer_req(old_bh);
-               __brelse(old_bh);
+                       index = page->index;
+                       if (index > end)
+                               break;
+                       if (!page_has_buffers(page))
+                               continue;
+                       /*
+                        * We use page lock instead of bd_mapping->private_lock
+                        * to pin buffers here since we can afford to sleep and
+                        * it scales better than a global spinlock.
+                        */
+                       lock_page(page);
+                       /* Recheck when the page is locked which pins bhs */
+                       if (!page_has_buffers(page))
+                               goto unlock_page;
+                       head = page_buffers(page);
+                       bh = head;
+                       do {
+                               if (!buffer_mapped(bh))
+                                       goto next;
+                               if (bh->b_blocknr >= block + len)
+                                       break;
+                               clear_buffer_dirty(bh);
+                               wait_on_buffer(bh);
+                               clear_buffer_req(bh);
+ next:
+                               bh = bh->b_this_page;
+                       } while (bh != head);
+ unlock_page:
+                       unlock_page(page);
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+               index++;
         }
   }
- EXPORT_SYMBOL(unmap_underlying_metadata);
+ EXPORT_SYMBOL(clean_bdev_aliases);
   
   /*
    * Size is a power-of-two in the range 512..PAGE_SIZE,
@@@ -1745,8 -1789,7 +1789,7 @@@ int __block_write_full_page(struct inod
                         if (buffer_new(bh)) {
                                 /* blockdev mappings never come here */
                                 clear_buffer_new(bh);
-                               unmap_underlying_metadata(bh->b_bdev,
-                                                       bh->b_blocknr);
+                               clean_bdev_bh_alias(bh);
                         }
                 }
                 bh = bh->b_this_page;
@@@ -1992,8 -2035,7 +2035,7 @@@ int __block_write_begin_int(struct pag
                         }
   
                         if (buffer_new(bh)) {
-                               unmap_underlying_metadata(bh->b_bdev,
-                                                       bh->b_blocknr);
+                               clean_bdev_bh_alias(bh);
                                 if (PageUptodate(page)) {
                                         clear_buffer_new(bh);
                                         set_buffer_uptodate(bh);
@@@ -2633,7 -2675,7 +2675,7 @@@ int nobh_write_begin(struct address_spa
                 if (!buffer_mapped(bh))
                         is_mapped_to_disk = 0;
                 if (buffer_new(bh))
-                       unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+                       clean_bdev_bh_alias(bh);
                 if (PageUptodate(page)) {
                         set_buffer_uptodate(bh);
                         continue;
@@@ -3403,7 -3445,7 +3445,7 @@@ void free_buffer_head(struct buffer_hea
   }
   EXPORT_SYMBOL(free_buffer_head);
   
- -static void buffer_exit_cpu(int cpu)
+ +static int buffer_exit_cpu_dead(unsigned int cpu)
   {
         int i;
         struct bh_lru *b = &per_cpu(bh_lrus, cpu);
@@@ -3414,7 -3456,14 +3456,7 @@@
         }
         this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
         per_cpu(bh_accounting, cpu).nr = 0;
- -}
- -
- -static int buffer_cpu_notify(struct notifier_block *self,
- -                            unsigned long action, void *hcpu)
- -{
- -      if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
- -              buffer_exit_cpu((unsigned long)hcpu);
- -      return NOTIFY_OK;
+ +      return 0;
   }
   
   /**
@@@ -3464,7 -3513,6 +3506,7 @@@ EXPORT_SYMBOL(bh_submit_read)
   void __init buffer_init(void)
   {
         unsigned long nrpages;
+ +      int ret;
   
         bh_cachep = kmem_cache_create("buffer_head",
                         sizeof(struct buffer_head), 0,
@@@ -3477,7 -3525,5 +3519,7 @@@
          */
         nrpages = (nr_free_buffer_pages() * 10) / 100;
         max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
- -      hotcpu_notifier(buffer_cpu_notify, 0);
+ +      ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
+ +                                      NULL, buffer_exit_cpu_dead);
+ +      WARN_ON(ret < 0);
   }
diff --combined fs/direct-io.c

index 835e23a4ee4b44b1c2d5a38b76fa50c1cf4b1b83,4ea57edf3e54c62ba33cd5884e8c72c0920d15d5..86aa79859d4d347b59467316b6806389717bd5c8
--- 1/fs/direct-io.c
--- 2/fs/direct-io.c
+++ b/fs/direct-io.c
@@@ -457,7 -457,7 +457,7 @@@ static struct bio *dio_await_one(struc
                 dio->waiter = current;
                 spin_unlock_irqrestore(&dio->bio_lock, flags);
                 if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
- -                  !blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
+ +                  !blk_mq_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
                         io_schedule();
                 /* wake up sets us TASK_RUNNING */
                 spin_lock_irqsave(&dio->bio_lock, flags);
@@@ -842,24 -842,6 +842,6 @@@ out
         return ret;
   }
   
- /*
-  * Clean any dirty buffers in the blockdev mapping which alias newly-created
-  * file blocks.  Only called for S_ISREG files - blockdevs do not set
-  * buffer_new
-  */
- static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
- {
-       unsigned i;
-       unsigned nblocks;
- 
-       nblocks = map_bh->b_size >> dio->inode->i_blkbits;
- 
-       for (i = 0; i < nblocks; i++) {
-               unmap_underlying_metadata(map_bh->b_bdev,
-                                         map_bh->b_blocknr + i);
-       }
- }
- 
   /*
    * If we are not writing the entire block and get_block() allocated
    * the block for us, we need to fill-in the unused portion of the
@@@ -960,11 -942,15 +942,15 @@@ static int do_direct_IO(struct dio *dio
                                         goto do_holes;
   
                                 sdio->blocks_available =
-                                               map_bh->b_size >> sdio->blkbits;
+                                               map_bh->b_size >> blkbits;
                                 sdio->next_block_for_io =
                                         map_bh->b_blocknr << sdio->blkfactor;
-                               if (buffer_new(map_bh))
-                                       clean_blockdev_aliases(dio, map_bh);
+                               if (buffer_new(map_bh)) {
+                                       clean_bdev_aliases(
+                                               map_bh->b_bdev,
+                                               map_bh->b_blocknr,
+                                               map_bh->b_size >> blkbits);
+                               }
   
                                 if (!sdio->blkfactor)
                                         goto do_holes;
diff --combined fs/ext2/inode.c

index 046b642f358592b995f831cdf1fcf5cd61ec95db,eb11f7e2b8aaab10b34c09f259f6b58e610f1397..e173afe9266109f4e7b948433d4d422e90bacc72
--- 1/fs/ext2/inode.c
--- 2/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@@ -622,7 -622,7 +622,7 @@@ static int ext2_get_blocks(struct inod
                            u32 *bno, bool *new, bool *boundary,
                            int create)
   {
- -      int err = -EIO;
+ +      int err;
         int offsets[4];
         Indirect chain[4];
         Indirect *partial;
@@@ -639,7 -639,7 +639,7 @@@
         depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
   
         if (depth == 0)
- -              return (err);
+ +              return -EIO;
   
         partial = ext2_get_branch(inode, depth, offsets, chain, &err);
         /* Simplest case - block found, no allocation needed */
@@@ -732,16 -732,13 +732,13 @@@
         }
   
         if (IS_DAX(inode)) {
-               int i;
- 
                 /*
                  * We must unmap blocks before zeroing so that writeback cannot
                  * overwrite zeros with stale data from block device page cache.
                  */
-               for (i = 0; i < count; i++) {
-                       unmap_underlying_metadata(inode->i_sb->s_bdev,
-                                       le32_to_cpu(chain[depth-1].key) + i);
-               }
+               clean_bdev_aliases(inode->i_sb->s_bdev,
+                                  le32_to_cpu(chain[depth-1].key),
+                                  count);
                 /*
                  * block must be initialised before we put it in the tree
                  * so that it's not found by another thread before it's
@@@ -761,6 -758,7 +758,6 @@@
         ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
         mutex_unlock(&ei->truncate_mutex);
   got_it:
- -      *bno = le32_to_cpu(chain[depth-1].key);
         if (count > blocks_to_boundary)
                 *boundary = true;
         err = count;
@@@ -771,8 -769,6 +768,8 @@@ cleanup
                 brelse(partial->bh);
                 partial--;
         }
+ +      if (err > 0)
+ +              *bno = le32_to_cpu(chain[depth-1].key);
         return err;
   }
   
@@@ -850,9 -846,6 +847,9 @@@ struct iomap_ops ext2_iomap_ops = 
         .iomap_begin            = ext2_iomap_begin,
         .iomap_end              = ext2_iomap_end,
   };
+ +#else
+ +/* Define empty ops for !CONFIG_FS_DAX case to avoid ugly ifdefs */
+ +struct iomap_ops ext2_iomap_ops;
   #endif /* CONFIG_FS_DAX */
   
   int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@@ -1296,11 -1289,9 +1293,11 @@@ static int ext2_setsize(struct inode *i
   
         inode_dio_wait(inode);
   
- -      if (IS_DAX(inode))
- -              error = dax_truncate_page(inode, newsize, ext2_get_block);
- -      else if (test_opt(inode->i_sb, NOBH))
+ +      if (IS_DAX(inode)) {
+ +              error = iomap_zero_range(inode, newsize,
+ +                                       PAGE_ALIGN(newsize) - newsize, NULL,
+ +                                       &ext2_iomap_ops);
+ +      } else if (test_opt(inode->i_sb, NOBH))
                 error = nobh_truncate_page(inode->i_mapping,
                                 newsize, ext2_get_block);
         else
diff --combined fs/ext4/extents.c

index 3e1014fe835efbd4bfe1e61e6b67f6c982404648,dd5b74dfa01804c3b69cdaf121b4a2e4a61972de..b1f8416923ab9384adc89d36c9d99fa3e21a7285
--- 1/fs/ext4/extents.c
--- 2/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@@ -3777,14 -3777,6 +3777,6 @@@ out
         return err;
   }
   
- static void unmap_underlying_metadata_blocks(struct block_device *bdev,
-                       sector_t block, int count)
- {
-       int i;
-       for (i = 0; i < count; i++)
-                 unmap_underlying_metadata(bdev, block + i);
- }
- 
   /*
    * Handle EOFBLOCKS_FL flag, clearing it if necessary
    */
@@@ -4121,9 -4113,8 +4113,8 @@@ out
          * new.
          */
         if (allocated > map->m_len) {
-               unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
-                                       newblock + map->m_len,
-                                       allocated - map->m_len);
+               clean_bdev_aliases(inode->i_sb->s_bdev, newblock + map->m_len,
+                                  allocated - map->m_len);
                 allocated = map->m_len;
         }
         map->m_len = allocated;
@@@ -4631,7 -4622,7 +4622,7 @@@ out2
         return err ? err : allocated;
   }
   
- -void ext4_ext_truncate(handle_t *handle, struct inode *inode)
+ +int ext4_ext_truncate(handle_t *handle, struct inode *inode)
   {
         struct super_block *sb = inode->i_sb;
         ext4_lblk_t last_block;
@@@ -4645,9 -4636,7 +4636,9 @@@
   
         /* we have to know where to truncate from in crash case */
         EXT4_I(inode)->i_disksize = inode->i_size;
- -      ext4_mark_inode_dirty(handle, inode);
+ +      err = ext4_mark_inode_dirty(handle, inode);
+ +      if (err)
+ +              return err;
   
         last_block = (inode->i_size + sb->s_blocksize - 1)
                         >> EXT4_BLOCK_SIZE_BITS(sb);
@@@ -4659,9 -4648,12 +4650,9 @@@ retry
                 congestion_wait(BLK_RW_ASYNC, HZ/50);
                 goto retry;
         }
- -      if (err) {
- -              ext4_std_error(inode->i_sb, err);
- -              return;
- -      }
- -      err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
- -      ext4_std_error(inode->i_sb, err);
+ +      if (err)
+ +              return err;
+ +      return ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
   }
   
   static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
@@@ -4700,7 -4692,7 +4691,7 @@@ retry
                 /*
                  * Recalculate credits when extent tree depth changes.
                  */
- -              if (depth >= 0 && depth != ext_depth(inode)) {
+ +              if (depth != ext_depth(inode)) {
                         credits = ext4_chunk_trans_blocks(inode, len);
                         depth = ext_depth(inode);
                 }
@@@ -4724,7 -4716,7 +4715,7 @@@
                 map.m_lblk += ret;
                 map.m_len = len = len - ret;
                 epos = (loff_t)map.m_lblk << inode->i_blkbits;
- -              inode->i_ctime = ext4_current_time(inode);
+ +              inode->i_ctime = current_time(inode);
                 if (new_size) {
                         if (epos > new_size)
                                 epos = new_size;
@@@ -4852,7 -4844,7 +4843,7 @@@ static long ext4_zero_range(struct fil
                 }
                 /* Now release the pages and zero block aligned part of pages */
                 truncate_pagecache_range(inode, start, end - 1);
- -              inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ +              inode->i_mtime = inode->i_ctime = current_time(inode);
   
                 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
                                              flags, mode);
@@@ -4877,7 -4869,7 +4868,7 @@@
                 goto out_dio;
         }
   
- -      inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ +      inode->i_mtime = inode->i_ctime = current_time(inode);
         if (new_size) {
                 ext4_update_inode_size(inode, new_size);
         } else {
@@@ -5567,7 -5559,7 +5558,7 @@@ int ext4_collapse_range(struct inode *i
         up_write(&EXT4_I(inode)->i_data_sem);
         if (IS_SYNC(inode))
                 ext4_handle_sync(handle);
- -      inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ +      inode->i_mtime = inode->i_ctime = current_time(inode);
         ext4_mark_inode_dirty(handle, inode);
   
   out_stop:
@@@ -5677,7 -5669,7 +5668,7 @@@ int ext4_insert_range(struct inode *ino
         /* Expand file to avoid data loss if there is error while shifting */
         inode->i_size += len;
         EXT4_I(inode)->i_disksize += len;
- -      inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ +      inode->i_mtime = inode->i_ctime = current_time(inode);
         ret = ext4_mark_inode_dirty(handle, inode);
         if (ret)
                 goto out_stop;
diff --combined fs/ext4/inode.c

index 72d593fa690d136d5aad0642ed2fac99bb2ce43d,2f8127601befc0f47bace3080ff6ec1975935d43..88d57af1b516c5bbfd7b2f1bc471a9d76382c3bc
--- 1/fs/ext4/inode.c
--- 2/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@@ -37,7 -37,6 +37,7 @@@
   #include <linux/printk.h>
   #include <linux/slab.h>
   #include <linux/bitops.h>
+ +#include <linux/iomap.h>
   
   #include "ext4_jbd2.h"
   #include "xattr.h"
@@@ -72,9 -71,10 +72,9 @@@ static __u32 ext4_inode_csum(struct ino
                         csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
                                            csum_size);
                         offset += csum_size;
- -                      csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
- -                                         EXT4_INODE_SIZE(inode->i_sb) -
- -                                         offset);
                 }
+ +              csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
+ +                                 EXT4_INODE_SIZE(inode->i_sb) - offset);
         }
   
         return csum;
@@@ -261,15 -261,8 +261,15 @@@ void ext4_evict_inode(struct inode *ino
                              "couldn't mark inode dirty (err %d)", err);
                 goto stop_handle;
         }
- -      if (inode->i_blocks)
- -              ext4_truncate(inode);
+ +      if (inode->i_blocks) {
+ +              err = ext4_truncate(inode);
+ +              if (err) {
+ +                      ext4_error(inode->i_sb,
+ +                                 "couldn't truncate inode %lu (err %d)",
+ +                                 inode->i_ino, err);
+ +                      goto stop_handle;
+ +              }
+ +      }
   
         /*
          * ext4_ext_truncate() doesn't reserve any slop when it
@@@ -661,12 -654,8 +661,8 @@@ found
                 if (flags & EXT4_GET_BLOCKS_ZERO &&
                     map->m_flags & EXT4_MAP_MAPPED &&
                     map->m_flags & EXT4_MAP_NEW) {
-                       ext4_lblk_t i;
- 
-                       for (i = 0; i < map->m_len; i++) {
-                               unmap_underlying_metadata(inode->i_sb->s_bdev,
-                                                         map->m_pblk + i);
-                       }
+                       clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
+                                          map->m_len);
                         ret = ext4_issue_zeroout(inode, map->m_lblk,
                                                  map->m_pblk, map->m_len);
                         if (ret) {
@@@ -774,9 -763,6 +770,9 @@@ static int _ext4_get_block(struct inod
                 ext4_update_bh_state(bh, map.m_flags);
                 bh->b_size = inode->i_sb->s_blocksize * map.m_len;
                 ret = 0;
+ +      } else if (ret == 0) {
+ +              /* hole case, need to fill in bh->b_size */
+ +              bh->b_size = inode->i_sb->s_blocksize * map.m_len;
         }
         return ret;
   }
@@@ -1137,8 -1123,7 +1133,7 @@@ static int ext4_block_write_begin(struc
                         if (err)
                                 break;
                         if (buffer_new(bh)) {
-                               unmap_underlying_metadata(bh->b_bdev,
-                                                         bh->b_blocknr);
+                               clean_bdev_bh_alias(bh);
                                 if (PageUptodate(page)) {
                                         clear_buffer_new(bh);
                                         set_buffer_uptodate(bh);
@@@ -1176,8 -1161,7 +1171,8 @@@
         if (unlikely(err))
                 page_zero_new_buffers(page, from, to);
         else if (decrypt)
- -              err = fscrypt_decrypt_page(page);
+ +              err = fscrypt_decrypt_page(page->mapping->host, page,
+ +                              PAGE_SIZE, 0, page->index);
         return err;
   }
   #endif
@@@ -2371,11 -2355,8 +2366,8 @@@ static int mpage_map_one_extent(handle_
   
         BUG_ON(map->m_len == 0);
         if (map->m_flags & EXT4_MAP_NEW) {
-               struct block_device *bdev = inode->i_sb->s_bdev;
-               int i;
- 
-               for (i = 0; i < map->m_len; i++)
-                       unmap_underlying_metadata(bdev, map->m_pblk + i);
+               clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
+                                  map->m_len);
         }
         return 0;
   }
@@@ -2902,8 -2883,7 +2894,8 @@@ static int ext4_da_write_begin(struct f
   
         index = pos >> PAGE_SHIFT;
   
- -      if (ext4_nonda_switch(inode->i_sb)) {
+ +      if (ext4_nonda_switch(inode->i_sb) ||
+ +          S_ISLNK(inode->i_mode)) {
                 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
                 return ext4_write_begin(file, mapping, pos,
                                         len, flags, pagep, fsdata);
@@@ -3280,159 -3260,53 +3272,159 @@@ static int ext4_releasepage(struct pag
   }
   
   #ifdef CONFIG_FS_DAX
- -/*
- - * Get block function for DAX IO and mmap faults. It takes care of converting
- - * unwritten extents to written ones and initializes new / converted blocks
- - * to zeros.
- - */
- -int ext4_dax_get_block(struct inode *inode, sector_t iblock,
- -                     struct buffer_head *bh_result, int create)
+ +static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ +                          unsigned flags, struct iomap *iomap)
   {
+ +      unsigned int blkbits = inode->i_blkbits;
+ +      unsigned long first_block = offset >> blkbits;
+ +      unsigned long last_block = (offset + length - 1) >> blkbits;
+ +      struct ext4_map_blocks map;
         int ret;
   
- -      ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create);
- -      if (!create)
- -              return _ext4_get_block(inode, iblock, bh_result, 0);
+ +      if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
+ +              return -ERANGE;
   
- -      ret = ext4_get_block_trans(inode, iblock, bh_result,
- -                                 EXT4_GET_BLOCKS_PRE_IO |
- -                                 EXT4_GET_BLOCKS_CREATE_ZERO);
- -      if (ret < 0)
- -              return ret;
+ +      map.m_lblk = first_block;
+ +      map.m_len = last_block - first_block + 1;
+ +
+ +      if (!(flags & IOMAP_WRITE)) {
+ +              ret = ext4_map_blocks(NULL, inode, &map, 0);
+ +      } else {
+ +              int dio_credits;
+ +              handle_t *handle;
+ +              int retries = 0;
   
- -      if (buffer_unwritten(bh_result)) {
+ +              /* Trim mapping request to maximum we can map at once for DIO */
+ +              if (map.m_len > DIO_MAX_BLOCKS)
+ +                      map.m_len = DIO_MAX_BLOCKS;
+ +              dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
+ +retry:
                 /*
- -               * We are protected by i_mmap_sem or i_mutex so we know block
- -               * cannot go away from under us even though we dropped
- -               * i_data_sem. Convert extent to written and write zeros there.
+ +               * Either we allocate blocks and then we don't get unwritten
+ +               * extent so we have reserved enough credits, or the blocks
+ +               * are already allocated and unwritten and in that case
+ +               * extent conversion fits in the credits as well.
                  */
- -              ret = ext4_get_block_trans(inode, iblock, bh_result,
- -                                         EXT4_GET_BLOCKS_CONVERT |
- -                                         EXT4_GET_BLOCKS_CREATE_ZERO);
- -              if (ret < 0)
+ +              handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+ +                                          dio_credits);
+ +              if (IS_ERR(handle))
+ +                      return PTR_ERR(handle);
+ +
+ +              ret = ext4_map_blocks(handle, inode, &map,
+ +                                    EXT4_GET_BLOCKS_CREATE_ZERO);
+ +              if (ret < 0) {
+ +                      ext4_journal_stop(handle);
+ +                      if (ret == -ENOSPC &&
+ +                          ext4_should_retry_alloc(inode->i_sb, &retries))
+ +                              goto retry;
                         return ret;
+ +              }
+ +
+ +              /*
+ +               * If we added blocks beyond i_size, we need to make sure they
+ +               * will get truncated if we crash before updating i_size in
+ +               * ext4_iomap_end(). For faults we don't need to do that (and
+ +               * even cannot because for orphan list operations inode_lock is
+ +               * required) - if we happen to instantiate block beyond i_size,
+ +               * it is because we race with truncate which has already added
+ +               * the inode to the orphan list.
+ +               */
+ +              if (!(flags & IOMAP_FAULT) && first_block + map.m_len >
+ +                  (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) {
+ +                      int err;
+ +
+ +                      err = ext4_orphan_add(handle, inode);
+ +                      if (err < 0) {
+ +                              ext4_journal_stop(handle);
+ +                              return err;
+ +                      }
+ +              }
+ +              ext4_journal_stop(handle);
         }
- -      /*
- -       * At least for now we have to clear BH_New so that DAX code
- -       * doesn't attempt to zero blocks again in a racy way.
- -       */
- -      clear_buffer_new(bh_result);
+ +
+ +      iomap->flags = 0;
+ +      iomap->bdev = inode->i_sb->s_bdev;
+ +      iomap->offset = first_block << blkbits;
+ +
+ +      if (ret == 0) {
+ +              iomap->type = IOMAP_HOLE;
+ +              iomap->blkno = IOMAP_NULL_BLOCK;
+ +              iomap->length = (u64)map.m_len << blkbits;
+ +      } else {
+ +              if (map.m_flags & EXT4_MAP_MAPPED) {
+ +                      iomap->type = IOMAP_MAPPED;
+ +              } else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+ +                      iomap->type = IOMAP_UNWRITTEN;
+ +              } else {
+ +                      WARN_ON_ONCE(1);
+ +                      return -EIO;
+ +              }
+ +              iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
+ +              iomap->length = (u64)map.m_len << blkbits;
+ +      }
+ +
+ +      if (map.m_flags & EXT4_MAP_NEW)
+ +              iomap->flags |= IOMAP_F_NEW;
         return 0;
   }
- -#else
- -/* Just define empty function, it will never get called. */
- -int ext4_dax_get_block(struct inode *inode, sector_t iblock,
- -                     struct buffer_head *bh_result, int create)
+ +
+ +static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+ +                        ssize_t written, unsigned flags, struct iomap *iomap)
   {
- -      BUG();
- -      return 0;
+ +      int ret = 0;
+ +      handle_t *handle;
+ +      int blkbits = inode->i_blkbits;
+ +      bool truncate = false;
+ +
+ +      if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
+ +              return 0;
+ +
+ +      handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+ +      if (IS_ERR(handle)) {
+ +              ret = PTR_ERR(handle);
+ +              goto orphan_del;
+ +      }
+ +      if (ext4_update_inode_size(inode, offset + written))
+ +              ext4_mark_inode_dirty(handle, inode);
+ +      /*
+ +       * We may need to truncate allocated but not written blocks beyond EOF.
+ +       */
+ +      if (iomap->offset + iomap->length > 
+ +          ALIGN(inode->i_size, 1 << blkbits)) {
+ +              ext4_lblk_t written_blk, end_blk;
+ +
+ +              written_blk = (offset + written) >> blkbits;
+ +              end_blk = (offset + length) >> blkbits;
+ +              if (written_blk < end_blk && ext4_can_truncate(inode))
+ +                      truncate = true;
+ +      }
+ +      /*
+ +       * Remove inode from orphan list if we were extending an inode and
+ +       * everything went fine.
+ +       */
+ +      if (!truncate && inode->i_nlink &&
+ +          !list_empty(&EXT4_I(inode)->i_orphan))
+ +              ext4_orphan_del(handle, inode);
+ +      ext4_journal_stop(handle);
+ +      if (truncate) {
+ +              ext4_truncate_failed_write(inode);
+ +orphan_del:
+ +              /*
+ +               * If truncate failed early the inode might still be on the
+ +               * orphan list; we need to make sure the inode is removed from
+ +               * the orphan list in that case.
+ +               */
+ +              if (inode->i_nlink)
+ +                      ext4_orphan_del(NULL, inode);
+ +      }
+ +      return ret;
   }
+ +
+ +struct iomap_ops ext4_iomap_ops = {
+ +      .iomap_begin            = ext4_iomap_begin,
+ +      .iomap_end              = ext4_iomap_end,
+ +};
+ +
   #endif
   
   static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
@@@ -3554,7 -3428,19 +3546,7 @@@ static ssize_t ext4_direct_IO_write(str
         iocb->private = NULL;
         if (overwrite)
                 get_block_func = ext4_dio_get_block_overwrite;
- -      else if (IS_DAX(inode)) {
- -              /*
- -               * We can avoid zeroing for aligned DAX writes beyond EOF. Other
- -               * writes need zeroing either because they can race with page
- -               * faults or because they use partial blocks.
- -               */
- -              if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size &&
- -                  ext4_aligned_io(inode, offset, count))
- -                      get_block_func = ext4_dio_get_block;
- -              else
- -                      get_block_func = ext4_dax_get_block;
- -              dio_flags = DIO_LOCKING;
- -      } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
+ +      else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
                    round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
                 get_block_func = ext4_dio_get_block;
                 dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
@@@ -3568,9 -3454,14 +3560,9 @@@
   #ifdef CONFIG_EXT4_FS_ENCRYPTION
         BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
   #endif
- -      if (IS_DAX(inode)) {
- -              ret = dax_do_io(iocb, inode, iter, get_block_func,
- -                              ext4_end_io_dio, dio_flags);
- -      } else
- -              ret = __blockdev_direct_IO(iocb, inode,
- -                                         inode->i_sb->s_bdev, iter,
- -                                         get_block_func,
- -                                         ext4_end_io_dio, NULL, dio_flags);
+ +      ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
+ +                                 get_block_func, ext4_end_io_dio, NULL,
+ +                                 dio_flags);
   
         if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
                                                 EXT4_STATE_DIO_UNWRITTEN)) {
@@@ -3639,7 -3530,6 +3631,7 @@@ static ssize_t ext4_direct_IO_read(stru
   {
         struct address_space *mapping = iocb->ki_filp->f_mapping;
         struct inode *inode = mapping->host;
+ +      size_t count = iov_iter_count(iter);
         ssize_t ret;
   
         /*
@@@ -3648,12 -3538,19 +3640,12 @@@
          * we are protected against page writeback as well.
          */
         inode_lock_shared(inode);
- -      if (IS_DAX(inode)) {
- -              ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
- -      } else {
- -              size_t count = iov_iter_count(iter);
- -
- -              ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
- -                                                 iocb->ki_pos + count);
- -              if (ret)
- -                      goto out_unlock;
- -              ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
- -                                         iter, ext4_dio_get_block,
- -                                         NULL, NULL, 0);
- -      }
+ +      ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
+ +                                         iocb->ki_pos + count);
+ +      if (ret)
+ +              goto out_unlock;
+ +      ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+ +                                 iter, ext4_dio_get_block, NULL, NULL, 0);
   out_unlock:
         inode_unlock_shared(inode);
         return ret;
@@@ -3682,10 -3579,6 +3674,10 @@@ static ssize_t ext4_direct_IO(struct ki
         if (ext4_has_inline_data(inode))
                 return 0;
   
+ +      /* DAX uses iomap path now */
+ +      if (WARN_ON_ONCE(IS_DAX(inode)))
+ +              return 0;
+ +
         trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
         if (iov_iter_rw(iter) == READ)
                 ret = ext4_direct_IO_read(iocb, iter);
@@@ -3714,13 -3607,6 +3706,13 @@@ static int ext4_journalled_set_page_dir
         return __set_page_dirty_nobuffers(page);
   }
   
+ +static int ext4_set_page_dirty(struct page *page)
+ +{
+ +      WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page));
+ +      WARN_ON_ONCE(!page_has_buffers(page));
+ +      return __set_page_dirty_buffers(page);
+ +}
+ +
   static const struct address_space_operations ext4_aops = {
         .readpage               = ext4_readpage,
         .readpages              = ext4_readpages,
@@@ -3728,7 -3614,6 +3720,7 @@@
         .writepages             = ext4_writepages,
         .write_begin            = ext4_write_begin,
         .write_end              = ext4_write_end,
+ +      .set_page_dirty         = ext4_set_page_dirty,
         .bmap                   = ext4_bmap,
         .invalidatepage         = ext4_invalidatepage,
         .releasepage            = ext4_releasepage,
@@@ -3761,7 -3646,6 +3753,7 @@@ static const struct address_space_opera
         .writepages             = ext4_writepages,
         .write_begin            = ext4_da_write_begin,
         .write_end              = ext4_da_write_end,
+ +      .set_page_dirty         = ext4_set_page_dirty,
         .bmap                   = ext4_bmap,
         .invalidatepage         = ext4_da_invalidatepage,
         .releasepage            = ext4_releasepage,
@@@ -3851,8 -3735,7 +3843,8 @@@ static int __ext4_block_zero_page_range
                         /* We expect the key to be set. */
                         BUG_ON(!fscrypt_has_encryption_key(inode));
                         BUG_ON(blocksize != PAGE_SIZE);
- -                      WARN_ON_ONCE(fscrypt_decrypt_page(page));
+ +                      WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
+ +                                              page, PAGE_SIZE, 0, page->index));
                 }
         }
         if (ext4_should_journal_data(inode)) {
@@@ -3901,10 -3784,8 +3893,10 @@@ static int ext4_block_zero_page_range(h
         if (length > max || length < 0)
                 length = max;
   
- -      if (IS_DAX(inode))
- -              return dax_zero_page_range(inode, from, length, ext4_get_block);
+ +      if (IS_DAX(inode)) {
+ +              return iomap_zero_range(inode, from, length, NULL,
+ +                                      &ext4_iomap_ops);
+ +      }
         return __ext4_block_zero_page_range(handle, mapping, from, length);
   }
   
@@@ -4137,7 -4018,7 +4129,7 @@@ int ext4_punch_hole(struct inode *inode
         if (IS_SYNC(inode))
                 ext4_handle_sync(handle);
   
- -      inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ +      inode->i_mtime = inode->i_ctime = current_time(inode);
         ext4_mark_inode_dirty(handle, inode);
   out_stop:
         ext4_journal_stop(handle);
@@@ -4202,11 -4083,10 +4194,11 @@@ int ext4_inode_attach_jinode(struct ino
    * that's fine - as long as they are linked from the inode, the post-crash
    * ext4_truncate() run will find them and release them.
    */
- -void ext4_truncate(struct inode *inode)
+ +int ext4_truncate(struct inode *inode)
   {
         struct ext4_inode_info *ei = EXT4_I(inode);
         unsigned int credits;
+ +      int err = 0;
         handle_t *handle;
         struct address_space *mapping = inode->i_mapping;
   
@@@ -4220,7 -4100,7 +4212,7 @@@
         trace_ext4_truncate_enter(inode);
   
         if (!ext4_can_truncate(inode))
- -              return;
+ +              return 0;
   
         ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
   
@@@ -4232,13 -4112,13 +4224,13 @@@
   
                 ext4_inline_data_truncate(inode, &has_inline);
                 if (has_inline)
- -                      return;
+ +                      return 0;
         }
   
         /* If we zero-out tail of the page, we have to create jinode for jbd2 */
         if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
                 if (ext4_inode_attach_jinode(inode) < 0)
- -                      return;
+ +                      return 0;
         }
   
         if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@@ -4247,8 -4127,10 +4239,8 @@@
                 credits = ext4_blocks_for_truncate(inode);
   
         handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
- -      if (IS_ERR(handle)) {
- -              ext4_std_error(inode->i_sb, PTR_ERR(handle));
- -              return;
- -      }
+ +      if (IS_ERR(handle))
+ +              return PTR_ERR(handle);
   
         if (inode->i_size & (inode->i_sb->s_blocksize - 1))
                 ext4_block_truncate_page(handle, mapping, inode->i_size);
@@@ -4262,8 -4144,7 +4254,8 @@@
          * Implication: the file must always be in a sane, consistent
          * truncatable state while each transaction commits.
          */
- -      if (ext4_orphan_add(handle, inode))
+ +      err = ext4_orphan_add(handle, inode);
+ +      if (err)
                 goto out_stop;
   
         down_write(&EXT4_I(inode)->i_data_sem);
@@@ -4271,13 -4152,11 +4263,13 @@@
         ext4_discard_preallocations(inode);
   
         if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- -              ext4_ext_truncate(handle, inode);
+ +              err = ext4_ext_truncate(handle, inode);
         else
                 ext4_ind_truncate(handle, inode);
   
         up_write(&ei->i_data_sem);
+ +      if (err)
+ +              goto out_stop;
   
         if (IS_SYNC(inode))
                 ext4_handle_sync(handle);
@@@ -4293,12 -4172,11 +4285,12 @@@ out_stop
         if (inode->i_nlink)
                 ext4_orphan_del(handle, inode);
   
- -      inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ +      inode->i_mtime = inode->i_ctime = current_time(inode);
         ext4_mark_inode_dirty(handle, inode);
         ext4_journal_stop(handle);
   
         trace_ext4_truncate_exit(inode);
+ +      return err;
   }
   
   /*
@@@ -4466,9 -4344,7 +4458,9 @@@ void ext4_set_inode_flags(struct inode 
                 new_fl |= S_NOATIME;
         if (flags & EXT4_DIRSYNC_FL)
                 new_fl |= S_DIRSYNC;
- -      if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
+ +      if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) &&
+ +          !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) &&
+ +          !ext4_encrypted_inode(inode))
                 new_fl |= S_DAX;
         inode_set_flags(inode, new_fl,
                         S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
@@@ -4527,9 -4403,7 +4519,9 @@@ static inline void ext4_iget_extra_inod
   {
         __le32 *magic = (void *)raw_inode +
                         EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
- -      if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
+ +      if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
+ +          EXT4_INODE_SIZE(inode->i_sb) &&
+ +          *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
                 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
                 ext4_find_inline_data_nolock(inode);
         } else
@@@ -4552,7 -4426,6 +4544,7 @@@ struct inode *ext4_iget(struct super_bl
         struct inode *inode;
         journal_t *journal = EXT4_SB(sb)->s_journal;
         long ret;
+ +      loff_t size;
         int block;
         uid_t i_uid;
         gid_t i_gid;
@@@ -4575,12 -4448,10 +4567,12 @@@
         if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
- -                  EXT4_INODE_SIZE(inode->i_sb)) {
- -                      EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)",
- -                              EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize,
- -                              EXT4_INODE_SIZE(inode->i_sb));
+ +                      EXT4_INODE_SIZE(inode->i_sb) ||
+ +                  (ei->i_extra_isize & 3)) {
+ +                      EXT4_ERROR_INODE(inode,
+ +                                       "bad extra_isize %u (inode size %u)",
+ +                                       ei->i_extra_isize,
+ +                                       EXT4_INODE_SIZE(inode->i_sb));
                         ret = -EFSCORRUPTED;
                         goto bad_inode;
                 }
@@@ -4655,11 -4526,6 +4647,11 @@@
                 ei->i_file_acl |=
                         ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
         inode->i_size = ext4_isize(raw_inode);
+ +      if ((size = i_size_read(inode)) < 0) {
+ +              EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
+ +              ret = -EFSCORRUPTED;
+ +              goto bad_inode;
+ +      }
         ei->i_disksize = inode->i_size;
   #ifdef CONFIG_QUOTA
         ei->i_reserved_quota = 0;
@@@ -4703,7 -4569,6 +4695,7 @@@
         if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                 if (ei->i_extra_isize == 0) {
                         /* The extra space is currently unused. Use it. */
+ +                      BUILD_BUG_ON(sizeof(struct ext4_inode) & 3);
                         ei->i_extra_isize = sizeof(struct ext4_inode) -
                                             EXT4_GOOD_OLD_INODE_SIZE;
                 } else {
@@@ -5281,7 -5146,7 +5273,7 @@@ int ext4_setattr(struct dentry *dentry
                          * update c/mtime in shrink case below
                          */
                         if (!shrink) {
- -                              inode->i_mtime = ext4_current_time(inode);
+ +                              inode->i_mtime = current_time(inode);
                                 inode->i_ctime = inode->i_mtime;
                         }
                         down_write(&EXT4_I(inode)->i_data_sem);
@@@ -5326,15 -5191,12 +5318,15 @@@
                  * in data=journal mode to make pages freeable.
                  */
                 truncate_pagecache(inode, inode->i_size);
- -              if (shrink)
- -                      ext4_truncate(inode);
+ +              if (shrink) {
+ +                      rc = ext4_truncate(inode);
+ +                      if (rc)
+ +                              error = rc;
+ +              }
                 up_write(&EXT4_I(inode)->i_mmap_sem);
         }
   
- -      if (!rc) {
+ +      if (!error) {
                 setattr_copy(inode, attr);
                 mark_inode_dirty(inode);
         }
@@@ -5346,7 -5208,7 +5338,7 @@@
         if (orphan && inode->i_nlink)
                 ext4_orphan_del(NULL, inode);
   
- -      if (!rc && (ia_valid & ATTR_MODE))
+ +      if (!error && (ia_valid & ATTR_MODE))
                 rc = posix_acl_chmod(inode, inode->i_mode);
   
   err_out:
@@@ -5585,20 -5447,18 +5577,20 @@@ int ext4_mark_inode_dirty(handle_t *han
         err = ext4_reserve_inode_write(handle, inode, &iloc);
         if (err)
                 return err;
- -      if (ext4_handle_valid(handle) &&
- -          EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
+ +      if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
             !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
                 /*
- -               * We need extra buffer credits since we may write into EA block
+ +               * In nojournal mode, we can immediately attempt to expand
+ +               * the inode.  When journaled, we first need to obtain extra
+ +               * buffer credits since we may write into the EA block
                  * with this same handle. If journal_extend fails, then it will
                  * only result in a minor loss of functionality for that inode.
                  * If this is felt to be critical, then e2fsck should be run to
                  * force a large enough s_min_extra_isize.
                  */
- -              if ((jbd2_journal_extend(handle,
- -                           EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
+ +              if (!ext4_handle_valid(handle) ||
+ +                  jbd2_journal_extend(handle,
+ +                           EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) {
                         ret = ext4_expand_extra_isize(inode,
                                                       sbi->s_want_extra_isize,
                                                       iloc, handle);
@@@ -5752,11 -5612,6 +5744,11 @@@ int ext4_change_inode_journal_flag(stru
                 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
         }
         ext4_set_aops(inode);
+ +      /*
+ +       * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated.
+ +       * E.g. S_DAX may get cleared / set.
+ +       */
+ +      ext4_set_inode_flags(inode);
   
         jbd2_journal_unlock_updates(journal);
         percpu_up_write(&sbi->s_journal_flag_rwsem);
diff --combined fs/ext4/page-io.c

index e2332a65e8fbb0d12ef754f8f70c5ee453013525,f28fd6483e0453c8f0bff5ea352d54344b8e0669..d83b0f3c5fe9eac1390b71bd2c3d43b13a087f51
--- 1/fs/ext4/page-io.c
--- 2/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@@ -457,7 -457,7 +457,7 @@@ int ext4_bio_write_page(struct ext4_io_
                 }
                 if (buffer_new(bh)) {
                         clear_buffer_new(bh);
-                       unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+                       clean_bdev_bh_alias(bh);
                 }
                 set_buffer_async_write(bh);
                 nr_to_submit++;
@@@ -470,8 -470,7 +470,8 @@@
                 gfp_t gfp_flags = GFP_NOFS;
   
         retry_encrypt:
- -              data_page = fscrypt_encrypt_page(inode, page, gfp_flags);
+ +              data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0,
+ +                                              page->index, gfp_flags);
                 if (IS_ERR(data_page)) {
                         ret = PTR_ERR(data_page);
                         if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
diff --combined fs/ocfs2/aops.c

index 9a88984f9f6fa1803a6ecdbc8e8ae6b9efa6b2b9,e8f65eefffca81a9dfa2163cd544698107fdaca1..4d9c6f5ec28a62efbda693918562df97a8bfa20f
--- 1/fs/ocfs2/aops.c
--- 2/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@@ -630,7 -630,7 +630,7 @@@ int ocfs2_map_page_blocks(struct page *
   
                 if (!buffer_mapped(bh)) {
                         map_bh(bh, inode->i_sb, *p_blkno);
-                       unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+                       clean_bdev_bh_alias(bh);
                 }
   
                 if (PageUptodate(page)) {
@@@ -1950,7 -1950,8 +1950,7 @@@ static void ocfs2_write_end_inline(stru
   }
   
   int ocfs2_write_end_nolock(struct address_space *mapping,
- -                         loff_t pos, unsigned len, unsigned copied,
- -                         struct page *page, void *fsdata)
+ +                         loff_t pos, unsigned len, unsigned copied, void *fsdata)
   {
         int i, ret;
         unsigned from, to, start = pos & (PAGE_SIZE - 1);
@@@ -2063,7 -2064,7 +2063,7 @@@ static int ocfs2_write_end(struct file 
         int ret;
         struct inode *inode = mapping->host;
   
- -      ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
+ +      ret = ocfs2_write_end_nolock(mapping, pos, len, copied, fsdata);
   
         up_write(&OCFS2_I(inode)->ip_alloc_sem);
         ocfs2_inode_unlock(inode, 1);
@@@ -2240,7 -2241,7 +2240,7 @@@ static int ocfs2_dio_get_block(struct i
                 dwc->dw_zero_count++;
         }
   
- -      ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc);
+ +      ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc);
         BUG_ON(ret != len);
         ret = 0;
   unlock:
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 15 Dec 2016 01:09:00 +0000 (17:09 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 15 Dec 2016 01:09:00 +0000 (17:09 -0800)
		1	2
fs/buffer.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/direct-io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext2/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/extents.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/page-io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ocfs2/aops.c	patch \|	diff1 \|	diff2 \|	blob \| history