git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/commitdiff
Merge branch 'for-4.10/fs-unmap' of git://git.kernel.dk/linux-block
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 15 Dec 2016 01:09:00 +0000 (17:09 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 15 Dec 2016 01:09:00 +0000 (17:09 -0800)
Pull fs metadata unmap optimization from Jens Axboe:
 "A series from Jan Kara, providing a more efficient way of unmapping
  metadata from the buffer cache than doing it block-by-block.

  Provide a general helper that existing callers can use."

* 'for-4.10/fs-unmap' of git://git.kernel.dk/linux-block:
  fs: Remove unmap_underlying_metadata
  fs: Add helper to clean bdev aliases under a bh and use it
  ext2: Use clean_bdev_aliases() instead of iteration
  ext4: Use clean_bdev_aliases() instead of iteration
  direct-io: Use clean_bdev_aliases() instead of handmade iteration
  fs: Provide function to unmap metadata for a range of blocks

1  2 
fs/buffer.c
fs/direct-io.c
fs/ext2/inode.c
fs/ext4/extents.c
fs/ext4/inode.c
fs/ext4/page-io.c
fs/ocfs2/aops.c

diff --combined fs/buffer.c
index a3bfd57c2697e5093d4563623fc9bdee9e8900a1,1104ce8b45369b9dbf43fb1d7fa9e46be9da50c6..d21771fcf7d345ab4299cb7fa25881ffcc61ef52
@@@ -43,6 -43,7 +43,7 @@@
  #include <linux/bitops.h>
  #include <linux/mpage.h>
  #include <linux/bit_spinlock.h>
+ #include <linux/pagevec.h>
  #include <trace/events/block.h>
  
  static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
@@@ -1604,37 -1605,80 +1605,80 @@@ void create_empty_buffers(struct page *
  }
  EXPORT_SYMBOL(create_empty_buffers);
  
- /*
-  * We are taking a block for data and we don't want any output from any
-  * buffer-cache aliases starting from return from that function and
-  * until the moment when something will explicitly mark the buffer
-  * dirty (hopefully that will not happen until we will free that block ;-)
-  * We don't even need to mark it not-uptodate - nobody can expect
-  * anything from a newly allocated buffer anyway. We used to used
-  * unmap_buffer() for such invalidation, but that was wrong. We definitely
-  * don't want to mark the alias unmapped, for example - it would confuse
-  * anyone who might pick it with bread() afterwards...
+ /**
+  * clean_bdev_aliases: clean a range of buffers in block device
+  * @bdev: Block device to clean buffers in
+  * @block: Start of a range of blocks to clean
+  * @len: Number of blocks to clean
   *
-  * Also..  Note that bforget() doesn't lock the buffer.  So there can
-  * be writeout I/O going on against recently-freed buffers.  We don't
-  * wait on that I/O in bforget() - it's more efficient to wait on the I/O
-  * only if we really need to.  That happens here.
+  * We are taking a range of blocks for data and we don't want writeback of any
+  * buffer-cache aliases starting from return from this function and until the
+  * moment when something will explicitly mark the buffer dirty (hopefully that
+  * will not happen until we will free that block ;-) We don't even need to mark
+  * it not-uptodate - nobody can expect anything from a newly allocated buffer
+  * anyway. We used to use unmap_buffer() for such invalidation, but that was
+  * wrong. We definitely don't want to mark the alias unmapped, for example - it
+  * would confuse anyone who might pick it with bread() afterwards...
+  *
+  * Also..  Note that bforget() doesn't lock the buffer.  So there can be
+  * writeout I/O going on against recently-freed buffers.  We don't wait on that
+  * I/O in bforget() - it's more efficient to wait on the I/O only if we really
+  * need to.  That happens here.
   */
- void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
+ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
  {
-       struct buffer_head *old_bh;
+       struct inode *bd_inode = bdev->bd_inode;
+       struct address_space *bd_mapping = bd_inode->i_mapping;
+       struct pagevec pvec;
+       pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
+       pgoff_t end;
+       int i;
+       struct buffer_head *bh;
+       struct buffer_head *head;
  
-       might_sleep();
+       end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
+       pagevec_init(&pvec, 0);
+       while (index <= end && pagevec_lookup(&pvec, bd_mapping, index,
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       struct page *page = pvec.pages[i];
  
-       old_bh = __find_get_block_slow(bdev, block);
-       if (old_bh) {
-               clear_buffer_dirty(old_bh);
-               wait_on_buffer(old_bh);
-               clear_buffer_req(old_bh);
-               __brelse(old_bh);
+                       index = page->index;
+                       if (index > end)
+                               break;
+                       if (!page_has_buffers(page))
+                               continue;
+                       /*
+                        * We use page lock instead of bd_mapping->private_lock
+                        * to pin buffers here since we can afford to sleep and
+                        * it scales better than a global spinlock lock.
+                        */
+                       lock_page(page);
+                       /* Recheck when the page is locked which pins bhs */
+                       if (!page_has_buffers(page))
+                               goto unlock_page;
+                       head = page_buffers(page);
+                       bh = head;
+                       do {
+                               if (!buffer_mapped(bh))
+                                       goto next;
+                               if (bh->b_blocknr >= block + len)
+                                       break;
+                               clear_buffer_dirty(bh);
+                               wait_on_buffer(bh);
+                               clear_buffer_req(bh);
+ next:
+                               bh = bh->b_this_page;
+                       } while (bh != head);
+ unlock_page:
+                       unlock_page(page);
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+               index++;
        }
  }
- EXPORT_SYMBOL(unmap_underlying_metadata);
+ EXPORT_SYMBOL(clean_bdev_aliases);
  
  /*
   * Size is a power-of-two in the range 512..PAGE_SIZE,
@@@ -1745,8 -1789,7 +1789,7 @@@ int __block_write_full_page(struct inod
                        if (buffer_new(bh)) {
                                /* blockdev mappings never come here */
                                clear_buffer_new(bh);
-                               unmap_underlying_metadata(bh->b_bdev,
-                                                       bh->b_blocknr);
+                               clean_bdev_bh_alias(bh);
                        }
                }
                bh = bh->b_this_page;
@@@ -1992,8 -2035,7 +2035,7 @@@ int __block_write_begin_int(struct pag
                        }
  
                        if (buffer_new(bh)) {
-                               unmap_underlying_metadata(bh->b_bdev,
-                                                       bh->b_blocknr);
+                               clean_bdev_bh_alias(bh);
                                if (PageUptodate(page)) {
                                        clear_buffer_new(bh);
                                        set_buffer_uptodate(bh);
@@@ -2633,7 -2675,7 +2675,7 @@@ int nobh_write_begin(struct address_spa
                if (!buffer_mapped(bh))
                        is_mapped_to_disk = 0;
                if (buffer_new(bh))
-                       unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+                       clean_bdev_bh_alias(bh);
                if (PageUptodate(page)) {
                        set_buffer_uptodate(bh);
                        continue;
@@@ -3403,7 -3445,7 +3445,7 @@@ void free_buffer_head(struct buffer_hea
  }
  EXPORT_SYMBOL(free_buffer_head);
  
 -static void buffer_exit_cpu(int cpu)
 +static int buffer_exit_cpu_dead(unsigned int cpu)
  {
        int i;
        struct bh_lru *b = &per_cpu(bh_lrus, cpu);
        }
        this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
        per_cpu(bh_accounting, cpu).nr = 0;
 -}
 -
 -static int buffer_cpu_notify(struct notifier_block *self,
 -                            unsigned long action, void *hcpu)
 -{
 -      if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
 -              buffer_exit_cpu((unsigned long)hcpu);
 -      return NOTIFY_OK;
 +      return 0;
  }
  
  /**
@@@ -3464,7 -3513,6 +3506,7 @@@ EXPORT_SYMBOL(bh_submit_read)
  void __init buffer_init(void)
  {
        unsigned long nrpages;
 +      int ret;
  
        bh_cachep = kmem_cache_create("buffer_head",
                        sizeof(struct buffer_head), 0,
         */
        nrpages = (nr_free_buffer_pages() * 10) / 100;
        max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
 -      hotcpu_notifier(buffer_cpu_notify, 0);
 +      ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
 +                                      NULL, buffer_exit_cpu_dead);
 +      WARN_ON(ret < 0);
  }
diff --combined fs/direct-io.c
index 835e23a4ee4b44b1c2d5a38b76fa50c1cf4b1b83,4ea57edf3e54c62ba33cd5884e8c72c0920d15d5..86aa79859d4d347b59467316b6806389717bd5c8
@@@ -457,7 -457,7 +457,7 @@@ static struct bio *dio_await_one(struc
                dio->waiter = current;
                spin_unlock_irqrestore(&dio->bio_lock, flags);
                if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
 -                  !blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
 +                  !blk_mq_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
                        io_schedule();
                /* wake up sets us TASK_RUNNING */
                spin_lock_irqsave(&dio->bio_lock, flags);
@@@ -842,24 -842,6 +842,6 @@@ out
        return ret;
  }
  
- /*
-  * Clean any dirty buffers in the blockdev mapping which alias newly-created
-  * file blocks.  Only called for S_ISREG files - blockdevs do not set
-  * buffer_new
-  */
- static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
- {
-       unsigned i;
-       unsigned nblocks;
-       nblocks = map_bh->b_size >> dio->inode->i_blkbits;
-       for (i = 0; i < nblocks; i++) {
-               unmap_underlying_metadata(map_bh->b_bdev,
-                                         map_bh->b_blocknr + i);
-       }
- }
  /*
   * If we are not writing the entire block and get_block() allocated
   * the block for us, we need to fill-in the unused portion of the
@@@ -960,11 -942,15 +942,15 @@@ static int do_direct_IO(struct dio *dio
                                        goto do_holes;
  
                                sdio->blocks_available =
-                                               map_bh->b_size >> sdio->blkbits;
+                                               map_bh->b_size >> blkbits;
                                sdio->next_block_for_io =
                                        map_bh->b_blocknr << sdio->blkfactor;
-                               if (buffer_new(map_bh))
-                                       clean_blockdev_aliases(dio, map_bh);
+                               if (buffer_new(map_bh)) {
+                                       clean_bdev_aliases(
+                                               map_bh->b_bdev,
+                                               map_bh->b_blocknr,
+                                               map_bh->b_size >> blkbits);
+                               }
  
                                if (!sdio->blkfactor)
                                        goto do_holes;
diff --combined fs/ext2/inode.c
index 046b642f358592b995f831cdf1fcf5cd61ec95db,eb11f7e2b8aaab10b34c09f259f6b58e610f1397..e173afe9266109f4e7b948433d4d422e90bacc72
@@@ -622,7 -622,7 +622,7 @@@ static int ext2_get_blocks(struct inod
                           u32 *bno, bool *new, bool *boundary,
                           int create)
  {
 -      int err = -EIO;
 +      int err;
        int offsets[4];
        Indirect chain[4];
        Indirect *partial;
        depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
  
        if (depth == 0)
 -              return (err);
 +              return -EIO;
  
        partial = ext2_get_branch(inode, depth, offsets, chain, &err);
        /* Simplest case - block found, no allocation needed */
        }
  
        if (IS_DAX(inode)) {
-               int i;
                /*
                 * We must unmap blocks before zeroing so that writeback cannot
                 * overwrite zeros with stale data from block device page cache.
                 */
-               for (i = 0; i < count; i++) {
-                       unmap_underlying_metadata(inode->i_sb->s_bdev,
-                                       le32_to_cpu(chain[depth-1].key) + i);
-               }
+               clean_bdev_aliases(inode->i_sb->s_bdev,
+                                  le32_to_cpu(chain[depth-1].key),
+                                  count);
                /*
                 * block must be initialised before we put it in the tree
                 * so that it's not found by another thread before it's
        ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
        mutex_unlock(&ei->truncate_mutex);
  got_it:
 -      *bno = le32_to_cpu(chain[depth-1].key);
        if (count > blocks_to_boundary)
                *boundary = true;
        err = count;
@@@ -771,8 -769,6 +768,8 @@@ cleanup
                brelse(partial->bh);
                partial--;
        }
 +      if (err > 0)
 +              *bno = le32_to_cpu(chain[depth-1].key);
        return err;
  }
  
@@@ -850,9 -846,6 +847,9 @@@ struct iomap_ops ext2_iomap_ops = 
        .iomap_begin            = ext2_iomap_begin,
        .iomap_end              = ext2_iomap_end,
  };
 +#else
 +/* Define empty ops for !CONFIG_FS_DAX case to avoid ugly ifdefs */
 +struct iomap_ops ext2_iomap_ops;
  #endif /* CONFIG_FS_DAX */
  
  int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@@ -1296,11 -1289,9 +1293,11 @@@ static int ext2_setsize(struct inode *i
  
        inode_dio_wait(inode);
  
 -      if (IS_DAX(inode))
 -              error = dax_truncate_page(inode, newsize, ext2_get_block);
 -      else if (test_opt(inode->i_sb, NOBH))
 +      if (IS_DAX(inode)) {
 +              error = iomap_zero_range(inode, newsize,
 +                                       PAGE_ALIGN(newsize) - newsize, NULL,
 +                                       &ext2_iomap_ops);
 +      } else if (test_opt(inode->i_sb, NOBH))
                error = nobh_truncate_page(inode->i_mapping,
                                newsize, ext2_get_block);
        else
diff --combined fs/ext4/extents.c
index 3e1014fe835efbd4bfe1e61e6b67f6c982404648,dd5b74dfa01804c3b69cdaf121b4a2e4a61972de..b1f8416923ab9384adc89d36c9d99fa3e21a7285
@@@ -3777,14 -3777,6 +3777,6 @@@ out
        return err;
  }
  
- static void unmap_underlying_metadata_blocks(struct block_device *bdev,
-                       sector_t block, int count)
- {
-       int i;
-       for (i = 0; i < count; i++)
-                 unmap_underlying_metadata(bdev, block + i);
- }
  /*
   * Handle EOFBLOCKS_FL flag, clearing it if necessary
   */
@@@ -4121,9 -4113,8 +4113,8 @@@ out
         * new.
         */
        if (allocated > map->m_len) {
-               unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
-                                       newblock + map->m_len,
-                                       allocated - map->m_len);
+               clean_bdev_aliases(inode->i_sb->s_bdev, newblock + map->m_len,
+                                  allocated - map->m_len);
                allocated = map->m_len;
        }
        map->m_len = allocated;
@@@ -4631,7 -4622,7 +4622,7 @@@ out2
        return err ? err : allocated;
  }
  
 -void ext4_ext_truncate(handle_t *handle, struct inode *inode)
 +int ext4_ext_truncate(handle_t *handle, struct inode *inode)
  {
        struct super_block *sb = inode->i_sb;
        ext4_lblk_t last_block;
  
        /* we have to know where to truncate from in crash case */
        EXT4_I(inode)->i_disksize = inode->i_size;
 -      ext4_mark_inode_dirty(handle, inode);
 +      err = ext4_mark_inode_dirty(handle, inode);
 +      if (err)
 +              return err;
  
        last_block = (inode->i_size + sb->s_blocksize - 1)
                        >> EXT4_BLOCK_SIZE_BITS(sb);
@@@ -4659,9 -4648,12 +4650,9 @@@ retry
                congestion_wait(BLK_RW_ASYNC, HZ/50);
                goto retry;
        }
 -      if (err) {
 -              ext4_std_error(inode->i_sb, err);
 -              return;
 -      }
 -      err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
 -      ext4_std_error(inode->i_sb, err);
 +      if (err)
 +              return err;
 +      return ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
  }
  
  static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
@@@ -4700,7 -4692,7 +4691,7 @@@ retry
                /*
                 * Recalculate credits when extent tree depth changes.
                 */
 -              if (depth >= 0 && depth != ext_depth(inode)) {
 +              if (depth != ext_depth(inode)) {
                        credits = ext4_chunk_trans_blocks(inode, len);
                        depth = ext_depth(inode);
                }
                map.m_lblk += ret;
                map.m_len = len = len - ret;
                epos = (loff_t)map.m_lblk << inode->i_blkbits;
 -              inode->i_ctime = ext4_current_time(inode);
 +              inode->i_ctime = current_time(inode);
                if (new_size) {
                        if (epos > new_size)
                                epos = new_size;
@@@ -4852,7 -4844,7 +4843,7 @@@ static long ext4_zero_range(struct fil
                }
                /* Now release the pages and zero block aligned part of pages */
                truncate_pagecache_range(inode, start, end - 1);
 -              inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 +              inode->i_mtime = inode->i_ctime = current_time(inode);
  
                ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
                                             flags, mode);
                goto out_dio;
        }
  
 -      inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 +      inode->i_mtime = inode->i_ctime = current_time(inode);
        if (new_size) {
                ext4_update_inode_size(inode, new_size);
        } else {
@@@ -5567,7 -5559,7 +5558,7 @@@ int ext4_collapse_range(struct inode *i
        up_write(&EXT4_I(inode)->i_data_sem);
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
 -      inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 +      inode->i_mtime = inode->i_ctime = current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
  
  out_stop:
@@@ -5677,7 -5669,7 +5668,7 @@@ int ext4_insert_range(struct inode *ino
        /* Expand file to avoid data loss if there is error while shifting */
        inode->i_size += len;
        EXT4_I(inode)->i_disksize += len;
 -      inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 +      inode->i_mtime = inode->i_ctime = current_time(inode);
        ret = ext4_mark_inode_dirty(handle, inode);
        if (ret)
                goto out_stop;
diff --combined fs/ext4/inode.c
index 72d593fa690d136d5aad0642ed2fac99bb2ce43d,2f8127601befc0f47bace3080ff6ec1975935d43..88d57af1b516c5bbfd7b2f1bc471a9d76382c3bc
@@@ -37,7 -37,6 +37,7 @@@
  #include <linux/printk.h>
  #include <linux/slab.h>
  #include <linux/bitops.h>
 +#include <linux/iomap.h>
  
  #include "ext4_jbd2.h"
  #include "xattr.h"
@@@ -72,9 -71,10 +72,9 @@@ static __u32 ext4_inode_csum(struct ino
                        csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
                                           csum_size);
                        offset += csum_size;
 -                      csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
 -                                         EXT4_INODE_SIZE(inode->i_sb) -
 -                                         offset);
                }
 +              csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
 +                                 EXT4_INODE_SIZE(inode->i_sb) - offset);
        }
  
        return csum;
@@@ -261,15 -261,8 +261,15 @@@ void ext4_evict_inode(struct inode *ino
                             "couldn't mark inode dirty (err %d)", err);
                goto stop_handle;
        }
 -      if (inode->i_blocks)
 -              ext4_truncate(inode);
 +      if (inode->i_blocks) {
 +              err = ext4_truncate(inode);
 +              if (err) {
 +                      ext4_error(inode->i_sb,
 +                                 "couldn't truncate inode %lu (err %d)",
 +                                 inode->i_ino, err);
 +                      goto stop_handle;
 +              }
 +      }
  
        /*
         * ext4_ext_truncate() doesn't reserve any slop when it
@@@ -661,12 -654,8 +661,8 @@@ found
                if (flags & EXT4_GET_BLOCKS_ZERO &&
                    map->m_flags & EXT4_MAP_MAPPED &&
                    map->m_flags & EXT4_MAP_NEW) {
-                       ext4_lblk_t i;
-                       for (i = 0; i < map->m_len; i++) {
-                               unmap_underlying_metadata(inode->i_sb->s_bdev,
-                                                         map->m_pblk + i);
-                       }
+                       clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
+                                          map->m_len);
                        ret = ext4_issue_zeroout(inode, map->m_lblk,
                                                 map->m_pblk, map->m_len);
                        if (ret) {
@@@ -774,9 -763,6 +770,9 @@@ static int _ext4_get_block(struct inod
                ext4_update_bh_state(bh, map.m_flags);
                bh->b_size = inode->i_sb->s_blocksize * map.m_len;
                ret = 0;
 +      } else if (ret == 0) {
 +              /* hole case, need to fill in bh->b_size */
 +              bh->b_size = inode->i_sb->s_blocksize * map.m_len;
        }
        return ret;
  }
@@@ -1137,8 -1123,7 +1133,7 @@@ static int ext4_block_write_begin(struc
                        if (err)
                                break;
                        if (buffer_new(bh)) {
-                               unmap_underlying_metadata(bh->b_bdev,
-                                                         bh->b_blocknr);
+                               clean_bdev_bh_alias(bh);
                                if (PageUptodate(page)) {
                                        clear_buffer_new(bh);
                                        set_buffer_uptodate(bh);
        if (unlikely(err))
                page_zero_new_buffers(page, from, to);
        else if (decrypt)
 -              err = fscrypt_decrypt_page(page);
 +              err = fscrypt_decrypt_page(page->mapping->host, page,
 +                              PAGE_SIZE, 0, page->index);
        return err;
  }
  #endif
@@@ -2371,11 -2355,8 +2366,8 @@@ static int mpage_map_one_extent(handle_
  
        BUG_ON(map->m_len == 0);
        if (map->m_flags & EXT4_MAP_NEW) {
-               struct block_device *bdev = inode->i_sb->s_bdev;
-               int i;
-               for (i = 0; i < map->m_len; i++)
-                       unmap_underlying_metadata(bdev, map->m_pblk + i);
+               clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
+                                  map->m_len);
        }
        return 0;
  }
@@@ -2902,8 -2883,7 +2894,8 @@@ static int ext4_da_write_begin(struct f
  
        index = pos >> PAGE_SHIFT;
  
 -      if (ext4_nonda_switch(inode->i_sb)) {
 +      if (ext4_nonda_switch(inode->i_sb) ||
 +          S_ISLNK(inode->i_mode)) {
                *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
                return ext4_write_begin(file, mapping, pos,
                                        len, flags, pagep, fsdata);
@@@ -3280,159 -3260,53 +3272,159 @@@ static int ext4_releasepage(struct pag
  }
  
  #ifdef CONFIG_FS_DAX
 -/*
 - * Get block function for DAX IO and mmap faults. It takes care of converting
 - * unwritten extents to written ones and initializes new / converted blocks
 - * to zeros.
 - */
 -int ext4_dax_get_block(struct inode *inode, sector_t iblock,
 -                     struct buffer_head *bh_result, int create)
 +static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 +                          unsigned flags, struct iomap *iomap)
  {
 +      unsigned int blkbits = inode->i_blkbits;
 +      unsigned long first_block = offset >> blkbits;
 +      unsigned long last_block = (offset + length - 1) >> blkbits;
 +      struct ext4_map_blocks map;
        int ret;
  
 -      ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create);
 -      if (!create)
 -              return _ext4_get_block(inode, iblock, bh_result, 0);
 +      if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
 +              return -ERANGE;
  
 -      ret = ext4_get_block_trans(inode, iblock, bh_result,
 -                                 EXT4_GET_BLOCKS_PRE_IO |
 -                                 EXT4_GET_BLOCKS_CREATE_ZERO);
 -      if (ret < 0)
 -              return ret;
 +      map.m_lblk = first_block;
 +      map.m_len = last_block - first_block + 1;
 +
 +      if (!(flags & IOMAP_WRITE)) {
 +              ret = ext4_map_blocks(NULL, inode, &map, 0);
 +      } else {
 +              int dio_credits;
 +              handle_t *handle;
 +              int retries = 0;
  
 -      if (buffer_unwritten(bh_result)) {
 +              /* Trim mapping request to maximum we can map at once for DIO */
 +              if (map.m_len > DIO_MAX_BLOCKS)
 +                      map.m_len = DIO_MAX_BLOCKS;
 +              dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
 +retry:
                /*
 -               * We are protected by i_mmap_sem or i_mutex so we know block
 -               * cannot go away from under us even though we dropped
 -               * i_data_sem. Convert extent to written and write zeros there.
 +               * Either we allocate blocks and then we don't get unwritten
 +               * extent so we have reserved enough credits, or the blocks
 +               * are already allocated and unwritten and in that case
 +               * extent conversion fits in the credits as well.
                 */
 -              ret = ext4_get_block_trans(inode, iblock, bh_result,
 -                                         EXT4_GET_BLOCKS_CONVERT |
 -                                         EXT4_GET_BLOCKS_CREATE_ZERO);
 -              if (ret < 0)
 +              handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
 +                                          dio_credits);
 +              if (IS_ERR(handle))
 +                      return PTR_ERR(handle);
 +
 +              ret = ext4_map_blocks(handle, inode, &map,
 +                                    EXT4_GET_BLOCKS_CREATE_ZERO);
 +              if (ret < 0) {
 +                      ext4_journal_stop(handle);
 +                      if (ret == -ENOSPC &&
 +                          ext4_should_retry_alloc(inode->i_sb, &retries))
 +                              goto retry;
                        return ret;
 +              }
 +
 +              /*
 +               * If we added blocks beyond i_size, we need to make sure they
 +               * will get truncated if we crash before updating i_size in
 +               * ext4_iomap_end(). For faults we don't need to do that (and
 +               * even cannot because for orphan list operations inode_lock is
 +               * required) - if we happen to instantiate block beyond i_size,
 +               * it is because we race with truncate which has already added
 +               * the inode to the orphan list.
 +               */
 +              if (!(flags & IOMAP_FAULT) && first_block + map.m_len >
 +                  (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) {
 +                      int err;
 +
 +                      err = ext4_orphan_add(handle, inode);
 +                      if (err < 0) {
 +                              ext4_journal_stop(handle);
 +                              return err;
 +                      }
 +              }
 +              ext4_journal_stop(handle);
        }
 -      /*
 -       * At least for now we have to clear BH_New so that DAX code
 -       * doesn't attempt to zero blocks again in a racy way.
 -       */
 -      clear_buffer_new(bh_result);
 +
 +      iomap->flags = 0;
 +      iomap->bdev = inode->i_sb->s_bdev;
 +      iomap->offset = first_block << blkbits;
 +
 +      if (ret == 0) {
 +              iomap->type = IOMAP_HOLE;
 +              iomap->blkno = IOMAP_NULL_BLOCK;
 +              iomap->length = (u64)map.m_len << blkbits;
 +      } else {
 +              if (map.m_flags & EXT4_MAP_MAPPED) {
 +                      iomap->type = IOMAP_MAPPED;
 +              } else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
 +                      iomap->type = IOMAP_UNWRITTEN;
 +              } else {
 +                      WARN_ON_ONCE(1);
 +                      return -EIO;
 +              }
 +              iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
 +              iomap->length = (u64)map.m_len << blkbits;
 +      }
 +
 +      if (map.m_flags & EXT4_MAP_NEW)
 +              iomap->flags |= IOMAP_F_NEW;
        return 0;
  }
 -#else
 -/* Just define empty function, it will never get called. */
 -int ext4_dax_get_block(struct inode *inode, sector_t iblock,
 -                     struct buffer_head *bh_result, int create)
 +
 +static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 +                        ssize_t written, unsigned flags, struct iomap *iomap)
  {
 -      BUG();
 -      return 0;
 +      int ret = 0;
 +      handle_t *handle;
 +      int blkbits = inode->i_blkbits;
 +      bool truncate = false;
 +
 +      if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
 +              return 0;
 +
 +      handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
 +      if (IS_ERR(handle)) {
 +              ret = PTR_ERR(handle);
 +              goto orphan_del;
 +      }
 +      if (ext4_update_inode_size(inode, offset + written))
 +              ext4_mark_inode_dirty(handle, inode);
 +      /*
 +       * We may need to truncate allocated but not written blocks beyond EOF.
 +       */
 +      if (iomap->offset + iomap->length > 
 +          ALIGN(inode->i_size, 1 << blkbits)) {
 +              ext4_lblk_t written_blk, end_blk;
 +
 +              written_blk = (offset + written) >> blkbits;
 +              end_blk = (offset + length) >> blkbits;
 +              if (written_blk < end_blk && ext4_can_truncate(inode))
 +                      truncate = true;
 +      }
 +      /*
 +       * Remove inode from orphan list if we were extending a inode and
 +       * everything went fine.
 +       */
 +      if (!truncate && inode->i_nlink &&
 +          !list_empty(&EXT4_I(inode)->i_orphan))
 +              ext4_orphan_del(handle, inode);
 +      ext4_journal_stop(handle);
 +      if (truncate) {
 +              ext4_truncate_failed_write(inode);
 +orphan_del:
 +              /*
 +               * If truncate failed early the inode might still be on the
 +               * orphan list; we need to make sure the inode is removed from
 +               * the orphan list in that case.
 +               */
 +              if (inode->i_nlink)
 +                      ext4_orphan_del(NULL, inode);
 +      }
 +      return ret;
  }
 +
 +struct iomap_ops ext4_iomap_ops = {
 +      .iomap_begin            = ext4_iomap_begin,
 +      .iomap_end              = ext4_iomap_end,
 +};
 +
  #endif
  
  static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
@@@ -3554,7 -3428,19 +3546,7 @@@ static ssize_t ext4_direct_IO_write(str
        iocb->private = NULL;
        if (overwrite)
                get_block_func = ext4_dio_get_block_overwrite;
 -      else if (IS_DAX(inode)) {
 -              /*
 -               * We can avoid zeroing for aligned DAX writes beyond EOF. Other
 -               * writes need zeroing either because they can race with page
 -               * faults or because they use partial blocks.
 -               */
 -              if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size &&
 -                  ext4_aligned_io(inode, offset, count))
 -                      get_block_func = ext4_dio_get_block;
 -              else
 -                      get_block_func = ext4_dax_get_block;
 -              dio_flags = DIO_LOCKING;
 -      } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
 +      else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
                   round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
                get_block_func = ext4_dio_get_block;
                dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
  #ifdef CONFIG_EXT4_FS_ENCRYPTION
        BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
  #endif
 -      if (IS_DAX(inode)) {
 -              ret = dax_do_io(iocb, inode, iter, get_block_func,
 -                              ext4_end_io_dio, dio_flags);
 -      } else
 -              ret = __blockdev_direct_IO(iocb, inode,
 -                                         inode->i_sb->s_bdev, iter,
 -                                         get_block_func,
 -                                         ext4_end_io_dio, NULL, dio_flags);
 +      ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
 +                                 get_block_func, ext4_end_io_dio, NULL,
 +                                 dio_flags);
  
        if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
                                                EXT4_STATE_DIO_UNWRITTEN)) {
@@@ -3639,7 -3530,6 +3631,7 @@@ static ssize_t ext4_direct_IO_read(stru
  {
        struct address_space *mapping = iocb->ki_filp->f_mapping;
        struct inode *inode = mapping->host;
 +      size_t count = iov_iter_count(iter);
        ssize_t ret;
  
        /*
         * we are protected against page writeback as well.
         */
        inode_lock_shared(inode);
 -      if (IS_DAX(inode)) {
 -              ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
 -      } else {
 -              size_t count = iov_iter_count(iter);
 -
 -              ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
 -                                                 iocb->ki_pos + count);
 -              if (ret)
 -                      goto out_unlock;
 -              ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
 -                                         iter, ext4_dio_get_block,
 -                                         NULL, NULL, 0);
 -      }
 +      ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
 +                                         iocb->ki_pos + count);
 +      if (ret)
 +              goto out_unlock;
 +      ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
 +                                 iter, ext4_dio_get_block, NULL, NULL, 0);
  out_unlock:
        inode_unlock_shared(inode);
        return ret;
@@@ -3682,10 -3579,6 +3674,10 @@@ static ssize_t ext4_direct_IO(struct ki
        if (ext4_has_inline_data(inode))
                return 0;
  
 +      /* DAX uses iomap path now */
 +      if (WARN_ON_ONCE(IS_DAX(inode)))
 +              return 0;
 +
        trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
        if (iov_iter_rw(iter) == READ)
                ret = ext4_direct_IO_read(iocb, iter);
@@@ -3714,13 -3607,6 +3706,13 @@@ static int ext4_journalled_set_page_dir
        return __set_page_dirty_nobuffers(page);
  }
  
 +static int ext4_set_page_dirty(struct page *page)
 +{
 +      WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page));
 +      WARN_ON_ONCE(!page_has_buffers(page));
 +      return __set_page_dirty_buffers(page);
 +}
 +
  static const struct address_space_operations ext4_aops = {
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepages             = ext4_writepages,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_write_end,
 +      .set_page_dirty         = ext4_set_page_dirty,
        .bmap                   = ext4_bmap,
        .invalidatepage         = ext4_invalidatepage,
        .releasepage            = ext4_releasepage,
@@@ -3761,7 -3646,6 +3753,7 @@@ static const struct address_space_opera
        .writepages             = ext4_writepages,
        .write_begin            = ext4_da_write_begin,
        .write_end              = ext4_da_write_end,
 +      .set_page_dirty         = ext4_set_page_dirty,
        .bmap                   = ext4_bmap,
        .invalidatepage         = ext4_da_invalidatepage,
        .releasepage            = ext4_releasepage,
@@@ -3851,8 -3735,7 +3843,8 @@@ static int __ext4_block_zero_page_range
                        /* We expect the key to be set. */
                        BUG_ON(!fscrypt_has_encryption_key(inode));
                        BUG_ON(blocksize != PAGE_SIZE);
 -                      WARN_ON_ONCE(fscrypt_decrypt_page(page));
 +                      WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
 +                                              page, PAGE_SIZE, 0, page->index));
                }
        }
        if (ext4_should_journal_data(inode)) {
@@@ -3901,10 -3784,8 +3893,10 @@@ static int ext4_block_zero_page_range(h
        if (length > max || length < 0)
                length = max;
  
 -      if (IS_DAX(inode))
 -              return dax_zero_page_range(inode, from, length, ext4_get_block);
 +      if (IS_DAX(inode)) {
 +              return iomap_zero_range(inode, from, length, NULL,
 +                                      &ext4_iomap_ops);
 +      }
        return __ext4_block_zero_page_range(handle, mapping, from, length);
  }
  
@@@ -4137,7 -4018,7 +4129,7 @@@ int ext4_punch_hole(struct inode *inode
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
  
 -      inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 +      inode->i_mtime = inode->i_ctime = current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
  out_stop:
        ext4_journal_stop(handle);
@@@ -4202,11 -4083,10 +4194,11 @@@ int ext4_inode_attach_jinode(struct ino
   * that's fine - as long as they are linked from the inode, the post-crash
   * ext4_truncate() run will find them and release them.
   */
 -void ext4_truncate(struct inode *inode)
 +int ext4_truncate(struct inode *inode)
  {
        struct ext4_inode_info *ei = EXT4_I(inode);
        unsigned int credits;
 +      int err = 0;
        handle_t *handle;
        struct address_space *mapping = inode->i_mapping;
  
        trace_ext4_truncate_enter(inode);
  
        if (!ext4_can_truncate(inode))
 -              return;
 +              return 0;
  
        ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
  
  
                ext4_inline_data_truncate(inode, &has_inline);
                if (has_inline)
 -                      return;
 +                      return 0;
        }
  
        /* If we zero-out tail of the page, we have to create jinode for jbd2 */
        if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
                if (ext4_inode_attach_jinode(inode) < 0)
 -                      return;
 +                      return 0;
        }
  
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                credits = ext4_blocks_for_truncate(inode);
  
        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
 -      if (IS_ERR(handle)) {
 -              ext4_std_error(inode->i_sb, PTR_ERR(handle));
 -              return;
 -      }
 +      if (IS_ERR(handle))
 +              return PTR_ERR(handle);
  
        if (inode->i_size & (inode->i_sb->s_blocksize - 1))
                ext4_block_truncate_page(handle, mapping, inode->i_size);
         * Implication: the file must always be in a sane, consistent
         * truncatable state while each transaction commits.
         */
 -      if (ext4_orphan_add(handle, inode))
 +      err = ext4_orphan_add(handle, inode);
 +      if (err)
                goto out_stop;
  
        down_write(&EXT4_I(inode)->i_data_sem);
        ext4_discard_preallocations(inode);
  
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 -              ext4_ext_truncate(handle, inode);
 +              err = ext4_ext_truncate(handle, inode);
        else
                ext4_ind_truncate(handle, inode);
  
        up_write(&ei->i_data_sem);
 +      if (err)
 +              goto out_stop;
  
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
@@@ -4293,12 -4172,11 +4285,12 @@@ out_stop
        if (inode->i_nlink)
                ext4_orphan_del(handle, inode);
  
 -      inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 +      inode->i_mtime = inode->i_ctime = current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);
  
        trace_ext4_truncate_exit(inode);
 +      return err;
  }
  
  /*
@@@ -4466,9 -4344,7 +4458,9 @@@ void ext4_set_inode_flags(struct inode 
                new_fl |= S_NOATIME;
        if (flags & EXT4_DIRSYNC_FL)
                new_fl |= S_DIRSYNC;
 -      if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
 +      if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) &&
 +          !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) &&
 +          !ext4_encrypted_inode(inode))
                new_fl |= S_DAX;
        inode_set_flags(inode, new_fl,
                        S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
@@@ -4527,9 -4403,7 +4519,9 @@@ static inline void ext4_iget_extra_inod
  {
        __le32 *magic = (void *)raw_inode +
                        EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
 -      if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
 +      if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
 +          EXT4_INODE_SIZE(inode->i_sb) &&
 +          *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
                ext4_set_inode_state(inode, EXT4_STATE_XATTR);
                ext4_find_inline_data_nolock(inode);
        } else
@@@ -4552,7 -4426,6 +4544,7 @@@ struct inode *ext4_iget(struct super_bl
        struct inode *inode;
        journal_t *journal = EXT4_SB(sb)->s_journal;
        long ret;
 +      loff_t size;
        int block;
        uid_t i_uid;
        gid_t i_gid;
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
 -                  EXT4_INODE_SIZE(inode->i_sb)) {
 -                      EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)",
 -                              EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize,
 -                              EXT4_INODE_SIZE(inode->i_sb));
 +                      EXT4_INODE_SIZE(inode->i_sb) ||
 +                  (ei->i_extra_isize & 3)) {
 +                      EXT4_ERROR_INODE(inode,
 +                                       "bad extra_isize %u (inode size %u)",
 +                                       ei->i_extra_isize,
 +                                       EXT4_INODE_SIZE(inode->i_sb));
                        ret = -EFSCORRUPTED;
                        goto bad_inode;
                }
                ei->i_file_acl |=
                        ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
        inode->i_size = ext4_isize(raw_inode);
 +      if ((size = i_size_read(inode)) < 0) {
 +              EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
 +              ret = -EFSCORRUPTED;
 +              goto bad_inode;
 +      }
        ei->i_disksize = inode->i_size;
  #ifdef CONFIG_QUOTA
        ei->i_reserved_quota = 0;
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                if (ei->i_extra_isize == 0) {
                        /* The extra space is currently unused. Use it. */
 +                      BUILD_BUG_ON(sizeof(struct ext4_inode) & 3);
                        ei->i_extra_isize = sizeof(struct ext4_inode) -
                                            EXT4_GOOD_OLD_INODE_SIZE;
                } else {
@@@ -5281,7 -5146,7 +5273,7 @@@ int ext4_setattr(struct dentry *dentry
                         * update c/mtime in shrink case below
                         */
                        if (!shrink) {
 -                              inode->i_mtime = ext4_current_time(inode);
 +                              inode->i_mtime = current_time(inode);
                                inode->i_ctime = inode->i_mtime;
                        }
                        down_write(&EXT4_I(inode)->i_data_sem);
                 * in data=journal mode to make pages freeable.
                 */
                truncate_pagecache(inode, inode->i_size);
 -              if (shrink)
 -                      ext4_truncate(inode);
 +              if (shrink) {
 +                      rc = ext4_truncate(inode);
 +                      if (rc)
 +                              error = rc;
 +              }
                up_write(&EXT4_I(inode)->i_mmap_sem);
        }
  
 -      if (!rc) {
 +      if (!error) {
                setattr_copy(inode, attr);
                mark_inode_dirty(inode);
        }
        if (orphan && inode->i_nlink)
                ext4_orphan_del(NULL, inode);
  
 -      if (!rc && (ia_valid & ATTR_MODE))
 +      if (!error && (ia_valid & ATTR_MODE))
                rc = posix_acl_chmod(inode, inode->i_mode);
  
  err_out:
@@@ -5585,20 -5447,18 +5577,20 @@@ int ext4_mark_inode_dirty(handle_t *han
        err = ext4_reserve_inode_write(handle, inode, &iloc);
        if (err)
                return err;
 -      if (ext4_handle_valid(handle) &&
 -          EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
 +      if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
            !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
                /*
 -               * We need extra buffer credits since we may write into EA block
 +               * In nojournal mode, we can immediately attempt to expand
 +               * the inode.  When journaled, we first need to obtain extra
 +               * buffer credits since we may write into the EA block
                 * with this same handle. If journal_extend fails, then it will
                 * only result in a minor loss of functionality for that inode.
                 * If this is felt to be critical, then e2fsck should be run to
                 * force a large enough s_min_extra_isize.
                 */
 -              if ((jbd2_journal_extend(handle,
 -                           EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
 +              if (!ext4_handle_valid(handle) ||
 +                  jbd2_journal_extend(handle,
 +                           EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) {
                        ret = ext4_expand_extra_isize(inode,
                                                      sbi->s_want_extra_isize,
                                                      iloc, handle);
@@@ -5752,11 -5612,6 +5744,11 @@@ int ext4_change_inode_journal_flag(stru
                ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        }
        ext4_set_aops(inode);
 +      /*
 +       * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated.
 +       * E.g. S_DAX may get cleared / set.
 +       */
 +      ext4_set_inode_flags(inode);
  
        jbd2_journal_unlock_updates(journal);
        percpu_up_write(&sbi->s_journal_flag_rwsem);
diff --combined fs/ext4/page-io.c
index e2332a65e8fbb0d12ef754f8f70c5ee453013525,f28fd6483e0453c8f0bff5ea352d54344b8e0669..d83b0f3c5fe9eac1390b71bd2c3d43b13a087f51
@@@ -457,7 -457,7 +457,7 @@@ int ext4_bio_write_page(struct ext4_io_
                }
                if (buffer_new(bh)) {
                        clear_buffer_new(bh);
-                       unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+                       clean_bdev_bh_alias(bh);
                }
                set_buffer_async_write(bh);
                nr_to_submit++;
                gfp_t gfp_flags = GFP_NOFS;
  
        retry_encrypt:
 -              data_page = fscrypt_encrypt_page(inode, page, gfp_flags);
 +              data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0,
 +                                              page->index, gfp_flags);
                if (IS_ERR(data_page)) {
                        ret = PTR_ERR(data_page);
                        if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
diff --combined fs/ocfs2/aops.c
index 9a88984f9f6fa1803a6ecdbc8e8ae6b9efa6b2b9,e8f65eefffca81a9dfa2163cd544698107fdaca1..4d9c6f5ec28a62efbda693918562df97a8bfa20f
@@@ -630,7 -630,7 +630,7 @@@ int ocfs2_map_page_blocks(struct page *
  
                if (!buffer_mapped(bh)) {
                        map_bh(bh, inode->i_sb, *p_blkno);
-                       unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+                       clean_bdev_bh_alias(bh);
                }
  
                if (PageUptodate(page)) {
@@@ -1950,7 -1950,8 +1950,7 @@@ static void ocfs2_write_end_inline(stru
  }
  
  int ocfs2_write_end_nolock(struct address_space *mapping,
 -                         loff_t pos, unsigned len, unsigned copied,
 -                         struct page *page, void *fsdata)
 +                         loff_t pos, unsigned len, unsigned copied, void *fsdata)
  {
        int i, ret;
        unsigned from, to, start = pos & (PAGE_SIZE - 1);
@@@ -2063,7 -2064,7 +2063,7 @@@ static int ocfs2_write_end(struct file 
        int ret;
        struct inode *inode = mapping->host;
  
 -      ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
 +      ret = ocfs2_write_end_nolock(mapping, pos, len, copied, fsdata);
  
        up_write(&OCFS2_I(inode)->ip_alloc_sem);
        ocfs2_inode_unlock(inode, 1);
@@@ -2240,7 -2241,7 +2240,7 @@@ static int ocfs2_dio_get_block(struct i
                dwc->dw_zero_count++;
        }
  
 -      ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc);
 +      ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc);
        BUG_ON(ret != len);
        ret = 0;
  unlock: