Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 848f436df29f6ffec6e8487549764d98c63d5b6b..feaa82fe629d067e0900744fcbb50da2768b0183 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -18,7 +18,6 @@
  *  Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000
  */
 
-#include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/time.h>
 #include <linux/jbd2.h>
@@ -72,6 +71,9 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
 static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
 static int __ext4_journalled_writepage(struct page *page, unsigned int len);
 static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
+static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+               struct inode *inode, struct page *page, loff_t from,
+               loff_t length, int flags);
 
 /*
  * Test whether an inode is a fast symlink.
@@ -1339,8 +1341,11 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                                        clear_buffer_unwritten(bh);
                                }
 
-                               /* skip page if block allocation undone */
-                               if (buffer_delay(bh) || buffer_unwritten(bh))
+                               /*
+                                * skip page if block allocation undone and
+                                * block is dirty
+                                */
+                               if (ext4_bh_delay_or_unwritten(NULL, bh))
                                        skip_page = 1;
                                bh = bh->b_this_page;
                                block_start += bh->b_size;
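
ext4_bh_delay_or_unwritten() is declared near the top of this file (see the first hunk); its body is not part of this diff, but a hedged reconstruction, folding the old delay/unwritten test together with a dirty check, matches what the updated comment refers to:

static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
{
        /* Reconstruction, not copied from this diff: skip only buffers
         * that still await block allocation (delayed or unwritten) and
         * that actually carry dirty data. */
        return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
}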
@@ -1878,7 +1883,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
  * a[0] = 'a';
  * truncate(f, 4096);
  * we have in the page first buffer_head mapped via page_mkwrite call back
- * but other bufer_heads would be unmapped but dirty(dirty done via the
+ * but other buffer_heads would be unmapped but dirty (dirty done via the
  * do_wp_page). So writepage should write the first block. If we modify
  * the mmap area beyond 1024 we will again get a page_fault and the
  * page_mkwrite callback will do the block allocation and mark the
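
The scenario sketched in the comment above can be reproduced from userspace. Below is a hedged illustration, assuming an ext4 filesystem with a 1K block size; the mount point, file name, and offsets are invented for the example:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/mnt/ext4-1k/f", O_RDWR | O_CREAT, 0644);
        if (fd < 0) { perror("open"); return 1; }
        ftruncate(fd, 1024);            /* one 1K block to start with */

        char *a = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (a == MAP_FAILED) { perror("mmap"); return 1; }

        a[0] = 'a';             /* page_mkwrite() maps only the first 1K block;
                                 * the store also dirties the whole page       */
        ftruncate(fd, 4096);    /* i_size grows to a full page; blocks 1-3 of
                                 * the page remain unmapped                    */
        fsync(fd);              /* writepage writes just the mapped first block
                                 * and write-protects the page again           */
        a[2000] = 'b';          /* this store faults once more; page_mkwrite()
                                 * now allocates the block backing offset 2000 */

        munmap(a, 4096);
        close(fd);
        return 0;
}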
@@ -2387,7 +2392,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
        pgoff_t index;
        struct inode *inode = mapping->host;
        handle_t *handle;
-       loff_t page_len;
 
        index = pos >> PAGE_CACHE_SHIFT;
 
@@ -2434,13 +2438,6 @@ retry:
                 */
                if (pos + len > inode->i_size)
                        ext4_truncate_failed_write(inode);
-       } else {
-               page_len = pos & (PAGE_CACHE_SIZE - 1);
-               if (page_len > 0) {
-                       ret = ext4_discard_partial_page_buffers_no_lock(handle,
-                               inode, page, pos - page_len, page_len,
-                               EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
-               }
        }
 
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2483,7 +2480,6 @@ static int ext4_da_write_end(struct file *file,
        loff_t new_i_size;
        unsigned long start, end;
        int write_mode = (int)(unsigned long)fsdata;
-       loff_t page_len;
 
        if (write_mode == FALL_BACK_TO_NONDELALLOC) {
                if (ext4_should_order_data(inode)) {
@@ -2508,7 +2504,7 @@ static int ext4_da_write_end(struct file *file,
         */
 
        new_i_size = pos + copied;
-       if (new_i_size > EXT4_I(inode)->i_disksize) {
+       if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
                if (ext4_da_should_update_i_disksize(page, end)) {
                        down_write(&EXT4_I(inode)->i_data_sem);
                        if (new_i_size > EXT4_I(inode)->i_disksize) {
@@ -2532,16 +2528,6 @@ static int ext4_da_write_end(struct file *file,
        }
        ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
-
-       page_len = PAGE_CACHE_SIZE -
-                       ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
-
-       if (page_len > 0) {
-               ret = ext4_discard_partial_page_buffers_no_lock(handle,
-                       inode, page, pos + copied - 1, page_len,
-                       EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
-       }
-
        copied = ret2;
        if (ret2 < 0)
                ret = ret2;
@@ -2776,15 +2762,16 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
        if (!io_end || !size)
                goto out;
 
-       ext_debug("ext4_end_io_dio(): io_end 0x%p"
+       ext_debug("ext4_end_io_dio(): io_end 0x%p "
                  "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
                  iocb->private, io_end->inode->i_ino, iocb, offset,
                  size);
 
+       iocb->private = NULL;
+
        /* if not aio dio with unwritten extents, just free io and return */
        if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
                ext4_free_io_end(io_end);
-               iocb->private = NULL;
 out:
                if (is_async)
                        aio_complete(iocb, ret, 0);
@@ -2807,7 +2794,6 @@ out:
        spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 
        /* queue the work to convert unwritten extents to written */
-       iocb->private = NULL;
        queue_work(wq, &io_end->work);
 
        /* XXX: probably should move into the real I/O completion handler */
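
The net effect of the two hunks above is that iocb->private is detached unconditionally, before the io_end is either freed or queued for conversion. A minimal standalone sketch of that detach-before-handoff pattern (all names invented, userspace C, compiles on its own):

#include <stdlib.h>
#include <stdbool.h>

struct io_end { bool unwritten; };
struct request { struct io_end *private; };

static void hand_off_to_worker(struct io_end *io) { (void)io; /* stand-in for queue_work() */ }

static void end_io(struct request *req)
{
        struct io_end *io = req->private;

        req->private = NULL;            /* detach first, on every path */
        if (!io->unwritten) {
                free(io);               /* nothing to convert: free now */
                return;
        }
        hand_off_to_worker(io);         /* the worker owns 'io' from here on */
}

int main(void)
{
        struct request req = { .private = malloc(sizeof(struct io_end)) };
        req.private->unwritten = false;
        end_io(&req);
        return 0;
}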
@@ -3177,7 +3163,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
  *
  * Returns zero on success or negative on failure.
  */
-int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
                struct inode *inode, struct page *page, loff_t from,
                loff_t length, int flags)
 {
@@ -3203,26 +3189,8 @@ int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
 
        iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
-       if (!page_has_buffers(page)) {
-               /*
-                * If the range to be discarded covers a partial block
-                * we need to get the page buffers.  This is because
-                * partial blocks cannot be released and the page needs
-                * to be updated with the contents of the block before
-                * we write the zeros on top of it.
-                */
-               if ((from & (blocksize - 1)) ||
-                   ((from + length) & (blocksize - 1))) {
-                       create_empty_buffers(page, blocksize, 0);
-               } else {
-                       /*
-                        * If there are no partial blocks,
-                        * there is nothing to update,
-                        * so we can return now
-                        */
-                       return 0;
-               }
-       }
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, blocksize, 0);
 
        /* Find the buffer that contains "offset" */
        bh = page_buffers(page);
@@ -3335,126 +3303,6 @@ next:
        return err;
 }
 
-/*
- * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
- * up to the end of the block which corresponds to `from'.
- * This required during truncate. We need to physically zero the tail end
- * of that block so it doesn't yield old data if the file is later grown.
- */
-int ext4_block_truncate_page(handle_t *handle,
-               struct address_space *mapping, loff_t from)
-{
-       unsigned offset = from & (PAGE_CACHE_SIZE-1);
-       unsigned length;
-       unsigned blocksize;
-       struct inode *inode = mapping->host;
-
-       blocksize = inode->i_sb->s_blocksize;
-       length = blocksize - (offset & (blocksize - 1));
-
-       return ext4_block_zero_page_range(handle, mapping, from, length);
-}
-
-/*
- * ext4_block_zero_page_range() zeros out a mapping of length 'length'
- * starting from file offset 'from'.  The range to be zero'd must
- * be contained with in one block.  If the specified range exceeds
- * the end of the block it will be shortened to end of the block
- * that cooresponds to 'from'
- */
-int ext4_block_zero_page_range(handle_t *handle,
-               struct address_space *mapping, loff_t from, loff_t length)
-{
-       ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
-       unsigned offset = from & (PAGE_CACHE_SIZE-1);
-       unsigned blocksize, max, pos;
-       ext4_lblk_t iblock;
-       struct inode *inode = mapping->host;
-       struct buffer_head *bh;
-       struct page *page;
-       int err = 0;
-
-       page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
-                                  mapping_gfp_mask(mapping) & ~__GFP_FS);
-       if (!page)
-               return -ENOMEM;
-
-       blocksize = inode->i_sb->s_blocksize;
-       max = blocksize - (offset & (blocksize - 1));
-
-       /*
-        * correct length if it does not fall between
-        * 'from' and the end of the block
-        */
-       if (length > max || length < 0)
-               length = max;
-
-       iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
-
-       if (!page_has_buffers(page))
-               create_empty_buffers(page, blocksize, 0);
-
-       /* Find the buffer that contains "offset" */
-       bh = page_buffers(page);
-       pos = blocksize;
-       while (offset >= pos) {
-               bh = bh->b_this_page;
-               iblock++;
-               pos += blocksize;
-       }
-
-       err = 0;
-       if (buffer_freed(bh)) {
-               BUFFER_TRACE(bh, "freed: skip");
-               goto unlock;
-       }
-
-       if (!buffer_mapped(bh)) {
-               BUFFER_TRACE(bh, "unmapped");
-               ext4_get_block(inode, iblock, bh, 0);
-               /* unmapped? It's a hole - nothing to do */
-               if (!buffer_mapped(bh)) {
-                       BUFFER_TRACE(bh, "still unmapped");
-                       goto unlock;
-               }
-       }
-
-       /* Ok, it's mapped. Make sure it's up-to-date */
-       if (PageUptodate(page))
-               set_buffer_uptodate(bh);
-
-       if (!buffer_uptodate(bh)) {
-               err = -EIO;
-               ll_rw_block(READ, 1, &bh);
-               wait_on_buffer(bh);
-               /* Uhhuh. Read error. Complain and punt. */
-               if (!buffer_uptodate(bh))
-                       goto unlock;
-       }
-
-       if (ext4_should_journal_data(inode)) {
-               BUFFER_TRACE(bh, "get write access");
-               err = ext4_journal_get_write_access(handle, bh);
-               if (err)
-                       goto unlock;
-       }
-
-       zero_user(page, offset, length);
-
-       BUFFER_TRACE(bh, "zeroed end of block");
-
-       err = 0;
-       if (ext4_should_journal_data(inode)) {
-               err = ext4_handle_dirty_metadata(handle, inode, bh);
-       } else
-               mark_buffer_dirty(bh);
-
-unlock:
-       unlock_page(page);
-       page_cache_release(page);
-       return err;
-}
-
 int ext4_can_truncate(struct inode *inode)
 {
        if (S_ISREG(inode->i_mode))
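
Both helpers removed above clamp the zeroing range to the single block that contains 'from'. The arithmetic is easy to verify in isolation; the sizes in this standalone sketch are invented:

#include <stdio.h>

int main(void)
{
        unsigned long from = 5000;      /* e.g. the new i_size after a truncate */
        unsigned blocksize = 1024;      /* filesystem block size                */
        unsigned pagesize  = 4096;      /* PAGE_CACHE_SIZE                      */

        unsigned offset = from & (pagesize - 1);                  /* 904: offset within the page   */
        unsigned length = blocksize - (offset & (blocksize - 1)); /* 120: bytes left in that block */

        printf("zero %u bytes at page offset %u (tail of the block)\n", length, offset);
        return 0;
}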
@@ -3503,7 +3351,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
  * transaction, and VFS/VM ensures that ext4_truncate() cannot run
  * simultaneously on behalf of the same inode.
  *
- * As we work through the truncate and commmit bits of it to the journal there
+ * As we work through the truncate and commit bits of it to the journal there
  * is one core, guiding principle: the file's tree must always be consistent on
  * disk.  We must be able to restart the truncate after a crash.
  *
@@ -4681,9 +4529,19 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
                return 0;
        if (is_journal_aborted(journal))
                return -EROFS;
+       /* We have to allocate physical blocks for delalloc blocks
+        * before flushing the journal; otherwise delalloc blocks can
+        * no longer be allocated.  Worse, a truncate on delalloc blocks
+        * could trigger a BUG by flushing delalloc blocks in the journal.
+        * There are no delalloc blocks in non-journal data mode.
+        */
+       if (val && test_opt(inode->i_sb, DELALLOC)) {
+               err = ext4_alloc_da_blocks(inode);
+               if (err < 0)
+                       return err;
+       }
 
        jbd2_journal_lock_updates(journal);
-       jbd2_journal_flush(journal);
 
        /*
         * OK, there are no updates running now, and all cached data is
@@ -4695,8 +4553,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 
        if (val)
                ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
-       else
+       else {
+               jbd2_journal_flush(journal);
                ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
+       }
        ext4_set_aops(inode);
 
        jbd2_journal_unlock_updates(journal);
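
For context, ext4_change_inode_journal_flag() is reached when the per-inode journal-data flag is toggled, i.e. "chattr +j <file>" or the flags ioctl shown below. This is a hedged userspace sketch; the file path is invented:

#include <fcntl.h>
#include <linux/fs.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/mnt/ext4/file", O_RDONLY);
        if (fd < 0) { perror("open"); return 1; }

        long flags;
        if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) { perror("GETFLAGS"); return 1; }

        flags |= FS_JOURNAL_DATA_FL;    /* ask for data=journal on this inode */
        if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0) { perror("SETFLAGS"); return 1; }

        close(fd);
        return 0;
}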