]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - fs/btrfs/file.c
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[mirror_ubuntu-bionic-kernel.git] / fs / btrfs / file.c
index 098bb8f690c992e1ebd01270f49ff2d37e6658bd..15a09cb156cec4c59776c683ecc2f98527e98d15 100644 (file)
@@ -41,6 +41,7 @@
 #include "locking.h"
 #include "volumes.h"
 #include "qgroup.h"
+#include "compression.h"
 
 static struct kmem_cache *btrfs_inode_defrag_cachep;
 /*
@@ -498,7 +499,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
        loff_t isize = i_size_read(inode);
 
        start_pos = pos & ~((u64)root->sectorsize - 1);
-       num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize);
+       num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize);
 
        end_of_last_block = start_pos + num_bytes - 1;
        err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
@@ -1379,16 +1380,19 @@ fail:
 static noinline int
 lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
                                size_t num_pages, loff_t pos,
+                               size_t write_bytes,
                                u64 *lockstart, u64 *lockend,
                                struct extent_state **cached_state)
 {
+       struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 start_pos;
        u64 last_pos;
        int i;
        int ret = 0;
 
-       start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
-       last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
+       start_pos = round_down(pos, root->sectorsize);
+       last_pos = start_pos
+               + round_up(pos + write_bytes - start_pos, root->sectorsize) - 1;
 
        if (start_pos < inode->i_size) {
                struct btrfs_ordered_extent *ordered;
@@ -1503,6 +1507,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 
        while (iov_iter_count(i) > 0) {
                size_t offset = pos & (PAGE_CACHE_SIZE - 1);
+               size_t sector_offset;
                size_t write_bytes = min(iov_iter_count(i),
                                         nrptrs * (size_t)PAGE_CACHE_SIZE -
                                         offset);
@@ -1511,6 +1516,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                size_t reserve_bytes;
                size_t dirty_pages;
                size_t copied;
+               size_t dirty_sectors;
+               size_t num_sectors;
 
                WARN_ON(num_pages > nrptrs);
 
@@ -1523,29 +1530,29 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                        break;
                }
 
-               reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+               sector_offset = pos & (root->sectorsize - 1);
+               reserve_bytes = round_up(write_bytes + sector_offset,
+                               root->sectorsize);
 
-               if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
-                                            BTRFS_INODE_PREALLOC)) {
-                       ret = check_can_nocow(inode, pos, &write_bytes);
-                       if (ret < 0)
-                               break;
-                       if (ret > 0) {
-                               /*
-                                * For nodata cow case, no need to reserve
-                                * data space.
-                                */
-                               only_release_metadata = true;
-                               /*
-                                * our prealloc extent may be smaller than
-                                * write_bytes, so scale down.
-                                */
-                               num_pages = DIV_ROUND_UP(write_bytes + offset,
-                                                        PAGE_CACHE_SIZE);
-                               reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
-                               goto reserve_metadata;
-                       }
+               if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+                                             BTRFS_INODE_PREALLOC)) &&
+                   check_can_nocow(inode, pos, &write_bytes) > 0) {
+                       /*
+                        * For nodata cow case, no need to reserve
+                        * data space.
+                        */
+                       only_release_metadata = true;
+                       /*
+                        * our prealloc extent may be smaller than
+                        * write_bytes, so scale down.
+                        */
+                       num_pages = DIV_ROUND_UP(write_bytes + offset,
+                                                PAGE_CACHE_SIZE);
+                       reserve_bytes = round_up(write_bytes + sector_offset,
+                                       root->sectorsize);
+                       goto reserve_metadata;
                }
+
                ret = btrfs_check_data_free_space(inode, pos, write_bytes);
                if (ret < 0)
                        break;
@@ -1576,8 +1583,8 @@ again:
                        break;
 
                ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
-                                                     pos, &lockstart, &lockend,
-                                                     &cached_state);
+                                               pos, write_bytes, &lockstart,
+                                               &lockend, &cached_state);
                if (ret < 0) {
                        if (ret == -EAGAIN)
                                goto again;
@@ -1612,9 +1619,16 @@ again:
                 * we still have an outstanding extent for the chunk we actually
                 * managed to copy.
                 */
-               if (num_pages > dirty_pages) {
-                       release_bytes = (num_pages - dirty_pages) <<
-                               PAGE_CACHE_SHIFT;
+               num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+                                               reserve_bytes);
+               dirty_sectors = round_up(copied + sector_offset,
+                                       root->sectorsize);
+               dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+                                               dirty_sectors);
+
+               if (num_sectors > dirty_sectors) {
+                       release_bytes = (write_bytes - copied)
+                               & ~((u64)root->sectorsize - 1);
                        if (copied > 0) {
                                spin_lock(&BTRFS_I(inode)->lock);
                                BTRFS_I(inode)->outstanding_extents++;
@@ -1633,7 +1647,8 @@ again:
                        }
                }
 
-               release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
+               release_bytes = round_up(copied + sector_offset,
+                                       root->sectorsize);
 
                if (copied > 0)
                        ret = btrfs_dirty_pages(root, inode, pages,
@@ -1654,8 +1669,7 @@ again:
 
                if (only_release_metadata && copied > 0) {
                        lockstart = round_down(pos, root->sectorsize);
-                       lockend = lockstart +
-                               (dirty_pages << PAGE_CACHE_SHIFT) - 1;
+                       lockend = round_up(pos + copied, root->sectorsize) - 1;
 
                        set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
                                       lockend, EXTENT_NORESERVE, NULL,
@@ -1761,6 +1775,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        ssize_t err;
        loff_t pos;
        size_t count;
+       loff_t oldsize;
+       int clean_page = 0;
 
        inode_lock(inode);
        err = generic_write_checks(iocb, from);
@@ -1799,14 +1815,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        pos = iocb->ki_pos;
        count = iov_iter_count(from);
        start_pos = round_down(pos, root->sectorsize);
-       if (start_pos > i_size_read(inode)) {
+       oldsize = i_size_read(inode);
+       if (start_pos > oldsize) {
                /* Expand hole size to cover write data, preventing empty gap */
                end_pos = round_up(pos + count, root->sectorsize);
-               err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
+               err = btrfs_cont_expand(inode, oldsize, end_pos);
                if (err) {
                        inode_unlock(inode);
                        goto out;
                }
+               if (start_pos > round_up(oldsize, root->sectorsize))
+                       clean_page = 1;
        }
 
        if (sync)
@@ -1818,6 +1837,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
                num_written = __btrfs_buffered_write(file, from, pos);
                if (num_written > 0)
                        iocb->ki_pos = pos + num_written;
+               if (clean_page)
+                       pagecache_isize_extended(inode, oldsize,
+                                               i_size_read(inode));
        }
 
        inode_unlock(inode);
@@ -1825,7 +1847,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        /*
         * We also have to set last_sub_trans to the current log transid,
         * otherwise subsequent syncs to a file that's been synced in this
-        * transaction will appear to have already occured.
+        * transaction will appear to have already occurred.
         */
        spin_lock(&BTRFS_I(inode)->lock);
        BTRFS_I(inode)->last_sub_trans = root->log_transid;
@@ -1996,10 +2018,11 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         */
        smp_mb();
        if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
-           (BTRFS_I(inode)->last_trans <=
-            root->fs_info->last_trans_committed &&
-            (full_sync ||
-             !btrfs_have_ordered_extents_in_range(inode, start, len)))) {
+           (full_sync && BTRFS_I(inode)->last_trans <=
+            root->fs_info->last_trans_committed) ||
+           (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
+            BTRFS_I(inode)->last_trans
+            <= root->fs_info->last_trans_committed)) {
                /*
                 * We'v had everything committed since the last time we were
                 * modified so clear this flag in case it was set for whatever
@@ -2293,10 +2316,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        int ret = 0;
        int err = 0;
        unsigned int rsv_count;
-       bool same_page;
+       bool same_block;
        bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
        u64 ino_size;
-       bool truncated_page = false;
+       bool truncated_block = false;
        bool updated_inode = false;
 
        ret = btrfs_wait_ordered_range(inode, offset, len);
@@ -2304,7 +2327,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                return ret;
 
        inode_lock(inode);
-       ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
+       ino_size = round_up(inode->i_size, root->sectorsize);
        ret = find_first_non_hole(inode, &offset, &len);
        if (ret < 0)
                goto out_only_mutex;
@@ -2317,31 +2340,30 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
        lockend = round_down(offset + len,
                             BTRFS_I(inode)->root->sectorsize) - 1;
-       same_page = ((offset >> PAGE_CACHE_SHIFT) ==
-                   ((offset + len - 1) >> PAGE_CACHE_SHIFT));
-
+       same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset))
+               == (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1));
        /*
-        * We needn't truncate any page which is beyond the end of the file
+        * We needn't truncate any block which is beyond the end of the file
         * because we are sure there is no data there.
         */
        /*
-        * Only do this if we are in the same page and we aren't doing the
-        * entire page.
+        * Only do this if we are in the same block and we aren't doing the
+        * entire block.
         */
-       if (same_page && len < PAGE_CACHE_SIZE) {
+       if (same_block && len < root->sectorsize) {
                if (offset < ino_size) {
-                       truncated_page = true;
-                       ret = btrfs_truncate_page(inode, offset, len, 0);
+                       truncated_block = true;
+                       ret = btrfs_truncate_block(inode, offset, len, 0);
                } else {
                        ret = 0;
                }
                goto out_only_mutex;
        }
 
-       /* zero back part of the first page */
+       /* zero back part of the first block */
        if (offset < ino_size) {
-               truncated_page = true;
-               ret = btrfs_truncate_page(inode, offset, 0, 0);
+               truncated_block = true;
+               ret = btrfs_truncate_block(inode, offset, 0, 0);
                if (ret) {
                        inode_unlock(inode);
                        return ret;
@@ -2376,9 +2398,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                if (!ret) {
                        /* zero the front end of the last page */
                        if (tail_start + tail_len < ino_size) {
-                               truncated_page = true;
-                               ret = btrfs_truncate_page(inode,
-                                               tail_start + tail_len, 0, 1);
+                               truncated_block = true;
+                               ret = btrfs_truncate_block(inode,
+                                                       tail_start + tail_len,
+                                                       0, 1);
                                if (ret)
                                        goto out_only_mutex;
                        }
@@ -2544,7 +2567,7 @@ out_trans:
                goto out_free;
 
        inode_inc_iversion(inode);
-       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+       inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
 
        trans->block_rsv = &root->fs_info->trans_block_rsv;
        ret = btrfs_update_inode(trans, root, inode);
@@ -2558,7 +2581,7 @@ out:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                             &cached_state, GFP_NOFS);
 out_only_mutex:
-       if (!updated_inode && truncated_page && !ret && !err) {
+       if (!updated_inode && truncated_block && !ret && !err) {
                /*
                 * If we only end up zeroing part of a page, we still need to
                 * update the inode item, so that all the time fields are
@@ -2611,7 +2634,7 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len)
                return 0;
        }
 insert:
-       range = kmalloc(sizeof(*range), GFP_NOFS);
+       range = kmalloc(sizeof(*range), GFP_KERNEL);
        if (!range)
                return -ENOMEM;
        range->start = start;
@@ -2678,10 +2701,10 @@ static long btrfs_fallocate(struct file *file, int mode,
        } else if (offset + len > inode->i_size) {
                /*
                 * If we are fallocating from the end of the file onward we
-                * need to zero out the end of the page if i_size lands in the
-                * middle of a page.
+                * need to zero out the end of the block if i_size lands in the
+                * middle of a block.
                 */
-               ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
+               ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
                if (ret)
                        goto out;
        }
@@ -2712,7 +2735,7 @@ static long btrfs_fallocate(struct file *file, int mode,
                        btrfs_put_ordered_extent(ordered);
                        unlock_extent_cached(&BTRFS_I(inode)->io_tree,
                                             alloc_start, locked_end,
-                                            &cached_state, GFP_NOFS);
+                                            &cached_state, GFP_KERNEL);
                        /*
                         * we can't wait on the range with the transaction
                         * running or with the extent lock held
@@ -2794,7 +2817,7 @@ static long btrfs_fallocate(struct file *file, int mode,
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
                } else {
-                       inode->i_ctime = CURRENT_TIME;
+                       inode->i_ctime = current_fs_time(inode->i_sb);
                        i_size_write(inode, actual_end);
                        btrfs_ordered_update_i_size(inode, actual_end, NULL);
                        ret = btrfs_update_inode(trans, root, inode);
@@ -2806,7 +2829,7 @@ static long btrfs_fallocate(struct file *file, int mode,
        }
 out_unlock:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
-                            &cached_state, GFP_NOFS);
+                            &cached_state, GFP_KERNEL);
 out:
        /*
         * As we waited the extent range, the data_rsv_map must be empty
@@ -2939,8 +2962,7 @@ const struct file_operations btrfs_file_operations = {
 
 void btrfs_auto_defrag_exit(void)
 {
-       if (btrfs_inode_defrag_cachep)
-               kmem_cache_destroy(btrfs_inode_defrag_cachep);
+       kmem_cache_destroy(btrfs_inode_defrag_cachep);
 }
 
 int btrfs_auto_defrag_init(void)