btrfs: Get rid of the confusing btrfs_file_extent_inline_len
index 77c49751da9f1ee16480603f5b591bf4f0721d9e..e489b879d22654c9dfcf854a4922f2f118a3f1ed 100644
@@ -854,8 +854,7 @@ next_slot:
                                btrfs_file_extent_num_bytes(leaf, fi);
                } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                        extent_end = key.offset +
-                               btrfs_file_extent_inline_len(leaf,
-                                                    path->slots[0], fi);
+                               btrfs_file_extent_ram_bytes(leaf, fi);
                } else {
                        /* can't happen */
                        BUG();
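
For inline extents the logical length is the uncompressed size stored in
ram_bytes, so extent_end can be computed with btrfs_file_extent_ram_bytes()
directly, without the slot-aware btrfs_file_extent_inline_len() helper that
the commit title calls confusing. A minimal userspace sketch of the
computation, using invented toy_extent/extent_end names rather than the
on-disk btrfs structures:

#include <stdio.h>

/* Hypothetical stand-ins for the extent item fields used above;
 * these are not the on-disk btrfs structures. */
enum { EXTENT_REG, EXTENT_INLINE };

struct toy_extent {
	int type;
	unsigned long long num_bytes;	/* on-disk length (regular) */
	unsigned long long ram_bytes;	/* uncompressed data length */
};

/* Mirrors the extent_end computation in the hunk above. */
static unsigned long long extent_end(unsigned long long key_offset,
				     const struct toy_extent *fe)
{
	if (fe->type == EXTENT_INLINE)
		return key_offset + fe->ram_bytes;	/* was inline_len */
	return key_offset + fe->num_bytes;
}

int main(void)
{
	struct toy_extent inl = { EXTENT_INLINE, 0, 300 };

	printf("inline extent ends at %llu\n", extent_end(0, &inl));
	return 0;
}

For inline extents, ram_bytes records the uncompressed data length whether or
not the data is compressed, which is what makes the simpler helper sufficient
here.
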
@@ -1607,7 +1606,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct page **pages = NULL;
-       struct extent_state *cached_state = NULL;
        struct extent_changeset *data_reserved = NULL;
        u64 release_bytes = 0;
        u64 lockstart;
@@ -1628,6 +1626,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 
        while (iov_iter_count(i) > 0) {
                size_t offset = pos & (PAGE_SIZE - 1);
+               struct extent_state *cached_state = NULL;
                size_t sector_offset;
                size_t write_bytes = min(iov_iter_count(i),
                                         nrptrs * (size_t)PAGE_SIZE -
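
This hunk and the previous one move the cached_state declaration from
function scope into the write loop, so every pass starts from a NULL cache
instead of possibly carrying a pointer left over from an earlier iteration.
A tiny illustration of the scoping guarantee (plain C, nothing btrfs-specific):

#include <stdio.h>

/* Illustrative: a pointer declared inside the loop body starts out NULL
 * on every pass, so no stale cache from a previous iteration can leak
 * into the unlock or free paths of the next one. */
int main(void)
{
	for (int pass = 0; pass < 2; pass++) {
		void *cached_state = NULL;	/* fresh each iteration */

		if (pass == 0)
			cached_state = &pass;	/* populated on some passes */

		printf("pass %d: %s\n", pass,
		       cached_state ? "cached" : "empty");
	}
	return 0;
}
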
@@ -1652,6 +1651,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                        break;
                }
 
+               only_release_metadata = false;
                sector_offset = pos & (fs_info->sectorsize - 1);
                reserve_bytes = round_up(write_bytes + sector_offset,
                                fs_info->sectorsize);
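
only_release_metadata tracks whether the current pass reserved only metadata
(the NOCOW case). Clearing it at the top of every pass, instead of relying on
the reset in one error path (removed by a later hunk at the set_extent_bit
site), guarantees that a value left over from a NOCOW pass cannot steer the
release accounting of the following pass. A toy model of why the per-pass
reset matters, with illustrative names and counters only:

#include <stdbool.h>
#include <stdio.h>

/* Toy accounting model; not the btrfs reservation API. */
static long meta_reserved, data_reserved;

static void release(long bytes, bool only_meta)
{
	meta_reserved -= bytes;
	if (!only_meta)
		data_reserved -= bytes;
}

int main(void)
{
	bool only_release_metadata;

	for (int pass = 0; pass < 2; pass++) {
		long bytes = 4096;

		only_release_metadata = false;	/* the fix: reset per pass */
		meta_reserved += bytes;
		if (pass == 0)
			only_release_metadata = true;	/* NOCOW-style pass */
		else
			data_reserved += bytes;		/* COW-style pass */

		/* error path: without the reset, pass 1 would inherit
		 * 'true' from pass 0 and leak its data reservation */
		release(bytes, only_release_metadata);
	}

	printf("leftover: meta=%ld data=%ld\n", meta_reserved, data_reserved);
	return 0;
}
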
@@ -1774,10 +1774,21 @@ again:
                if (copied > 0)
                        ret = btrfs_dirty_pages(inode, pages, dirty_pages,
                                                pos, copied, NULL);
+
+               /*
+                * If we have not locked the extent range, because the range's
+                * start offset is >= i_size, we might still have a non-NULL
+                * cached extent state, acquired while marking the extent range
+                * as delalloc through btrfs_dirty_pages(). Therefore free any
+                * possible cached extent state to avoid a memory leak.
+                */
                if (extents_locked)
                        unlock_extent_cached(&BTRFS_I(inode)->io_tree,
                                             lockstart, lockend, &cached_state,
                                             GFP_NOFS);
+               else
+                       free_extent_state(cached_state);
+
                btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
                if (ret) {
                        btrfs_drop_pages(pages, num_pages);
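
As the new comment explains, when the write starts at or beyond i_size the
extent range is never locked, yet btrfs_dirty_pages() can still hand back a
cached extent_state reference while marking the range delalloc.
unlock_extent_cached() is what normally drops that reference, so the
unlocked path has to call free_extent_state() (which is safe on NULL)
itself, or the state leaks. A toy refcount model of the two release paths
(toy_state and the toy_* helpers are invented, not the btrfs API):

#include <stdlib.h>

/* Toy refcounted cache object; illustrative only. */
struct toy_state { int refs; };

static struct toy_state *toy_state_alloc(void)
{
	struct toy_state *s = malloc(sizeof(*s));

	if (s)
		s->refs = 1;
	return s;
}

/* Like free_extent_state(): NULL-safe, drops one reference. */
static void toy_state_put(struct toy_state *s)
{
	if (s && --s->refs == 0)
		free(s);
}

/* Stand-in for unlock_extent_cached(): drops the cache reference too. */
static void toy_unlock_cached(struct toy_state **s)
{
	toy_state_put(*s);
	*s = NULL;
}

int main(void)
{
	int extents_locked = 0;	/* write starts beyond i_size */
	struct toy_state *cached_state = toy_state_alloc();

	if (extents_locked)
		toy_unlock_cached(&cached_state);
	else
		toy_state_put(cached_state);	/* the fix: explicit drop */
	return 0;
}
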
@@ -1797,7 +1808,6 @@ again:
                        set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
                                       lockend, EXTENT_NORESERVE, NULL,
                                       NULL, GFP_NOFS);
-                       only_release_metadata = false;
                }
 
                btrfs_drop_pages(pages, num_pages);
@@ -1901,7 +1911,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
        ssize_t err;
        loff_t pos;
-       size_t count = iov_iter_count(from);
+       size_t count;
        loff_t oldsize;
        int clean_page = 0;
 
@@ -1909,9 +1919,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
            (iocb->ki_flags & IOCB_NOWAIT))
                return -EOPNOTSUPP;
 
-       if (!inode_trylock(inode)) {
-               if (iocb->ki_flags & IOCB_NOWAIT)
+       if (iocb->ki_flags & IOCB_NOWAIT) {
+               if (!inode_trylock(inode))
                        return -EAGAIN;
+       } else {
                inode_lock(inode);
        }
 
@@ -1922,6 +1933,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        }
 
        pos = iocb->ki_pos;
+       count = iov_iter_count(from);
        if (iocb->ki_flags & IOCB_NOWAIT) {
                /*
                 * We will allocate space in case nodatacow is not set,
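
The three hunks above belong together: IOCB_NOWAIT callers now try the lock
exactly once and return -EAGAIN on contention, everyone else blocks in
inode_lock(), and the count initialization moves from the declaration down
to after the lock is held, immediately before the NOWAIT nocow checks that
consume it. A compilable pthread analog of the trylock-or-block pattern
(illustrative; the kernel uses inode_trylock()/inode_lock(), and acquire()
is an invented name), built with -pthread:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Toy analog of the reworked locking: nonblocking callers try once and
 * bail with -EAGAIN; everyone else blocks. */
static int acquire(bool nowait)
{
	if (nowait) {
		if (pthread_mutex_trylock(&lock))
			return -EAGAIN;
		return 0;
	}
	pthread_mutex_lock(&lock);
	return 0;
}

int main(void)
{
	if (acquire(true) == 0) {
		/* sample sizes only after the lock is held, as the last
		 * hunk does with iov_iter_count() */
		puts("locked without blocking");
		pthread_mutex_unlock(&lock);
	}
	return 0;
}
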
@@ -2077,6 +2089,18 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        bool full_sync = false;
        u64 len;
 
+       /*
+        * If the inode needs a full sync, make sure we use a full range to
+        * avoid log tree corruption, due to hole detection racing with ordered
+        * extent completion for adjacent ranges, and assertion failures during
+        * hole detection.
+        */
+       if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                    &BTRFS_I(inode)->runtime_flags)) {
+               start = 0;
+               end = LLONG_MAX;
+       }
+
        /*
         * The range length can be represented by u64, we have to do the typecasts
         * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
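
When BTRFS_INODE_NEEDS_FULL_SYNC is set, the range is widened to
[0, LLONG_MAX] before the length is computed, so hole detection cannot race
with ordered extent completion in an adjacent, unsynced range. The context
lines also explain the u64 cast: [0, LLONG_MAX] is representable, but
end - start + 1 would overflow a signed type. A small demonstration of both
points (needs_full_sync stands in for the runtime flag):

#include <limits.h>
#include <stdio.h>

int main(void)
{
	long long start = 4096, end = 8191;
	int needs_full_sync = 1;	/* stands in for the runtime flag */

	if (needs_full_sync) {
		start = 0;
		end = LLONG_MAX;
	}

	/* the u64 cast from the context lines: end - start + 1 would
	 * overflow long long for [0, LLONG_MAX] */
	unsigned long long len = (unsigned long long)end - start + 1;

	printf("len=%llu\n", len);
	return 0;
}
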
@@ -2086,6 +2110,30 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
        btrfs_init_log_ctx(&ctx, inode);
 
+       /*
+        * Before we acquired the inode's lock, someone may have dirtied more
+        * pages in the target range. We need to make sure that writeback for
+        * any such pages does not start while we are logging the inode, because
+        * if it does, any of the following might happen when we are not doing a
+        * full inode sync:
+        *
+        * 1) We log an extent after its writeback finishes but before its
+        *    checksums are added to the csum tree, leading to -EIO errors
+        *    when attempting to read the extent after a log replay.
+        *
+        * 2) We can end up logging an extent before its writeback finishes.
+        *    Therefore after the log replay we will have a file extent item
+        *    pointing to an unwritten extent (and no data checksums as well).
+        *
+        * So trigger writeback for any eventual new dirty pages and then we
+        * wait for all ordered extents to complete below.
+        */
+       ret = start_ordered_ops(inode, start, end);
+       if (ret) {
+               inode_unlock(inode);
+               goto out;
+       }
+
        /*
         * We write the dirty pages in the range and wait until they complete
         * out of the ->i_mutex. If so, we can flush the dirty pages by
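
The comment spells out the race: pages dirtied after the first, unlocked
flush but before the inode lock could otherwise enter writeback in the
middle of logging, producing either extents logged before their checksums
reach the csum tree or logged-but-unwritten extents after replay. Flushing
again under the lock and then waiting for ordered extents closes both
windows. A toy skeleton of the resulting ordering (every helper is a
stand-in, not a real btrfs function):

#include <stdio.h>

static int flush_dirty_range(void) { puts("flush dirty pages"); return 0; }
static void lock_inode(void)       { puts("inode_lock"); }
static void unlock_inode(void)     { puts("inode_unlock"); }
static void wait_ordered(void)     { puts("wait for ordered extents"); }
static void log_inode(void)        { puts("log the inode"); }

int main(void)
{
	if (flush_dirty_range())	/* first flush, before the lock */
		return 1;

	lock_inode();

	/* the fix: flush again under the lock so pages dirtied in the
	 * window above cannot enter writeback while logging runs */
	if (flush_dirty_range()) {
		unlock_inode();
		return 1;
	}

	wait_ordered();
	log_inode();
	unlock_inode();
	return 0;
}
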
@@ -2791,6 +2839,11 @@ out_only_mutex:
                 * for detecting, at fsync time, if the inode isn't yet in the
                 * log tree or it's there but not up to date.
                 */
+               struct timespec now = current_time(inode);
+
+               inode_inc_iversion(inode);
+               inode->i_mtime = now;
+               inode->i_ctime = now;
                trans = btrfs_start_transaction(root, 1);
                if (IS_ERR(trans)) {
                        err = PTR_ERR(trans);
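
This is the out_only_mutex exit of the hole punching path, taken when no
extents actually change: punching a hole is still a modification, so
iversion, mtime and ctime are bumped before the transaction that records
them, and one clock sample feeds both timestamps so they stay identical.
A userspace sketch of the pattern, using an invented toy_inode rather than
the kernel's struct inode:

#include <stdio.h>
#include <time.h>

/* Illustrative toy inode; not the kernel structure. */
struct toy_inode {
	unsigned long long iversion;
	struct timespec mtime, ctime;
};

int main(void)
{
	struct toy_inode ino = { 0 };
	struct timespec now;

	clock_gettime(CLOCK_REALTIME, &now);
	ino.iversion++;		/* mirrors inode_inc_iversion() */
	ino.mtime = now;	/* one sample feeds both fields */
	ino.ctime = now;

	printf("v%llu mtime=%ld\n", ino.iversion, (long)ino.mtime.tv_sec);
	return 0;
}
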
@@ -2983,6 +3036,7 @@ static long btrfs_fallocate(struct file *file, int mode,
                        ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
                                        cur_offset, last_byte - cur_offset);
                        if (ret < 0) {
+                               cur_offset = last_byte;
                                free_extent_map(em);
                                break;
                        }
@@ -3053,7 +3107,7 @@ out:
        /* Let go of our reservation. */
        if (ret != 0)
                btrfs_free_reserved_data_space(inode, data_reserved,
-                               alloc_start, alloc_end - cur_offset);
+                               cur_offset, alloc_end - cur_offset);
        extent_changeset_free(data_reserved);
        return ret;
 }
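
Both fallocate hunks keep cur_offset in step with the reservation walk: when
btrfs_qgroup_reserve_data() fails, cur_offset steps past the range whose
reserve just failed before breaking out, and the final release on error
starts at cur_offset rather than alloc_start, so the range handed to
btrfs_free_reserved_data_space() matches its alloc_end - cur_offset length
and covers only the unconsumed tail. A toy walk over the loop (offsets,
sizes and the failure point are all illustrative):

#include <stdio.h>

int main(void)
{
	long long alloc_start = 0, alloc_end = 12288;
	long long cur_offset = alloc_start;
	int ret = 0;

	while (cur_offset < alloc_end) {
		long long last_byte = cur_offset + 4096;	/* one hole */

		ret = (cur_offset == 8192) ? -1 : 0;	/* simulated failure */
		if (ret < 0) {
			cur_offset = last_byte;	/* the fix: step past the
						   range whose reserve
						   just failed */
			break;
		}
		cur_offset = last_byte;
	}

	if (ret)	/* release begins at cur_offset, not alloc_start */
		printf("free [%lld, %lld)\n", cur_offset, alloc_end);
	return 0;
}
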