]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - fs/btrfs/tree-log.c
Btrfs: fix copy_items() return value when logging an inode
[mirror_ubuntu-bionic-kernel.git] / fs / btrfs / tree-log.c
index 7bf9b31561db14ec7159fd0b7479e6bdee149735..d9ad14205bf555762c517838c1d5bbb75dcb1e6d 100644 (file)
@@ -28,6 +28,7 @@
 #include "hash.h"
 #include "compression.h"
 #include "qgroup.h"
+#include "inode-map.h"
 
 /* magic values for the inode_only field in btrfs_log_inode:
  *
@@ -2271,8 +2272,10 @@ again:
                        nritems = btrfs_header_nritems(path->nodes[0]);
                        if (path->slots[0] >= nritems) {
                                ret = btrfs_next_leaf(root, path);
-                               if (ret)
+                               if (ret == 1)
                                        break;
+                               else if (ret < 0)
+                                       goto out;
                        }
                        btrfs_item_key_to_cpu(path->nodes[0], &found_key,
                                              path->slots[0]);
@@ -2376,13 +2379,41 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
                        if (ret)
                                break;
 
-                       /* for regular files, make sure corresponding
-                        * orphan item exist. extents past the new EOF
-                        * will be truncated later by orphan cleanup.
+                       /*
+                        * Before replaying extents, truncate the inode to its
+                        * size. We need to do it now and not after log replay
+                        * because before an fsync we can have prealloc extents
+                        * added beyond the inode's i_size. If we did it after,
+                        * through orphan cleanup for example, we would drop
+                        * those prealloc extents just after replaying them.
                         */
                        if (S_ISREG(mode)) {
-                               ret = insert_orphan_item(wc->trans, root,
-                                                        key.objectid);
+                               struct inode *inode;
+                               u64 from;
+
+                               inode = read_one_inode(root, key.objectid);
+                               if (!inode) {
+                                       ret = -EIO;
+                                       break;
+                               }
+                               from = ALIGN(i_size_read(inode),
+                                            root->fs_info->sectorsize);
+                               ret = btrfs_drop_extents(wc->trans, root, inode,
+                                                        from, (u64)-1, 1);
+                               /*
+                                * If the nlink count is zero here, the iput
+                                * will free the inode.  We bump it to make
+                                * sure it doesn't get freed until the link
+                                * count fixup is done.
+                                */
+                               if (!ret) {
+                                       if (inode->i_nlink == 0)
+                                               inc_nlink(inode);
+                                       /* Update link count and nbytes. */
+                                       ret = btrfs_update_inode(wc->trans,
+                                                                root, inode);
+                               }
+                               iput(inode);
                                if (ret)
                                        break;
                        }
@@ -2494,6 +2525,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                                        clean_tree_block(fs_info, next);
                                        btrfs_wait_tree_block_writeback(next);
                                        btrfs_tree_unlock(next);
+                               } else {
+                                       if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+                                               clear_extent_buffer_dirty(next);
                                }
 
                                WARN_ON(root_owner !=
@@ -2574,6 +2608,9 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                        clean_tree_block(fs_info, next);
                                        btrfs_wait_tree_block_writeback(next);
                                        btrfs_tree_unlock(next);
+                               } else {
+                                       if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+                                               clear_extent_buffer_dirty(next);
                                }
 
                                WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
@@ -2652,6 +2689,9 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
                                clean_tree_block(fs_info, next);
                                btrfs_wait_tree_block_writeback(next);
                                btrfs_tree_unlock(next);
+                       } else {
+                               if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+                                       clear_extent_buffer_dirty(next);
                        }
 
                        WARN_ON(log->root_key.objectid !=
@@ -3040,13 +3080,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
 
        while (1) {
                ret = find_first_extent_bit(&log->dirty_log_pages,
-                               0, &start, &end, EXTENT_DIRTY | EXTENT_NEW,
+                               0, &start, &end,
+                               EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT,
                                NULL);
                if (ret)
                        break;
 
                clear_extent_bits(&log->dirty_log_pages, start, end,
-                                 EXTENT_DIRTY | EXTENT_NEW);
+                                 EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
        }
 
        /*
@@ -3423,8 +3464,11 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                 * from this directory and from this transaction
                 */
                ret = btrfs_next_leaf(root, path);
-               if (ret == 1) {
-                       last_offset = (u64)-1;
+               if (ret) {
+                       if (ret == 1)
+                               last_offset = (u64)-1;
+                       else
+                               err = ret;
                        goto done;
                }
                btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
@@ -3876,6 +3920,7 @@ fill_holes:
                        ASSERT(ret == 0);
                        src = src_path->nodes[0];
                        i = 0;
+                       need_find_last_extent = true;
                }
 
                btrfs_item_key_to_cpu(src, &key, i);
@@ -4225,6 +4270,31 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                num++;
        }
 
+       /*
+        * Add all prealloc extents beyond the inode's i_size to make sure we
+        * don't lose them after doing a fast fsync and replaying the log.
+        */
+       if (inode->flags & BTRFS_INODE_PREALLOC) {
+               struct rb_node *node;
+
+               for (node = rb_last(&tree->map); node; node = rb_prev(node)) {
+                       em = rb_entry(node, struct extent_map, rb_node);
+                       if (em->start < i_size_read(&inode->vfs_inode))
+                               break;
+                       if (!list_empty(&em->list))
+                               continue;
+                       /* Same as above loop. */
+                       if (++num > 32768) {
+                               list_del_init(&tree->modified_extents);
+                               ret = -EFBIG;
+                               goto process;
+                       }
+                       refcount_inc(&em->refs);
+                       set_bit(EXTENT_FLAG_LOGGING, &em->flags);
+                       list_add_tail(&em->list, &extents);
+               }
+       }
+
        list_sort(NULL, &extents, extent_cmp);
        btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
        /*
@@ -4659,6 +4729,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        struct extent_map_tree *em_tree = &inode->extent_tree;
        u64 logged_isize = 0;
        bool need_log_inode_item = true;
+       bool xattrs_logged = false;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -4960,6 +5031,7 @@ next_key:
        err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
        if (err)
                goto out_unlock;
+       xattrs_logged = true;
        if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
                btrfs_release_path(path);
                btrfs_release_path(dst_path);
@@ -4972,6 +5044,11 @@ log_extents:
        btrfs_release_path(dst_path);
        if (need_log_inode_item) {
                err = log_inode_item(trans, log, dst_path, inode);
+               if (!err && !xattrs_logged) {
+                       err = btrfs_log_all_xattrs(trans, root, inode, path,
+                                                  dst_path);
+                       btrfs_release_path(path);
+               }
                if (err)
                        goto out_unlock;
        }
@@ -5705,6 +5782,23 @@ again:
                                                      path);
                }
 
+               if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
+                       struct btrfs_root *root = wc.replay_dest;
+
+                       btrfs_release_path(path);
+
+                       /*
+                        * We have just replayed everything, and the highest
+                        * objectid of fs roots probably has changed in case
+                        * some inode_item's got replayed.
+                        *
+                        * root->objectid_mutex is not acquired as log replay
+                        * could only happen during mount.
+                        */
+                       ret = btrfs_find_highest_objectid(root,
+                                                 &root->highest_objectid);
+               }
+
                key.offset = found_key.offset - 1;
                wc.replay_dest->log_root = NULL;
                free_extent_buffer(log->node);
@@ -5853,7 +5947,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
         * this will force the logging code to walk the dentry chain
         * up for the file
         */
-       if (S_ISREG(inode->vfs_inode.i_mode))
+       if (!S_ISDIR(inode->vfs_inode.i_mode))
                inode->last_unlink_trans = trans->transid;
 
        /*