#include "hash.h"
#include "compression.h"
#include "qgroup.h"
+#include "inode-map.h"
/* magic values for the inode_only field in btrfs_log_inode:
*
nritems = btrfs_header_nritems(path->nodes[0]);
if (path->slots[0] >= nritems) {
ret = btrfs_next_leaf(root, path);
- if (ret)
+ if (ret == 1)
break;
+ else if (ret < 0)
+ goto out;
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
if (ret)
break;
- /* for regular files, make sure corresponding
- * orphan item exist. extents past the new EOF
- * will be truncated later by orphan cleanup.
+ /*
+ * Before replaying extents, truncate the inode to its
+ * size. We need to do it now and not after log replay
+ * because before an fsync we can have prealloc extents
+ * added beyond the inode's i_size. If we did it after,
+ * through orphan cleanup for example, we would drop
+ * those prealloc extents just after replaying them.
*/
if (S_ISREG(mode)) {
- ret = insert_orphan_item(wc->trans, root,
- key.objectid);
+ struct inode *inode;
+ u64 from;
+
+ inode = read_one_inode(root, key.objectid);
+ if (!inode) {
+ ret = -EIO;
+ break;
+ }
+ from = ALIGN(i_size_read(inode),
+ root->fs_info->sectorsize);
+ ret = btrfs_drop_extents(wc->trans, root, inode,
+ from, (u64)-1, 1);
+ /*
+ * If the nlink count is zero here, the iput
+ * will free the inode. We bump it to make
+ * sure it doesn't get freed until the link
+ * count fixup is done.
+ */
+ if (!ret) {
+ if (inode->i_nlink == 0)
+ inc_nlink(inode);
+ /* Update link count and nbytes. */
+ ret = btrfs_update_inode(wc->trans,
+ root, inode);
+ }
+ iput(inode);
if (ret)
break;
}
clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
+ } else {
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+ clear_extent_buffer_dirty(next);
}
WARN_ON(root_owner !=
clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
+ } else {
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+ clear_extent_buffer_dirty(next);
}
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
+ } else {
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+ clear_extent_buffer_dirty(next);
}
WARN_ON(log->root_key.objectid !=
while (1) {
ret = find_first_extent_bit(&log->dirty_log_pages,
- 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW,
+ 0, &start, &end,
+ EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT,
NULL);
if (ret)
break;
clear_extent_bits(&log->dirty_log_pages, start, end,
- EXTENT_DIRTY | EXTENT_NEW);
+ EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
}
/*
* from this directory and from this transaction
*/
ret = btrfs_next_leaf(root, path);
- if (ret == 1) {
- last_offset = (u64)-1;
+ if (ret) {
+ if (ret == 1)
+ last_offset = (u64)-1;
+ else
+ err = ret;
goto done;
}
btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
ASSERT(ret == 0);
src = src_path->nodes[0];
i = 0;
+ need_find_last_extent = true;
}
btrfs_item_key_to_cpu(src, &key, i);
num++;
}
+ /*
+ * Add all prealloc extents beyond the inode's i_size to make sure we
+ * don't lose them after doing a fast fsync and replaying the log.
+ */
+ if (inode->flags & BTRFS_INODE_PREALLOC) {
+ struct rb_node *node;
+
+ for (node = rb_last(&tree->map); node; node = rb_prev(node)) {
+ em = rb_entry(node, struct extent_map, rb_node);
+ if (em->start < i_size_read(&inode->vfs_inode))
+ break;
+ if (!list_empty(&em->list))
+ continue;
+ /* Same as above loop. */
+ if (++num > 32768) {
+ list_del_init(&tree->modified_extents);
+ ret = -EFBIG;
+ goto process;
+ }
+ refcount_inc(&em->refs);
+ set_bit(EXTENT_FLAG_LOGGING, &em->flags);
+ list_add_tail(&em->list, &extents);
+ }
+ }
+
list_sort(NULL, &extents, extent_cmp);
btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
/*
struct extent_map_tree *em_tree = &inode->extent_tree;
u64 logged_isize = 0;
bool need_log_inode_item = true;
+ bool xattrs_logged = false;
path = btrfs_alloc_path();
if (!path)
err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
if (err)
goto out_unlock;
+ xattrs_logged = true;
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
btrfs_release_path(path);
btrfs_release_path(dst_path);
btrfs_release_path(dst_path);
if (need_log_inode_item) {
err = log_inode_item(trans, log, dst_path, inode);
+ if (!err && !xattrs_logged) {
+ err = btrfs_log_all_xattrs(trans, root, inode, path,
+ dst_path);
+ btrfs_release_path(path);
+ }
if (err)
goto out_unlock;
}
path);
}
+ if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
+ struct btrfs_root *root = wc.replay_dest;
+
+ btrfs_release_path(path);
+
+ /*
+ * We have just replayed everything, and the highest
+ * objectid of this root has probably changed if any
+ * inode items were replayed.
+ *
+ * root->objectid_mutex is not acquired because log replay
+ * can only happen during mount.
+ */
+ ret = btrfs_find_highest_objectid(root,
+ &root->highest_objectid);
+ }
+
key.offset = found_key.offset - 1;
wc.replay_dest->log_root = NULL;
free_extent_buffer(log->node);
* this will force the logging code to walk the dentry chain
* up for the file
*/
- if (S_ISREG(inode->vfs_inode.i_mode))
+ if (!S_ISDIR(inode->vfs_inode.i_mode))
inode->last_unlink_trans = trans->transid;
/*