--- /dev/null
- trans = btrfs_join_transaction(root, 0);
+/*
+ * Copyright (C) 2011 Fujitsu. All rights reserved.
+ * Written by Miao Xie <miaox@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/slab.h>
+#include "delayed-inode.h"
+#include "disk-io.h"
+#include "transaction.h"
+
+#define BTRFS_DELAYED_WRITEBACK 400 /* pending-item count at or above which btrfs_balance_delayed_items() queues every prepared node and then waits */
+#define BTRFS_DELAYED_BACKGROUND 100 /* pending-item count below which balancing is skipped; waiters are woken once the count drops under it */
+
+static struct kmem_cache *delayed_node_cache; /* slab cache used for every struct btrfs_delayed_node */
+
+/*
+ * Create the slab cache that backs struct btrfs_delayed_node allocations.
+ *
+ * Returns 0 on success or -ENOMEM when the cache cannot be created.
+ */
+int __init btrfs_delayed_inode_init(void)
+{
+	struct kmem_cache *cache;
+
+	cache = kmem_cache_create("delayed_node",
+				  sizeof(struct btrfs_delayed_node), 0,
+				  SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+				  NULL);
+	delayed_node_cache = cache;
+
+	return cache ? 0 : -ENOMEM;
+}
+
+/* Tear down the delayed node slab cache; a no-op if it was never created. */
+void btrfs_delayed_inode_exit(void)
+{
+	if (!delayed_node_cache)
+		return;
+
+	kmem_cache_destroy(delayed_node_cache);
+}
+
+/*
+ * Bring a freshly allocated delayed node into its initial state: bound to
+ * @root and @inode_id, holding no references, carrying no pending items and
+ * linked into neither the node list nor the prepare list.
+ */
+static inline void btrfs_init_delayed_node(
+				struct btrfs_delayed_node *delayed_node,
+				struct btrfs_root *root, u64 inode_id)
+{
+	/* identity of the node */
+	delayed_node->inode_id = inode_id;
+	delayed_node->root = root;
+
+	/* locking and reference counting */
+	mutex_init(&delayed_node->mutex);
+	atomic_set(&delayed_node->refs, 0);
+
+	/* no pending work yet: both item trees are empty */
+	delayed_node->del_root = RB_ROOT;
+	delayed_node->ins_root = RB_ROOT;
+	delayed_node->count = 0;
+	delayed_node->inode_dirty = 0;
+	delayed_node->index_cnt = 0;
+	delayed_node->bytes_reserved = 0;
+
+	/* not queued anywhere */
+	delayed_node->in_list = 0;
+	INIT_LIST_HEAD(&delayed_node->p_list);
+	INIT_LIST_HEAD(&delayed_node->n_list);
+}
+
+/*
+ * Return 1 when @item2 directly follows @item1: both are dir index items of
+ * the same object and @item2's offset is exactly @item1's offset plus one.
+ * Return 0 otherwise.
+ */
+static inline int btrfs_is_continuous_delayed_item(
+					struct btrfs_delayed_item *item1,
+					struct btrfs_delayed_item *item2)
+{
+	if (item1->key.type != BTRFS_DIR_INDEX_KEY)
+		return 0;
+	if (item1->key.objectid != item2->key.objectid)
+		return 0;
+	if (item1->key.type != item2->key.type)
+		return 0;
+
+	return item1->key.offset + 1 == item2->key.offset;
+}
+
+/* Return the delayed root stored in the fs_info that @root belongs to. */
+static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
+							struct btrfs_root *root)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	return fs_info->delayed_root;
+}
+
+/*
+ * Find the delayed node of @inode, creating and registering one if needed.
+ *
+ * Lookup order: the pointer cached in the btrfs inode, then the per-root
+ * radix tree (under root->inode_lock), and finally a fresh allocation that
+ * is inserted into the radix tree.  Whenever a race is detected the whole
+ * sequence is restarted.
+ *
+ * Returns the node with one reference held for the caller (plus one for the
+ * inode's cached pointer when this call installs that pointer), or an
+ * ERR_PTR() carrying -ENOMEM or the radix_tree_preload() error.
+ */
+static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
+							struct inode *inode)
+{
+	struct btrfs_inode *bi = BTRFS_I(inode);
+	struct btrfs_root *root = bi->root;
+	struct btrfs_delayed_node *found;
+	struct btrfs_delayed_node *fresh;
+	u64 ino = btrfs_ino(inode);
+	int err;
+
+	for (;;) {
+		/* fast path: the inode already caches its node */
+		found = ACCESS_ONCE(bi->delayed_node);
+		if (found) {
+			atomic_inc(&found->refs);	/* can be accessed */
+			return found;
+		}
+
+		spin_lock(&root->inode_lock);
+		found = radix_tree_lookup(&root->delayed_nodes_tree, ino);
+		if (found) {
+			if (bi->delayed_node) {
+				/* somebody cached it meanwhile, start over */
+				spin_unlock(&root->inode_lock);
+				continue;
+			}
+			bi->delayed_node = found;
+			atomic_inc(&found->refs);	/* can be accessed */
+			atomic_inc(&found->refs);	/* cached in the inode */
+			spin_unlock(&root->inode_lock);
+			return found;
+		}
+		spin_unlock(&root->inode_lock);
+
+		fresh = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
+		if (!fresh)
+			return ERR_PTR(-ENOMEM);
+		btrfs_init_delayed_node(fresh, root, ino);
+
+		atomic_inc(&fresh->refs);	/* cached in the btrfs inode */
+		atomic_inc(&fresh->refs);	/* can be accessed */
+
+		err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+		if (err) {
+			kmem_cache_free(delayed_node_cache, fresh);
+			return ERR_PTR(err);
+		}
+
+		spin_lock(&root->inode_lock);
+		err = radix_tree_insert(&root->delayed_nodes_tree, ino, fresh);
+		if (err != -EEXIST)
+			break;
+
+		/* lost the insertion race: drop our copy and retry */
+		kmem_cache_free(delayed_node_cache, fresh);
+		spin_unlock(&root->inode_lock);
+		radix_tree_preload_end();
+	}
+
+	bi->delayed_node = fresh;
+	spin_unlock(&root->inode_lock);
+	radix_tree_preload_end();
+
+	return fresh;
+}
+
+/*
+ * Queue @node on the delayed root.  Call it when holding delayed_node->mutex.
+ *
+ * A node that is not queued yet is appended to both the node list and the
+ * prepare list and gains a reference for its list membership.  A node that
+ * is already queued is moved to the tail of the prepare list if it sits on
+ * it; otherwise it is re-added to the prepare list only when @mod is set.
+ */
+static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
+				     struct btrfs_delayed_node *node,
+				     int mod)
+{
+	spin_lock(&root->lock);
+	if (!node->in_list) {
+		list_add_tail(&node->n_list, &root->node_list);
+		list_add_tail(&node->p_list, &root->prepare_list);
+		atomic_inc(&node->refs);	/* inserted into list */
+		root->nodes++;
+		node->in_list = 1;
+	} else if (!list_empty(&node->p_list)) {
+		list_move_tail(&node->p_list, &root->prepare_list);
+	} else if (mod) {
+		list_add_tail(&node->p_list, &root->prepare_list);
+	}
+	spin_unlock(&root->lock);
+}
+
+/*
+ * Take @node off the delayed root's lists and drop the reference held for
+ * its list membership.  Does nothing if the node is not queued.
+ *
+ * Call it when holding delayed_node->mutex.
+ */
+static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
+				       struct btrfs_delayed_node *node)
+{
+	spin_lock(&root->lock);
+	if (!node->in_list)
+		goto unlock;
+
+	root->nodes--;
+	atomic_dec(&node->refs);	/* not in the list */
+	list_del_init(&node->n_list);
+	if (!list_empty(&node->p_list))
+		list_del_init(&node->p_list);
+	node->in_list = 0;
+unlock:
+	spin_unlock(&root->lock);
+}
+
+/*
+ * Return the first node on the delayed root's node list with an extra
+ * reference taken, or NULL when the list is empty.
+ */
+struct btrfs_delayed_node *btrfs_first_delayed_node(
+			struct btrfs_delayed_root *delayed_root)
+{
+	struct btrfs_delayed_node *first = NULL;
+
+	spin_lock(&delayed_root->lock);
+	if (!list_empty(&delayed_root->node_list)) {
+		first = list_entry(delayed_root->node_list.next,
+				   struct btrfs_delayed_node, n_list);
+		atomic_inc(&first->refs);
+	}
+	spin_unlock(&delayed_root->lock);
+
+	return first;
+}
+
+/*
+ * Return the node that follows @node on the delayed root's node list, with
+ * an extra reference taken.  If @node is no longer queued, iteration
+ * restarts from the head of the list.  Returns NULL when @node is the last
+ * entry or the list is empty.
+ */
+struct btrfs_delayed_node *btrfs_next_delayed_node(
+						struct btrfs_delayed_node *node)
+{
+	struct btrfs_delayed_root *delayed_root;
+	struct btrfs_delayed_node *next = NULL;
+	struct list_head *pos = NULL;
+
+	delayed_root = node->root->fs_info->delayed_root;
+
+	spin_lock(&delayed_root->lock);
+	if (node->in_list) {
+		if (!list_is_last(&node->n_list, &delayed_root->node_list))
+			pos = node->n_list.next;
+	} else if (!list_empty(&delayed_root->node_list)) {
+		/* not in the list */
+		pos = delayed_root->node_list.next;
+	}
+
+	if (pos) {
+		next = list_entry(pos, struct btrfs_delayed_node, n_list);
+		atomic_inc(&next->refs);
+	}
+	spin_unlock(&delayed_root->lock);
+
+	return next;
+}
+
+/*
+ * Drop one reference on @delayed_node (NULL is accepted and ignored).
+ *
+ * Before the reference is dropped the node is (re)queued if it still has
+ * pending work (@mod is forwarded to btrfs_queue_delayed_node()) or
+ * dequeued if it has none.  When the last reference goes away the node is
+ * removed from the root's radix tree and freed; the count is re-checked
+ * under root->inode_lock before doing so.
+ */
+static void __btrfs_release_delayed_node(
+				struct btrfs_delayed_node *delayed_node,
+				int mod)
+{
+	struct btrfs_delayed_root *delayed_root;
+	struct btrfs_root *root;
+
+	if (!delayed_node)
+		return;
+
+	root = delayed_node->root;
+	delayed_root = root->fs_info->delayed_root;
+
+	mutex_lock(&delayed_node->mutex);
+	if (!delayed_node->count)
+		btrfs_dequeue_delayed_node(delayed_root, delayed_node);
+	else
+		btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
+	mutex_unlock(&delayed_node->mutex);
+
+	if (!atomic_dec_and_test(&delayed_node->refs))
+		return;
+
+	spin_lock(&root->inode_lock);
+	if (atomic_read(&delayed_node->refs) == 0) {
+		radix_tree_delete(&root->delayed_nodes_tree,
+				  delayed_node->inode_id);
+		kmem_cache_free(delayed_node_cache, delayed_node);
+	}
+	spin_unlock(&root->inode_lock);
+}
+
+/* Drop a reference on @node without forcing it back onto the prepare list. */
+static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
+{
+	const int mod = 0;
+
+	__btrfs_release_delayed_node(node, mod);
+}
+
+/*
+ * Detach the first node from the delayed root's prepare list and return it
+ * with an extra reference taken, or return NULL when the list is empty.
+ */
+struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
+					struct btrfs_delayed_root *delayed_root)
+{
+	struct list_head *prepared = &delayed_root->prepare_list;
+	struct btrfs_delayed_node *node = NULL;
+	struct list_head *entry;
+
+	spin_lock(&delayed_root->lock);
+	if (!list_empty(prepared)) {
+		entry = prepared->next;
+		list_del_init(entry);
+		node = list_entry(entry, struct btrfs_delayed_node, p_list);
+		atomic_inc(&node->refs);
+	}
+	spin_unlock(&delayed_root->lock);
+
+	return node;
+}
+
+/*
+ * Drop a reference on @node; if it still has pending work it is put back
+ * onto the prepare list (mod = 1).
+ */
+static inline void btrfs_release_prepared_delayed_node(
+					struct btrfs_delayed_node *node)
+{
+	const int mod = 1;
+
+	__btrfs_release_delayed_node(node, mod);
+}
+
+/*
+ * Allocate a delayed item with room for @data_len bytes of payload after
+ * the header.  The item starts with one reference, no reservation and no
+ * owning node.  Returns NULL if the allocation fails.
+ */
+struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
+{
+	struct btrfs_delayed_item *item;
+
+	item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
+	if (!item)
+		return NULL;
+
+	atomic_set(&item->refs, 1);
+	item->delayed_node = NULL;
+	item->block_rsv = NULL;
+	item->bytes_reserved = 0;
+	item->ins_or_del = 0;
+	item->data_len = data_len;
+
+	return item;
+}
+
+/*
+ * __btrfs_lookup_delayed_item - look up a delayed item by key
+ * @root:	the rb-tree of delayed items to search
+ * @key:	the key to look up
+ * @prev:	if not NULL and no exact match exists, receives the closest
+ *		item ordered before @key (NULL if there is none)
+ * @next:	if not NULL and no exact match exists, receives the closest
+ *		item ordered after @key (NULL if there is none)
+ *
+ * Returns the item whose key equals @key, or NULL when there is no such
+ * item.  On an exact match @prev and @next are left untouched.
+ */
+static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
+				struct rb_root *root,
+				struct btrfs_key *key,
+				struct btrfs_delayed_item **prev,
+				struct btrfs_delayed_item **next)
+{
+	struct rb_node *cursor = root->rb_node;
+	struct rb_node *last = NULL;
+	struct rb_node *neighbour;
+	struct btrfs_delayed_item *visited = NULL;
+	int cmp = 0;
+
+	while (cursor) {
+		visited = rb_entry(cursor, struct btrfs_delayed_item, rb_node);
+		last = cursor;
+		cmp = btrfs_comp_cpu_keys(&visited->key, key);
+		if (cmp == 0)
+			return visited;
+		cursor = (cmp < 0) ? cursor->rb_right : cursor->rb_left;
+	}
+
+	/*
+	 * No exact match.  'visited' is the last item the descent compared
+	 * against; the sign of 'cmp' tells on which side of @key it lies.
+	 */
+	if (prev) {
+		if (!last) {
+			*prev = NULL;
+		} else if (cmp < 0) {
+			*prev = visited;
+		} else {
+			neighbour = rb_prev(last);
+			*prev = neighbour ?
+				rb_entry(neighbour, struct btrfs_delayed_item,
+					 rb_node) : NULL;
+		}
+	}
+
+	if (next) {
+		if (!last) {
+			*next = NULL;
+		} else if (cmp > 0) {
+			*next = visited;
+		} else {
+			neighbour = rb_next(last);
+			*next = neighbour ?
+				rb_entry(neighbour, struct btrfs_delayed_item,
+					 rb_node) : NULL;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Exact-match lookup of @key in the insertion tree of @delayed_node.
+ * Returns the item or NULL.
+ */
+struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
+					struct btrfs_delayed_node *delayed_node,
+					struct btrfs_key *key)
+{
+	struct rb_root *tree = &delayed_node->ins_root;
+
+	return __btrfs_lookup_delayed_item(tree, key, NULL, NULL);
+}
+
+/*
+ * Exact-match lookup of @key in the deletion tree of @delayed_node.
+ * Returns the item or NULL.
+ */
+struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
+					struct btrfs_delayed_node *delayed_node,
+					struct btrfs_key *key)
+{
+	struct rb_root *tree = &delayed_node->del_root;
+
+	return __btrfs_lookup_delayed_item(tree, key, NULL, NULL);
+}
+
+/*
+ * Search the insertion tree of @delayed_node for @key.  Returns the exact
+ * match if there is one, otherwise the next item after @key, otherwise NULL.
+ */
+struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
+					struct btrfs_delayed_node *delayed_node,
+					struct btrfs_key *key)
+{
+	struct btrfs_delayed_item *exact, *successor;
+
+	exact = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
+					    NULL, &successor);
+
+	return exact ? exact : successor;
+}
+
+/*
+ * Search the deletion tree of @delayed_node for @key.  Returns the exact
+ * match if there is one, otherwise the next item after @key, otherwise NULL.
+ */
+struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
+					struct btrfs_delayed_node *delayed_node,
+					struct btrfs_key *key)
+{
+	struct btrfs_delayed_item *exact, *successor;
+
+	exact = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
+					    NULL, &successor);
+
+	return exact ? exact : successor;
+}
+
+/*
+ * Link @ins into the insertion or deletion tree of @delayed_node, selected
+ * by @action (any other value is a BUG).
+ *
+ * On success the item is bound to the node, the node's and the delayed
+ * root's pending counters are bumped and, for inserted dir index items, the
+ * node's index_cnt is advanced past the item's offset if necessary.
+ *
+ * Returns 0 on success or -EEXIST if an item with the same key is already
+ * present in the selected tree.
+ */
+static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
+				    struct btrfs_delayed_item *ins,
+				    int action)
+{
+	struct rb_root *tree;
+	struct rb_node **link;
+	struct rb_node *parent = NULL;
+	struct btrfs_delayed_item *entry;
+	int cmp;
+
+	if (action == BTRFS_DELAYED_INSERTION_ITEM)
+		tree = &delayed_node->ins_root;
+	else if (action == BTRFS_DELAYED_DELETION_ITEM)
+		tree = &delayed_node->del_root;
+	else
+		BUG();
+
+	/* descend to the empty slot where the new item belongs */
+	link = &tree->rb_node;
+	while (*link) {
+		parent = *link;
+		entry = rb_entry(parent, struct btrfs_delayed_item, rb_node);
+
+		cmp = btrfs_comp_cpu_keys(&entry->key, &ins->key);
+		if (!cmp)
+			return -EEXIST;
+		link = (cmp < 0) ? &parent->rb_right : &parent->rb_left;
+	}
+
+	rb_link_node(&ins->rb_node, parent, link);
+	rb_insert_color(&ins->rb_node, tree);
+	ins->delayed_node = delayed_node;
+	ins->ins_or_del = action;
+
+	if (action == BTRFS_DELAYED_INSERTION_ITEM &&
+	    ins->key.type == BTRFS_DIR_INDEX_KEY &&
+	    ins->key.offset >= delayed_node->index_cnt)
+		delayed_node->index_cnt = ins->key.offset + 1;
+
+	delayed_node->count++;
+	atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
+
+	return 0;
+}
+
+/* Add @item to the insertion tree of @node; see __btrfs_add_delayed_item(). */
+static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
+					      struct btrfs_delayed_item *item)
+{
+	const int action = BTRFS_DELAYED_INSERTION_ITEM;
+
+	return __btrfs_add_delayed_item(node, item, action);
+}
+
+/* Add @item to the deletion tree of @node; see __btrfs_add_delayed_item(). */
+static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
+					     struct btrfs_delayed_item *item)
+{
+	const int action = BTRFS_DELAYED_DELETION_ITEM;
+
+	return __btrfs_add_delayed_item(node, item, action);
+}
+
+/*
+ * Unlink @delayed_item from the tree of its owning node (insertion or
+ * deletion tree, according to ins_or_del), decrement the node's and the
+ * delayed root's pending counters, and wake waiters on the delayed root
+ * once the global count is below BTRFS_DELAYED_BACKGROUND.
+ *
+ * The item itself is not freed here.
+ */
+static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
+{
+	struct btrfs_delayed_node *owner = delayed_item->delayed_node;
+	struct btrfs_delayed_root *delayed_root;
+	struct rb_root *tree;
+
+	delayed_root = owner->root->fs_info->delayed_root;
+
+	BUG_ON(!delayed_root);
+	BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
+	       delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
+
+	tree = (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM) ?
+		&owner->ins_root : &owner->del_root;
+
+	rb_erase(&delayed_item->rb_node, tree);
+	owner->count--;
+	atomic_dec(&delayed_root->items);
+	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
+	    waitqueue_active(&delayed_root->wait))
+		wake_up(&delayed_root->wait);
+}
+
+/*
+ * Remove @item from its delayed node and drop one reference on it, freeing
+ * the item when that was the last reference.  NULL is accepted and ignored.
+ */
+static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
+{
+	if (!item)
+		return;
+
+	__btrfs_remove_delayed_item(item);
+	if (atomic_dec_and_test(&item->refs))
+		kfree(item);
+}
+
+/*
+ * Return the first (lowest-ordered) item of @delayed_node's insertion tree,
+ * or NULL when the tree is empty.
+ */
+struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
+					struct btrfs_delayed_node *delayed_node)
+{
+	struct rb_node *first = rb_first(&delayed_node->ins_root);
+
+	if (!first)
+		return NULL;
+
+	return rb_entry(first, struct btrfs_delayed_item, rb_node);
+}
+
+/*
+ * Return the first (lowest-ordered) item of @delayed_node's deletion tree,
+ * or NULL when the tree is empty.
+ */
+struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
+					struct btrfs_delayed_node *delayed_node)
+{
+	struct rb_node *first = rb_first(&delayed_node->del_root);
+
+	if (!first)
+		return NULL;
+
+	return rb_entry(first, struct btrfs_delayed_item, rb_node);
+}
+
+/*
+ * Return the item that follows @item in the rb-tree it is linked into, or
+ * NULL when @item is the last one.
+ */
+struct btrfs_delayed_item *__btrfs_next_delayed_item(
+						struct btrfs_delayed_item *item)
+{
+	struct rb_node *successor = rb_next(&item->rb_node);
+
+	if (!successor)
+		return NULL;
+
+	return rb_entry(successor, struct btrfs_delayed_item, rb_node);
+}
+
+/*
+ * Return the delayed node cached in @inode with an extra reference taken,
+ * or NULL if the inode has none.  Never creates a node.
+ */
+static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
+							struct inode *inode)
+{
+	struct btrfs_delayed_node *node = BTRFS_I(inode)->delayed_node;
+
+	if (!node)
+		return NULL;
+
+	atomic_inc(&node->refs);
+	return node;
+}
+
+/*
+ * Resolve @root_id to a root.  @root itself is returned when its objectid
+ * already matches; otherwise the result of btrfs_read_fs_root_no_name() for
+ * a ROOT_ITEM key with offset (u64)-1 is returned unchanged.
+ */
+static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
+						   u64 root_id)
+{
+	struct btrfs_key location;
+
+	if (root_id == root->objectid)
+		return root;
+
+	location.offset = (u64)-1;
+	location.type = BTRFS_ROOT_ITEM_KEY;
+	location.objectid = root_id;
+
+	return btrfs_read_fs_root_no_name(root->fs_info, &location);
+}
+
+/*
+ * Move the metadata reservation for one item from the transaction's block
+ * reserve to the global block reserve and record it in @item.
+ *
+ * Nothing is done (and 0 is returned) when the transaction has no bytes
+ * reserved.  Otherwise returns the result of btrfs_block_rsv_migrate();
+ * @item is only updated when the migration succeeded.
+ */
+static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
+					       struct btrfs_root *root,
+					       struct btrfs_delayed_item *item)
+{
+	struct btrfs_block_rsv *from;
+	struct btrfs_block_rsv *to;
+	u64 bytes;
+	int err;
+
+	if (!trans->bytes_reserved)
+		return 0;
+
+	from = trans->block_rsv;
+	to = &root->fs_info->global_block_rsv;
+	bytes = btrfs_calc_trans_metadata_size(root, 1);
+
+	err = btrfs_block_rsv_migrate(from, to, bytes);
+	if (err)
+		return err;
+
+	item->bytes_reserved = bytes;
+	item->block_rsv = to;
+	return 0;
+}
+
+/*
+ * Give back the metadata reservation recorded in @item, if any, to the
+ * block reserve it was taken from.  The item's bookkeeping fields are left
+ * as they are.
+ */
+static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
+						struct btrfs_delayed_item *item)
+{
+	if (item->bytes_reserved)
+		btrfs_block_rsv_release(root, item->block_rsv,
+					item->bytes_reserved);
+}
+
+/*
+ * Move the metadata reservation for one inode update from the transaction's
+ * block reserve to the global block reserve and record its size in @node.
+ *
+ * Nothing is done (and 0 is returned) when the transaction has no bytes
+ * reserved.  Otherwise returns the result of btrfs_block_rsv_migrate();
+ * @node is only updated when the migration succeeded.
+ */
+static int btrfs_delayed_inode_reserve_metadata(
+					struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					struct btrfs_delayed_node *node)
+{
+	struct btrfs_block_rsv *from;
+	struct btrfs_block_rsv *to;
+	u64 bytes;
+	int err;
+
+	if (!trans->bytes_reserved)
+		return 0;
+
+	from = trans->block_rsv;
+	to = &root->fs_info->global_block_rsv;
+	bytes = btrfs_calc_trans_metadata_size(root, 1);
+
+	err = btrfs_block_rsv_migrate(from, to, bytes);
+	if (err)
+		return err;
+
+	node->bytes_reserved = bytes;
+	return 0;
+}
+
+/*
+ * Return the inode-update reservation recorded in @node, if any, to the
+ * global block reserve and clear the node's record of it.
+ */
+static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
+						 struct btrfs_delayed_node *node)
+{
+	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
+
+	if (node->bytes_reserved) {
+		btrfs_block_rsv_release(root, global_rsv,
+					node->bytes_reserved);
+		node->bytes_reserved = 0;
+	}
+}
+
+/*
+ * This helper will insert some continuous items into the same leaf according
+ * to the free space of the leaf.
+ *
+ * Starting at @item, consecutive delayed items (as defined by
+ * btrfs_is_continuous_delayed_item()) are collected for as long as they fit
+ * into the free space of path->nodes[0].  The collected items are inserted
+ * with a single setup_items_for_insert() call at path->slots[0], their
+ * payloads are copied into the leaf, and each of them is released.
+ *
+ * path->nodes[0] must be set by the caller.
+ *
+ * Returns 0 on success (including when not even @item fits), -ENOMEM if the
+ * temporary key/size arrays cannot be allocated, or the error returned by
+ * setup_items_for_insert().
+ */
+static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_path *path,
+				struct btrfs_delayed_item *item)
+{
+	struct btrfs_delayed_item *curr, *next;
+	int free_space;
+	int total_data_size = 0, total_size = 0;
+	struct extent_buffer *leaf;
+	char *data_ptr;
+	struct btrfs_key *keys;
+	u32 *data_size;
+	struct list_head head;
+	int slot;
+	int nitems = 0;	/* must start at zero: counted up in the loop below */
+	int i;
+	int ret = 0;
+
+	BUG_ON(!path->nodes[0]);
+
+	leaf = path->nodes[0];
+	free_space = btrfs_leaf_free_space(root, leaf);
+	INIT_LIST_HEAD(&head);
+
+	next = item;
+
+	/*
+	 * count the number of the continuous items that we can insert in batch
+	 */
+	while (total_size + next->data_len + sizeof(struct btrfs_item) <=
+	       free_space) {
+		total_data_size += next->data_len;
+		total_size += next->data_len + sizeof(struct btrfs_item);
+		list_add_tail(&next->tree_list, &head);
+		nitems++;
+
+		curr = next;
+		next = __btrfs_next_delayed_item(curr);
+		if (!next)
+			break;
+
+		if (!btrfs_is_continuous_delayed_item(curr, next))
+			break;
+	}
+
+	if (!nitems) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * we need allocate some memory space, but it might cause the task
+	 * to sleep, so we set all locked nodes in the path to blocking locks
+	 * first.
+	 */
+	btrfs_set_path_blocking(path);
+
+	keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
+	if (!keys) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
+	if (!data_size) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/* get keys of all the delayed items */
+	i = 0;
+	list_for_each_entry(next, &head, tree_list) {
+		keys[i] = next->key;
+		data_size[i] = next->data_len;
+		i++;
+	}
+
+	/* reset all the locked nodes in the path to spinning locks. */
+	btrfs_clear_path_blocking(path, NULL);
+
+	/* insert the keys of the items */
+	ret = setup_items_for_insert(trans, root, path, keys, data_size,
+				     total_data_size, total_size, nitems);
+	if (ret)
+		goto error;
+
+	/* insert the dir index items */
+	slot = path->slots[0];
+	list_for_each_entry_safe(curr, next, &head, tree_list) {
+		data_ptr = btrfs_item_ptr(leaf, slot, char);
+		write_extent_buffer(leaf, &curr->data,
+				    (unsigned long)data_ptr,
+				    curr->data_len);
+		slot++;
+
+		btrfs_delayed_item_release_metadata(root, curr);
+
+		list_del(&curr->tree_list);
+		btrfs_release_delayed_item(curr);
+	}
+
+error:
+	/* data_size is only reached here after keys was allocated */
+	kfree(data_size);
+	kfree(keys);
+out:
+	return ret;
+}
+
+/*
+ * This helper can just do simple insertion that needn't extend item for new
+ * data, such as directory name index insertion, inode insertion.
+ *
+ * An empty item of the right size is created for @delayed_item's key and
+ * the delayed payload is copied into it.  -EEXIST from the insertion is not
+ * treated as a failure: the payload is still written at the slot @path
+ * points to.  The item's metadata reservation is released afterwards.
+ *
+ * Returns 0 on success or the negative error of btrfs_insert_empty_item().
+ */
+static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     struct btrfs_path *path,
+				     struct btrfs_delayed_item *delayed_item)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_item *item;
+	char *dst;
+	int err;
+
+	err = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
+				      delayed_item->data_len);
+	if (err < 0 && err != -EEXIST)
+		return err;
+
+	leaf = path->nodes[0];
+
+	item = btrfs_item_nr(leaf, path->slots[0]);
+	dst = btrfs_item_ptr(leaf, path->slots[0], char);
+
+	write_extent_buffer(leaf, delayed_item->data, (unsigned long)dst,
+			    delayed_item->data_len);
+	btrfs_mark_buffer_dirty(leaf);
+
+	btrfs_delayed_item_release_metadata(root, delayed_item);
+
+	return 0;
+}
+
+/*
+ * we insert an item first, then if there are some continuous items, we try
+ * to insert those items into the same leaf.
+ *
+ * This repeats until the insertion tree of @node is empty or an insertion
+ * fails.  node->mutex is taken for each round and dropped in between.
+ *
+ * Returns 0 when the tree has been drained, or the negative error of the
+ * failing btrfs_insert_delayed_item() call.
+ */
+static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
+				      struct btrfs_path *path,
+				      struct btrfs_root *root,
+				      struct btrfs_delayed_node *node)
+{
+	struct btrfs_delayed_item *first, *following;
+	int ret = 0;
+
+	for (;;) {
+		mutex_lock(&node->mutex);
+
+		first = __btrfs_first_delayed_insertion_item(node);
+		if (!first)
+			break;
+
+		ret = btrfs_insert_delayed_item(trans, root, path, first);
+		if (ret < 0) {
+			btrfs_release_path(path);
+			break;
+		}
+
+		following = __btrfs_next_delayed_item(first);
+		if (following &&
+		    btrfs_is_continuous_delayed_item(first, following)) {
+			/* insert the continuous items into the same leaf */
+			path->slots[0]++;
+			btrfs_batch_insert_items(trans, root, path, following);
+		}
+		btrfs_release_delayed_item(first);
+		btrfs_mark_buffer_dirty(path->nodes[0]);
+
+		btrfs_release_path(path);
+		mutex_unlock(&node->mutex);
+	}
+
+	/* both exits from the loop leave node->mutex held */
+	mutex_unlock(&node->mutex);
+	return ret;
+}
+
+/*
+ * Delete a run of consecutive items from the leaf @path points to.
+ *
+ * Starting at @item and path->slots[0], delayed deletion items are matched
+ * against the keys stored in the leaf for as long as the delayed items are
+ * continuous, the keys agree and the leaf has more slots.  The matched run
+ * is removed with one btrfs_del_items() call and the corresponding delayed
+ * items are released.
+ *
+ * Returns 0 on success (also when nothing matched), -ENOENT if the slot is
+ * beyond the last item of the leaf, or the error of btrfs_del_items().
+ */
+static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
+				    struct btrfs_root *root,
+				    struct btrfs_path *path,
+				    struct btrfs_delayed_item *item)
+{
+	struct btrfs_delayed_item *cursor, *prev, *tmp;
+	struct extent_buffer *leaf;
+	struct btrfs_key leaf_key;
+	struct list_head batch;
+	int nitems = 0;
+	int slot, last_slot;
+	int ret;
+
+	BUG_ON(!path->nodes[0]);
+
+	leaf = path->nodes[0];
+
+	slot = path->slots[0];
+	last_slot = btrfs_header_nritems(leaf) - 1;
+	if (slot > last_slot)
+		return -ENOENT;	/* FIXME: Is errno suitable? */
+
+	cursor = item;
+	INIT_LIST_HEAD(&batch);
+	btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
+
+	/*
+	 * count the number of the dir index items that we can delete in batch
+	 */
+	while (btrfs_comp_cpu_keys(&cursor->key, &leaf_key) == 0) {
+		list_add_tail(&cursor->tree_list, &batch);
+		nitems++;
+
+		prev = cursor;
+		cursor = __btrfs_next_delayed_item(prev);
+		if (!cursor || !btrfs_is_continuous_delayed_item(prev, cursor))
+			break;
+
+		if (++slot > last_slot)
+			break;
+		btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
+	}
+
+	if (!nitems)
+		return 0;
+
+	ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
+	if (ret)
+		return ret;
+
+	list_for_each_entry_safe(cursor, tmp, &batch, tree_list) {
+		btrfs_delayed_item_release_metadata(root, cursor);
+		list_del(&cursor->tree_list);
+		btrfs_release_delayed_item(cursor);
+	}
+
+	return 0;
+}
+
+/*
+ * Apply all pending deletions of @node to the tree.
+ *
+ * Each round takes node->mutex, looks up the first delayed deletion item in
+ * the tree and deletes it together with any continuous followers.  A
+ * delayed item whose key is not found in the tree is considered stale and
+ * is simply dropped.
+ *
+ * node->mutex is not recursive, so every jump back to do_again must happen
+ * with the mutex released; the delete_fail exit expects it to be held.
+ *
+ * Returns 0 once the deletion tree is empty, or the negative error returned
+ * by btrfs_search_slot().
+ */
+static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
+				      struct btrfs_path *path,
+				      struct btrfs_root *root,
+				      struct btrfs_delayed_node *node)
+{
+	struct btrfs_delayed_item *curr, *prev;
+	int ret = 0;
+
+do_again:
+	mutex_lock(&node->mutex);
+	curr = __btrfs_first_delayed_deletion_item(node);
+	if (!curr)
+		goto delete_fail;
+
+	ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
+	if (ret < 0)
+		goto delete_fail;
+	else if (ret > 0) {
+		/*
+		 * can't find the item which the node points to, so this node
+		 * is invalid, just drop it.
+		 */
+		prev = curr;
+		curr = __btrfs_next_delayed_item(prev);
+		btrfs_release_delayed_item(prev);
+		ret = 0;
+		btrfs_release_path(path);
+		if (curr) {
+			/* drop the mutex first: do_again takes it again */
+			mutex_unlock(&node->mutex);
+			goto do_again;
+		}
+		goto delete_fail;
+	}
+
+	btrfs_batch_delete_items(trans, root, path, curr);
+	btrfs_release_path(path);
+	mutex_unlock(&node->mutex);
+	goto do_again;
+
+delete_fail:
+	btrfs_release_path(path);
+	mutex_unlock(&node->mutex);
+	return ret;
+}
+
+/*
+ * Clear the pending inode update of @delayed_node, if there is one: the
+ * dirty flag is reset, the node's and the delayed root's pending counters
+ * are decremented, and waiters on the delayed root are woken once the
+ * global count is below BTRFS_DELAYED_BACKGROUND.
+ *
+ * A NULL node or a node without a dirty inode is ignored.
+ */
+static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
+{
+	struct btrfs_delayed_root *delayed_root;
+
+	if (!delayed_node || !delayed_node->inode_dirty)
+		return;
+
+	BUG_ON(!delayed_node->root);
+	delayed_node->inode_dirty = 0;
+	delayed_node->count--;
+
+	delayed_root = delayed_node->root->fs_info->delayed_root;
+	atomic_dec(&delayed_root->items);
+	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
+	    waitqueue_active(&delayed_root->wait))
+		wake_up(&delayed_root->wait);
+}
+
+/*
+ * Write the inode item cached in @node back into the tree, if the node has
+ * a dirty inode.  On success the node's inode reservation is released and
+ * its pending inode update is cleared.  node->mutex is held throughout.
+ *
+ * Returns 0 on success or when there is nothing to write, -ENOENT if the
+ * inode item is not found in the tree, or the negative error of
+ * btrfs_lookup_inode().
+ */
+static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      struct btrfs_delayed_node *node)
+{
+	struct btrfs_inode_item *dst;
+	struct extent_buffer *leaf;
+	struct btrfs_key key;
+	int ret = 0;
+
+	mutex_lock(&node->mutex);
+	if (!node->inode_dirty)
+		goto unlock;
+
+	key.objectid = node->inode_id;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+
+	ret = btrfs_lookup_inode(trans, root, path, &key, 1);
+	if (ret > 0) {
+		btrfs_release_path(path);
+		ret = -ENOENT;
+		goto unlock;
+	}
+	if (ret < 0)
+		goto unlock;
+
+	btrfs_unlock_up_safe(path, 1);
+	leaf = path->nodes[0];
+	dst = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
+	write_extent_buffer(leaf, &node->inode_item, (unsigned long)dst,
+			    sizeof(struct btrfs_inode_item));
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(path);
+
+	btrfs_delayed_inode_release_metadata(root, node);
+	btrfs_release_delayed_inode(node);
+
+unlock:
+	mutex_unlock(&node->mutex);
+	return ret;
+}
+
+/*
+ * Called when committing the transaction.
+ *
+ * Walks every node on the delayed root's node list and, for each of them,
+ * runs its delayed insertions, then its delayed deletions, then its delayed
+ * inode update, using the node's own root.  The walk stops at the first
+ * error.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, or the first
+ * error reported while processing a node.
+ */
+int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root)
+{
+	struct btrfs_delayed_root *delayed_root;
+	struct btrfs_delayed_node *node, *following;
+	struct btrfs_path *path;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->leave_spinning = 1;
+
+	delayed_root = btrfs_get_delayed_root(root);
+
+	for (node = btrfs_first_delayed_node(delayed_root); node;
+	     node = following) {
+		root = node->root;
+
+		ret = btrfs_insert_delayed_items(trans, path, root, node);
+		if (!ret)
+			ret = btrfs_delete_delayed_items(trans, path, root,
+							 node);
+		if (!ret)
+			ret = btrfs_update_delayed_inode(trans, root, path,
+							 node);
+		if (ret) {
+			btrfs_release_delayed_node(node);
+			break;
+		}
+
+		/* grab the successor before our reference is dropped */
+		following = btrfs_next_delayed_node(node);
+		btrfs_release_delayed_node(node);
+	}
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Flush everything pending on @node: delayed insertions, then delayed
+ * deletions, then the delayed inode update.  Later steps are skipped once a
+ * step fails.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
+ * of the failing step.
+ */
+static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
+					      struct btrfs_delayed_node *node)
+{
+	struct btrfs_root *root = node->root;
+	struct btrfs_path *path;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->leave_spinning = 1;
+
+	ret = btrfs_insert_delayed_items(trans, path, root, node);
+	if (!ret)
+		ret = btrfs_delete_delayed_items(trans, path, root, node);
+	if (!ret)
+		ret = btrfs_update_delayed_inode(trans, root, path, node);
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Flush the delayed items of @inode, if it has a delayed node with pending
+ * work.
+ *
+ * Returns 0 when there is nothing to do, otherwise the result of
+ * __btrfs_commit_inode_delayed_items().
+ */
+int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
+				     struct inode *inode)
+{
+	struct btrfs_delayed_node *delayed_node;
+	int pending;
+	int ret = 0;
+
+	delayed_node = btrfs_get_delayed_node(inode);
+	if (!delayed_node)
+		return 0;
+
+	/* sample the pending count under the node's mutex */
+	mutex_lock(&delayed_node->mutex);
+	pending = (delayed_node->count != 0);
+	mutex_unlock(&delayed_node->mutex);
+
+	if (pending)
+		ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
+
+	btrfs_release_delayed_node(delayed_node);
+	return ret;
+}
+
+/*
+ * Detach the delayed node cached in @inode, if any, and drop the reference
+ * that the cached pointer was holding.
+ */
+void btrfs_remove_delayed_node(struct inode *inode)
+{
+	struct btrfs_delayed_node *cached;
+
+	cached = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
+	if (cached) {
+		BTRFS_I(inode)->delayed_node = NULL;
+		btrfs_release_delayed_node(cached);
+	}
+}
+
+struct btrfs_async_delayed_node { /* one background flush request for a single delayed node */
+ struct btrfs_root *root; /* root that was passed to btrfs_wq_run_delayed_node() */
+ struct btrfs_delayed_node *delayed_node; /* node to flush; holds the reference taken when it left the prepare list */
+ struct btrfs_work work; /* work item queued on fs_info->delayed_workers */
+};
+
+/*
+ * Worker callback: flush the delayed node attached to @work.
+ *
+ * The delayed insertions, deletions and the inode update of the node are
+ * run inside a joined transaction.  If the node has gained new pending
+ * items in the meantime the work is requeued; otherwise the node is
+ * dequeued, the reference carried by the async request is dropped and the
+ * request is freed.
+ *
+ * The node is picked up from the request before anything can fail, so that
+ * every exit path releases the reference the request is holding.
+ */
+static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
+{
+	struct btrfs_async_delayed_node *async_node;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	struct btrfs_delayed_node *delayed_node;
+	struct btrfs_root *root;
+	unsigned long nr = 0;
+	int need_requeue = 0;
+	int ret;
+
+	async_node = container_of(work, struct btrfs_async_delayed_node, work);
+	delayed_node = async_node->delayed_node;
+	root = delayed_node->root;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		goto out;
+	path->leave_spinning = 1;
+
+	trans = btrfs_join_transaction(root);
+	if (IS_ERR(trans))
+		goto free_path;
+
+	ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
+	if (!ret)
+		ret = btrfs_delete_delayed_items(trans, path, root,
+						 delayed_node);
+
+	if (!ret)
+		btrfs_update_delayed_inode(trans, root, path, delayed_node);
+
+	/*
+	 * Maybe new delayed items have been inserted, so we need requeue
+	 * the work. Besides that, we must dequeue the empty delayed nodes
+	 * to avoid the race between delayed items balance and the worker.
+	 * The race like this:
+	 *	Task1				Worker thread
+	 *					count == 0, needn't requeue
+	 *					  also needn't insert the
+	 *					  delayed node into prepare
+	 *					  list again.
+	 *	add lots of delayed items
+	 *	queue the delayed node
+	 *	  already in the list,
+	 *	  and not in the prepare
+	 *	  list, it means the delayed
+	 *	  node is being dealt with
+	 *	  by the worker.
+	 *	do delayed items balance
+	 *	  the delayed node is being
+	 *	  dealt with by the worker
+	 *	  now, just wait.
+	 *					the worker goto idle.
+	 * Task1 will sleep until the transaction is committed.
+	 */
+	mutex_lock(&delayed_node->mutex);
+	if (delayed_node->count)
+		need_requeue = 1;
+	else
+		btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
+					   delayed_node);
+	mutex_unlock(&delayed_node->mutex);
+
+	nr = trans->blocks_used;
+
+	btrfs_end_transaction_dmeta(trans, root);
+	__btrfs_btree_balance_dirty(root, nr);
+free_path:
+	btrfs_free_path(path);
+out:
+	if (need_requeue)
+		btrfs_requeue_work(&async_node->work);
+	else {
+		btrfs_release_prepared_delayed_node(delayed_node);
+		kfree(async_node);
+	}
+}
+
+/*
+ * Hand prepared delayed nodes over to the delayed workers.
+ *
+ * Nodes are taken off the prepare list one at a time and queued as async
+ * requests.  With @all set this continues until the prepare list is empty;
+ * otherwise at most four nodes are queued.
+ *
+ * Returns 0 on success or -ENOMEM if an async request cannot be allocated
+ * (the node taken for it is released again).
+ */
+static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
+				     struct btrfs_root *root, int all)
+{
+	struct btrfs_async_delayed_node *request;
+	struct btrfs_delayed_node *prepared;
+	int queued = 0;
+
+	do {
+		prepared = btrfs_first_prepared_delayed_node(delayed_root);
+		if (!prepared)
+			return 0;
+
+		request = kmalloc(sizeof(*request), GFP_NOFS);
+		if (!request) {
+			btrfs_release_prepared_delayed_node(prepared);
+			return -ENOMEM;
+		}
+
+		request->root = root;
+		request->delayed_node = prepared;
+
+		request->work.func = btrfs_async_run_delayed_node_done;
+		request->work.flags = 0;
+
+		btrfs_queue_worker(&root->fs_info->delayed_workers,
+				   &request->work);
+		queued++;
+	} while (all || queued < 4);
+
+	return 0;
+}
+
+/*
+ * Throttle the number of pending delayed items.
+ *
+ * Below BTRFS_DELAYED_BACKGROUND nothing is done.  Between the two
+ * thresholds a limited batch of prepared nodes is handed to the workers.
+ * At or above BTRFS_DELAYED_WRITEBACK all prepared nodes are handed over
+ * and the caller waits (interruptibly, for at most HZ jiffies) until the
+ * count has dropped below BTRFS_DELAYED_BACKGROUND.
+ */
+void btrfs_balance_delayed_items(struct btrfs_root *root)
+{
+	struct btrfs_delayed_root *delayed_root;
+
+	delayed_root = btrfs_get_delayed_root(root);
+
+	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
+		return;
+
+	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_WRITEBACK) {
+		btrfs_wq_run_delayed_node(delayed_root, root, 0);
+		return;
+	}
+
+	if (btrfs_wq_run_delayed_node(delayed_root, root, 1))
+		return;
+
+	wait_event_interruptible_timeout(
+			delayed_root->wait,
+			(atomic_read(&delayed_root->items) <
+			 BTRFS_DELAYED_BACKGROUND),
+			HZ);
+}
+
+/*
+ * Record the insertion of a directory index item as a delayed item of @dir.
+ *
+ * @name/@name_len:	entry name; only @name_len bytes are used, the
+ *			buffer is not required to be NUL-terminated
+ * @disk_key:		location stored in the dir item
+ * @type:		type stored in the dir item
+ * @index:		dir index, used as the key offset of the item
+ *
+ * A failing metadata reservation or a duplicate key in the node's insertion
+ * tree is treated as a BUG.
+ *
+ * Returns 0 on success, -ENOMEM if the delayed item cannot be allocated, or
+ * the error from obtaining the delayed node of @dir.
+ */
+int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root, const char *name,
+				   int name_len, struct inode *dir,
+				   struct btrfs_disk_key *disk_key, u8 type,
+				   u64 index)
+{
+	struct btrfs_delayed_node *delayed_node;
+	struct btrfs_delayed_item *delayed_item;
+	struct btrfs_dir_item *dir_item;
+	int ret;
+
+	delayed_node = btrfs_get_or_create_delayed_node(dir);
+	if (IS_ERR(delayed_node))
+		return PTR_ERR(delayed_node);
+
+	delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
+	if (!delayed_item) {
+		ret = -ENOMEM;
+		goto release_node;
+	}
+
+	ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
+	/*
+	 * we have reserved enough space when we start a new transaction,
+	 * so reserving metadata failure is impossible
+	 */
+	BUG_ON(ret);
+
+	delayed_item->key.objectid = btrfs_ino(dir);
+	btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
+	delayed_item->key.offset = index;
+
+	/* the payload is a dir item header directly followed by the name */
+	dir_item = (struct btrfs_dir_item *)delayed_item->data;
+	dir_item->location = *disk_key;
+	dir_item->transid = cpu_to_le64(trans->transid);
+	dir_item->data_len = 0;
+	dir_item->name_len = cpu_to_le16(name_len);
+	dir_item->type = type;
+	memcpy((char *)(dir_item + 1), name, name_len);
+
+	mutex_lock(&delayed_node->mutex);
+	ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
+	if (unlikely(ret)) {
+		/* bound the print by name_len: name need not be terminated */
+		printk(KERN_ERR "err add delayed dir index item(name: %.*s) into "
+				"the insertion tree of the delayed node"
+				"(root id: %llu, inode id: %llu, errno: %d)\n",
+				name_len, name,
+				(unsigned long long)delayed_node->root->objectid,
+				(unsigned long long)delayed_node->inode_id,
+				ret);
+		BUG();
+	}
+	mutex_unlock(&delayed_node->mutex);
+
+release_node:
+	btrfs_release_delayed_node(delayed_node);
+	return ret;
+}
+
+/*
+ * Drop a not-yet-applied insertion item matching @key from @node.
+ *
+ * The lookup and the removal both happen under node->mutex.  Returns 0 when
+ * an item was found (its metadata reservation and the item itself are
+ * released), or 1 when the insertion tree holds no item for @key.
+ */
+static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
+					       struct btrfs_delayed_node *node,
+					       struct btrfs_key *key)
+{
+	struct btrfs_delayed_item *found;
+	int ret = 1;
+
+	mutex_lock(&node->mutex);
+	found = __btrfs_lookup_delayed_insertion_item(node, key);
+	if (found) {
+		btrfs_delayed_item_release_metadata(root, found);
+		btrfs_release_delayed_item(found);
+		ret = 0;
+	}
+	mutex_unlock(&node->mutex);
+
+	return ret;
+}
+
+/*
+ * btrfs_delete_delayed_dir_index - queue the deletion of a directory index
+ * @trans:	running transaction, used for the metadata reservation
+ * @root:	root handed to the reservation helpers
+ * @dir:	directory inode the index belongs to
+ * @index:	directory index to delete (offset of the item key)
+ *
+ * If an insertion for the same key is still pending in the delayed node it
+ * is simply dropped.  Otherwise an empty delayed item carrying the key is
+ * added to the node's deletion tree.
+ *
+ * Returns 0 on success, the error from btrfs_get_or_create_delayed_node(),
+ * or -ENOMEM if the deletion item cannot be allocated.  A failing metadata
+ * reservation or a failing tree insertion is treated as fatal (BUG).
+ */
+int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root, struct inode *dir,
+				   u64 index)
+{
+	struct btrfs_delayed_node *delayed_node;
+	struct btrfs_delayed_item *del_item;
+	struct btrfs_key key;
+	int ret;
+
+	delayed_node = btrfs_get_or_create_delayed_node(dir);
+	if (IS_ERR(delayed_node))
+		return PTR_ERR(delayed_node);
+
+	key.objectid = btrfs_ino(dir);
+	btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
+	key.offset = index;
+
+	/* a pending insertion for this key cancels out: nothing left to do */
+	ret = btrfs_delete_delayed_insertion_item(root, delayed_node, &key);
+	if (ret == 0)
+		goto out;
+
+	del_item = btrfs_alloc_delayed_item(0);
+	if (del_item == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	del_item->key = key;
+
+	/*
+	 * Enough space was reserved when the transaction was started, so
+	 * this reservation is not expected to fail.
+	 */
+	ret = btrfs_delayed_item_reserve_metadata(trans, root, del_item);
+	BUG_ON(ret);
+
+	mutex_lock(&delayed_node->mutex);
+	ret = __btrfs_add_delayed_deletion_item(delayed_node, del_item);
+	if (unlikely(ret)) {
+		printk(KERN_ERR "err add delayed dir index item(index: %llu) "
+				"into the deletion tree of the delayed node"
+				"(root id: %llu, inode id: %llu, errno: %d)\n",
+				(unsigned long long)index,
+				(unsigned long long)delayed_node->root->objectid,
+				(unsigned long long)delayed_node->inode_id,
+				ret);
+		BUG();
+	}
+	mutex_unlock(&delayed_node->mutex);
+out:
+	btrfs_release_delayed_node(delayed_node);
+	return ret;
+}
+
+/*
+ * Copy the directory index counter cached in the inode's delayed node into
+ * the btrfs inode.
+ *
+ * Returns 0 after copying, -ENOENT if the inode has no delayed node attached
+ * and -EINVAL if the delayed node's index_cnt is still zero.
+ */
+int btrfs_inode_delayed_dir_index_count(struct inode *inode)
+{
+	struct btrfs_delayed_node *node = BTRFS_I(inode)->delayed_node;
+
+	if (node == NULL)
+		return -ENOENT;
+
+	/*
+	 * The directory's i_mutex is held, so no new directory index can be
+	 * added to the delayed node and index_cnt cannot change underneath
+	 * us; the delayed node therefore does not need to be locked.
+	 */
+	if (node->index_cnt == 0)
+		return -EINVAL;
+
+	BTRFS_I(inode)->index_cnt = node->index_cnt;
+	return 0;
+}
+
+/*
+ * Collect the delayed items of @inode for readdir.
+ *
+ * Under the delayed node's mutex every pending insertion item is appended to
+ * @ins_list and every pending deletion item to @del_list (linked through
+ * readdir_list), each with one extra reference taken.  Both lists are left
+ * untouched when the inode has no delayed node.
+ */
+void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
+			     struct list_head *del_list)
+{
+	struct btrfs_delayed_node *node = btrfs_get_delayed_node(inode);
+	struct btrfs_delayed_item *it;
+
+	if (!node)
+		return;
+
+	mutex_lock(&node->mutex);
+
+	for (it = __btrfs_first_delayed_insertion_item(node); it;
+	     it = __btrfs_next_delayed_item(it)) {
+		atomic_inc(&it->refs);
+		list_add_tail(&it->readdir_list, ins_list);
+	}
+
+	for (it = __btrfs_first_delayed_deletion_item(node); it;
+	     it = __btrfs_next_delayed_item(it)) {
+		atomic_inc(&it->refs);
+		list_add_tail(&it->readdir_list, del_list);
+	}
+
+	mutex_unlock(&node->mutex);
+
+	/*
+	 * The delayed node is still cached in the btrfs inode, so refs must
+	 * be > 1 here and there is no need to check whether it is about to
+	 * be freed.
+	 *
+	 * This function is only used for reading a directory; no delayed
+	 * items are inserted or deleted meanwhile, so the node does not need
+	 * to be requeued or dequeued either.
+	 */
+	atomic_dec(&node->refs);
+}
+
+/*
+ * Release whatever is left on the readdir lists.
+ *
+ * Every item still linked on @ins_list and then on @del_list is unlinked and
+ * has one reference dropped; an item is freed when that was its last
+ * reference.
+ */
+void btrfs_put_delayed_items(struct list_head *ins_list,
+			     struct list_head *del_list)
+{
+	struct list_head *lists[] = { ins_list, del_list };
+	struct btrfs_delayed_item *item, *tmp;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(lists); i++) {
+		list_for_each_entry_safe(item, tmp, lists[i], readdir_list) {
+			list_del(&item->readdir_list);
+			if (atomic_dec_and_test(&item->refs))
+				kfree(item);
+		}
+	}
+}
+
+/*
+ * Check whether directory index @index has a pending delayed deletion.
+ *
+ * Walks @del_list from the front, consuming (unlinking and dropping one
+ * reference on) every entry whose key offset is <= @index.  The walk stops
+ * at the first entry with a larger offset.
+ *
+ * Returns 1 if an entry with exactly @index was consumed, 0 otherwise.
+ */
+int btrfs_should_delete_dir_index(struct list_head *del_list,
+				  u64 index)
+{
+	struct btrfs_delayed_item *item, *tmp;
+	u64 offset;
+
+	list_for_each_entry_safe(item, tmp, del_list, readdir_list) {
+		/* remember the offset: the item may be freed below */
+		offset = item->key.offset;
+		if (offset > index)
+			break;
+
+		list_del(&item->readdir_list);
+		if (atomic_dec_and_test(&item->refs))
+			kfree(item);
+
+		if (offset == index)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * btrfs_readdir_delayed_dir_index - emit the pending delayed insertions
+ *
+ * Consumes @ins_list front to back.  Entries whose index lies before
+ * filp->f_pos are dropped silently; for every other entry f_pos is moved to
+ * the entry's index and the entry is passed to @filldir.
+ *
+ * Returns 1 as soon as @filldir reports a non-zero result, 0 once the list
+ * has been fully consumed (or was empty to begin with).
+ */
+int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
+				    filldir_t filldir,
+				    struct list_head *ins_list)
+{
+	struct btrfs_delayed_item *item, *tmp;
+	struct btrfs_dir_item *di;
+	struct btrfs_key location;
+	int stop = 0;
+
+	/*
+	 * The data of a delayed item never changes, so the items need no
+	 * locking.  The directory's i_mutex is held as well, so nobody can
+	 * delete a directory index while we are in here.
+	 */
+	list_for_each_entry_safe(item, tmp, ins_list, readdir_list) {
+		list_del(&item->readdir_list);
+
+		if (item->key.offset >= filp->f_pos) {
+			filp->f_pos = item->key.offset;
+
+			/* the name is stored right behind the dir item */
+			di = (struct btrfs_dir_item *)item->data;
+			btrfs_disk_key_to_cpu(&location, &di->location);
+
+			stop = filldir(dirent, (char *)(di + 1),
+				       le16_to_cpu(di->name_len),
+				       item->key.offset, location.objectid,
+				       btrfs_filetype_table[di->type]);
+		}
+
+		if (atomic_dec_and_test(&item->refs))
+			kfree(item);
+
+		if (stop)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Accessors for struct btrfs_inode_item and struct btrfs_timespec, generated
+ * by BTRFS_SETGET_STACK_FUNCS.  fill_stack_inode_item() below uses the
+ * resulting btrfs_set_stack_inode_*() and btrfs_set_stack_timespec_*()
+ * setters to fill the inode item kept in a delayed node.
+ * NOTE(review): the last macro argument is presumably the field width in
+ * bits -- confirm against the macro definition.
+ */
+BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
+ generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
+ sequence, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
+ transid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
+ nbytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
+ block_group, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
+
+/*
+ * Fill @inode_item from the current state of @inode.
+ *
+ * Ownership, mode, link count, sizes, generation, sequence, rdev, flags and
+ * the three timestamps are copied from the VFS/btrfs inode; the transid is
+ * taken from @trans and the block group field is set to 0.
+ */
+static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
+				  struct btrfs_inode_item *inode_item,
+				  struct inode *inode)
+{
+	struct btrfs_timespec *ts;
+
+	/* fields that come straight from the VFS inode */
+	btrfs_set_stack_inode_uid(inode_item, inode->i_uid);
+	btrfs_set_stack_inode_gid(inode_item, inode->i_gid);
+	btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
+	btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
+	btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
+	btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
+
+	/* fields kept in the btrfs specific part of the inode */
+	btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
+	btrfs_set_stack_inode_generation(inode_item,
+					 BTRFS_I(inode)->generation);
+	btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence);
+	btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
+
+	btrfs_set_stack_inode_transid(inode_item, trans->transid);
++	btrfs_set_stack_inode_block_group(inode_item, 0);
+
+	ts = btrfs_inode_atime(inode_item);
+	btrfs_set_stack_timespec_sec(ts, inode->i_atime.tv_sec);
+	btrfs_set_stack_timespec_nsec(ts, inode->i_atime.tv_nsec);
+
+	ts = btrfs_inode_mtime(inode_item);
+	btrfs_set_stack_timespec_sec(ts, inode->i_mtime.tv_sec);
+	btrfs_set_stack_timespec_nsec(ts, inode->i_mtime.tv_nsec);
+
+	ts = btrfs_inode_ctime(inode_item);
+	btrfs_set_stack_timespec_sec(ts, inode->i_ctime.tv_sec);
+	btrfs_set_stack_timespec_nsec(ts, inode->i_ctime.tv_nsec);
+}
+
+/*
+ * btrfs_delayed_update_inode - record an inode update in its delayed node
+ * @trans:	running transaction
+ * @root:	root the inode belongs to
+ * @inode:	inode whose item should be updated
+ *
+ * The current inode state is copied into the inode item kept in the delayed
+ * node.  The first update of a clean node additionally reserves metadata,
+ * marks the node dirty and accounts one more item in the node and in the
+ * filesystem-wide delayed root; later updates only refresh the copy.
+ *
+ * Returns 0 on success or the error from btrfs_get_or_create_delayed_node().
+ * A failing metadata reservation is treated as fatal (BUG).
+ */
+int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root, struct inode *inode)
+{
+	struct btrfs_delayed_node *delayed_node;
+	/*
+	 * Must start out as 0: the "already dirty" path below jumps straight
+	 * to release_node without ever assigning ret.
+	 */
+	int ret = 0;
+
+	delayed_node = btrfs_get_or_create_delayed_node(inode);
+	if (IS_ERR(delayed_node))
+		return PTR_ERR(delayed_node);
+
+	mutex_lock(&delayed_node->mutex);
+	if (delayed_node->inode_dirty) {
+		/* space is already reserved, only refresh the cached item */
+		fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
+		goto release_node;
+	}
+
+	ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
+	/*
+	 * we must reserve enough space when we start a new transaction,
+	 * so reserving metadata failure is impossible
+	 */
+	BUG_ON(ret);
+
+	fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
+	delayed_node->inode_dirty = 1;
+	delayed_node->count++;
+	atomic_inc(&root->fs_info->delayed_root->items);
+release_node:
+	mutex_unlock(&delayed_node->mutex);
+	btrfs_release_delayed_node(delayed_node);
+	return ret;
+}
+
+/*
+ * Throw away everything that is pending in @delayed_node.
+ *
+ * With the node's mutex held, all insertion items and then all deletion
+ * items are released together with their metadata reservations.  If the
+ * node also carries a dirty inode item, its reservation and the delayed
+ * inode are released too.
+ */
+static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
+{
+	struct btrfs_root *root = delayed_node->root;
+	struct btrfs_delayed_item *item, *next;
+
+	mutex_lock(&delayed_node->mutex);
+
+	item = __btrfs_first_delayed_insertion_item(delayed_node);
+	while (item) {
+		btrfs_delayed_item_release_metadata(root, item);
+		/* fetch the successor before the item itself goes away */
+		next = __btrfs_next_delayed_item(item);
+		btrfs_release_delayed_item(item);
+		item = next;
+	}
+
+	item = __btrfs_first_delayed_deletion_item(delayed_node);
+	while (item) {
+		btrfs_delayed_item_release_metadata(root, item);
+		next = __btrfs_next_delayed_item(item);
+		btrfs_release_delayed_item(item);
+		item = next;
+	}
+
+	if (delayed_node->inode_dirty) {
+		btrfs_delayed_inode_release_metadata(root, delayed_node);
+		btrfs_release_delayed_inode(delayed_node);
+	}
+
+	mutex_unlock(&delayed_node->mutex);
+}
+
+/*
+ * Discard all delayed items of @inode.  Does nothing when the inode has no
+ * delayed node.
+ */
+void btrfs_kill_delayed_inode_items(struct inode *inode)
+{
+	struct btrfs_delayed_node *node = btrfs_get_delayed_node(inode);
+
+	if (node) {
+		__btrfs_kill_delayed_node(node);
+		btrfs_release_delayed_node(node);
+	}
+}
+
+/*
+ * Discard the pending items of every delayed node registered in @root.
+ *
+ * The nodes are looked up in root->delayed_nodes_tree in batches of up to
+ * eight, in ascending inode id order.  A reference is taken on each node of
+ * a batch while root->inode_lock is held; the nodes are then killed and
+ * released with the lock dropped.
+ */
+void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
+{
+	struct btrfs_delayed_node *batch[8];
+	u64 next_id = 0;
+	int found, i;
+
+	for (;;) {
+		spin_lock(&root->inode_lock);
+		found = radix_tree_gang_lookup(&root->delayed_nodes_tree,
+					       (void **)batch, next_id,
+					       ARRAY_SIZE(batch));
+		if (found == 0) {
+			spin_unlock(&root->inode_lock);
+			break;
+		}
+
+		/* the next lookup resumes right after the last node found */
+		next_id = batch[found - 1]->inode_id + 1;
+
+		for (i = 0; i < found; i++)
+			atomic_inc(&batch[i]->refs);
+		spin_unlock(&root->inode_lock);
+
+		for (i = 0; i < found; i++) {
+			__btrfs_kill_delayed_node(batch[i]);
+			btrfs_release_delayed_node(batch[i]);
+		}
+	}
+}
fs_info->sb = sb;
fs_info->max_inline = 8192 * 1024;
fs_info->metadata_ratio = 0;
+ fs_info->defrag_inodes = RB_ROOT;
+ fs_info->trans_no_join = 0;
fs_info->thread_pool_size = min_t(unsigned long,
num_online_cpus() + 2, 8);
if (ret)
break;
- caching_ctl->progress = last;
- btrfs_release_path(path);
- up_read(&fs_info->extent_commit_sem);
- mutex_unlock(&caching_ctl->mutex);
- if (btrfs_transaction_in_commit(fs_info))
- schedule_timeout(1);
- else
+ if (need_resched() ||
+ btrfs_next_leaf(extent_root, path)) {
+ caching_ctl->progress = last;
- btrfs_release_path(extent_root, path);
++ btrfs_release_path(path);
+ up_read(&fs_info->extent_commit_sem);
+ mutex_unlock(&caching_ctl->mutex);
cond_resched();
- goto again;
+ goto again;
+ }
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ continue;
}
if (key.objectid < block_group->key.objectid) {
WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
}
-static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
-{
- return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
- 3 * num_items;
-}
-
+ /*
+  * Top up @rsv so that it holds the space computed for two items.
+  *
+  * Returns 0 immediately when rsv->reserved already covers that amount.
+  * Otherwise the shortfall is migrated from the filesystem's
+  * trans_block_rsv into @rsv; a failing migration is treated as fatal
+  * (BUG_ON), so the function always returns 0.
+  */
+ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_block_rsv *rsv)
+ {
+ struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
+ u64 num_bytes;
+ int ret;
+
+ /*
+ * Truncate should be freeing data, but give us 2 items just in case it
+ * needs to use some space. We may want to be smarter about this in the
+ * future.
+ */
- num_bytes = calc_trans_metadata_size(root, 2);
++ num_bytes = btrfs_calc_trans_metadata_size(root, 2);
+
+ /* We already have enough bytes, just return */
+ if (rsv->reserved >= num_bytes)
+ return 0;
+
+ num_bytes -= rsv->reserved;
+
+ /*
+ * You should have reserved enough space before hand to do this, so this
+ * should not fail.
+ */
+ ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
+ BUG_ON(ret);
+
+ return 0;
+ }
+
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int num_items)
struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
/*
- * one for deleting orphan item, one for updating inode and
- * two for calling btrfs_truncate_inode_items.
- *
- * btrfs_truncate_inode_items is a delete operation, it frees
- * more space than it uses in most cases. So two units of
- * metadata space should be enough for calling it many times.
- * If all of the metadata space is used, we can commit
- * transaction and use space it freed.
+ * We need to hold space in order to delete our orphan item once we've
+ * added it, so this takes the reservation so we can release it later
+ * when we are truly done with the orphan item.
*/
- u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
- u64 num_bytes = calc_trans_metadata_size(root, 1);
++ u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
void btrfs_orphan_release_metadata(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
- u64 num_bytes = calc_trans_metadata_size(root, 1);
++ u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
}
if (unlikely(block_group->ro))
goto loop;
- spin_lock(&block_group->tree_lock);
++ spin_lock(&block_group->free_space_ctl->tree_lock);
+ if (cached &&
- block_group->free_space < num_bytes + empty_size) {
- spin_unlock(&block_group->tree_lock);
++ block_group->free_space_ctl->free_space <
++ num_bytes + empty_size) {
++ spin_unlock(&block_group->free_space_ctl->tree_lock);
+ goto loop;
+ }
- spin_unlock(&block_group->tree_lock);
++ spin_unlock(&block_group->free_space_ctl->tree_lock);
+
/*
* Ok we want to try and use the cluster allocator, so lets look
* there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
return ret;
}
-#if 0
-static unsigned long calc_ra(unsigned long start, unsigned long last,
- unsigned long nr)
-{
- return min(last, start + nr - 1);
-}
-
-static noinline int relocate_inode_pages(struct inode *inode, u64 start,
- u64 len)
+static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
{
- u64 page_start;
- u64 page_end;
- unsigned long first_index;
- unsigned long last_index;
- unsigned long i;
- struct page *page;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct file_ra_state *ra;
- struct btrfs_ordered_extent *ordered;
- unsigned int total_read = 0;
- unsigned int total_dirty = 0;
- int ret = 0;
-
- ra = kzalloc(sizeof(*ra), GFP_NOFS);
- if (!ra)
- return -ENOMEM;
+ u64 num_devices;
+ u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
- mutex_lock(&inode->i_mutex);
- first_index = start >> PAGE_CACHE_SHIFT;
- last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
+ /*
+ * we add in the count of missing devices because we want
+ * to make sure that any RAID levels on a degraded FS
+ * continue to be honored.
+ */
+ num_devices = root->fs_info->fs_devices->rw_devices +
+ root->fs_info->fs_devices->missing_devices;
- /* make sure the dirty trick played by the caller work */
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- first_index, last_index);
- if (ret)
- goto out_unlock;
+ if (num_devices == 1) {
+ stripped |= BTRFS_BLOCK_GROUP_DUP;
+ stripped = flags & ~stripped;
- file_ra_state_init(ra, inode->i_mapping);
+ /* turn raid0 into single device chunks */
+ if (flags & BTRFS_BLOCK_GROUP_RAID0)
+ return stripped;
- for (i = first_index ; i <= last_index; i++) {
- if (total_read % ra->ra_pages == 0) {
- btrfs_force_ra(inode->i_mapping, ra, NULL, i,
- calc_ra(i, last_index, ra->ra_pages));
- }
- total_read++;
-again:
- if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
- BUG_ON(1);
- page = grab_cache_page(inode->i_mapping, i);
- if (!page) {
- ret = -ENOMEM;
- goto out_unlock;
- }
- if (!PageUptodate(page)) {
- btrfs_readpage(NULL, page);
- lock_page(page);
- if (!PageUptodate(page)) {
- unlock_page(page);
- page_cache_release(page);
- ret = -EIO;
- goto out_unlock;
- }
- }
- wait_on_page_writeback(page);
-
- page_start = (u64)page->index << PAGE_CACHE_SHIFT;
- page_end = page_start + PAGE_CACHE_SIZE - 1;
- lock_extent(io_tree, page_start, page_end, GFP_NOFS);
-
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
- if (ordered) {
- unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
- unlock_page(page);
- page_cache_release(page);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- goto again;
- }
- set_page_extent_mapped(page);
+ /* turn mirroring into duplication */
+ if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10))
+ return stripped | BTRFS_BLOCK_GROUP_DUP;
+ return flags;
+ } else {
+ /* they already had raid on here, just return */
+ if (flags & stripped)
+ return flags;
- if (i == first_index)
- set_extent_bits(io_tree, page_start, page_end,
- EXTENT_BOUNDARY, GFP_NOFS);
- btrfs_set_extent_delalloc(inode, page_start, page_end);
+ stripped |= BTRFS_BLOCK_GROUP_DUP;
+ stripped = flags & ~stripped;
- set_page_dirty(page);
- total_dirty++;
+ /* switch duplicated blocks with raid1 */
+ if (flags & BTRFS_BLOCK_GROUP_DUP)
+ return stripped | BTRFS_BLOCK_GROUP_RAID1;
- unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
- unlock_page(page);
- page_cache_release(page);
+ /* turn single device chunks into raid0 */
+ return stripped | BTRFS_BLOCK_GROUP_RAID0;
}
-
-out_unlock:
- kfree(ra);
- mutex_unlock(&inode->i_mutex);
- balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
- return ret;
+ return flags;
}
-static noinline int relocate_data_extent(struct inode *reloc_inode,
- struct btrfs_key *extent_key,
- u64 offset)
+static int set_block_group_ro(struct btrfs_block_group_cache *cache)
{
- struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
- struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
- struct extent_map *em;
- u64 start = extent_key->objectid - offset;
- u64 end = start + extent_key->offset - 1;
+ struct btrfs_space_info *sinfo = cache->space_info;
+ u64 num_bytes;
+ int ret = -ENOSPC;
- em = alloc_extent_map(GFP_NOFS);
- BUG_ON(!em);
+ if (cache->ro)
+ return 0;
- em->start = start;
- em->len = extent_key->offset;
- em->block_len = extent_key->offset;
- em->block_start = extent_key->objectid;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
+ spin_lock(&sinfo->lock);
+ spin_lock(&cache->lock);
+ num_bytes = cache->key.offset - cache->reserved - cache->pinned -
+ cache->bytes_super - btrfs_block_group_used(&cache->item);
- /* setup extent map to cheat btrfs_readpage */
- lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
- while (1) {
- int ret;
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(reloc_inode, start, end, 0);
+ if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
+ sinfo->bytes_may_use + sinfo->bytes_readonly +
+ cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
+ sinfo->bytes_readonly += num_bytes;
+ sinfo->bytes_reserved += cache->reserved_pinned;
+ cache->reserved_pinned = 0;
+ cache->ro = 1;
+ ret = 0;
}
- unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
- return relocate_inode_pages(reloc_inode, start, extent_key->offset);
+ spin_unlock(&cache->lock);
+ spin_unlock(&sinfo->lock);
+ return ret;
}
-struct btrfs_ref_path {
- u64 extent_start;
- u64 nodes[BTRFS_MAX_LEVEL];
- u64 root_objectid;
- u64 root_generation;
- u64 owner_objectid;
- u32 num_refs;
- int lowest_level;
- int current_level;
- int shared_level;
-
- struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
- u64 new_nodes[BTRFS_MAX_LEVEL];
-};
-
-struct disk_extent {
- u64 ram_bytes;
- u64 disk_bytenr;
- u64 disk_num_bytes;
- u64 offset;
- u64 num_bytes;
- u8 compression;
- u8 encryption;
- u16 other_encoding;
-};
+int btrfs_set_block_group_ro(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
-static int is_cowonly_root(u64 root_objectid)
{
- if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
- root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
- root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
- root_objectid == BTRFS_DEV_TREE_OBJECTID ||
- root_objectid == BTRFS_TREE_LOG_OBJECTID ||
- root_objectid == BTRFS_CSUM_TREE_OBJECTID)
- return 1;
- return 0;
-}
+ struct btrfs_trans_handle *trans;
+ u64 alloc_flags;
+ int ret;
-static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root,
- struct btrfs_ref_path *ref_path,
- int first_time)
-{
- struct extent_buffer *leaf;
- struct btrfs_path *path;
- struct btrfs_extent_ref *ref;
- struct btrfs_key key;
- struct btrfs_key found_key;
- u64 bytenr;
- u32 nritems;
- int level;
- int ret = 1;
+ BUG_ON(cache->ro);
- trans = btrfs_join_transaction(root, 1);
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
++ trans = btrfs_join_transaction(root);
+ BUG_ON(IS_ERR(trans));
- if (first_time) {
- ref_path->lowest_level = -1;
- ref_path->current_level = -1;
- ref_path->shared_level = -1;
- goto walk_up;
- }
-walk_down:
- level = ref_path->current_level - 1;
- while (level >= -1) {
- u64 parent;
- if (level < ref_path->lowest_level)
- break;
+ alloc_flags = update_block_group_flags(root, cache->flags);
+ if (alloc_flags != cache->flags)
+ do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+ CHUNK_ALLOC_FORCE);
- if (level >= 0)
- bytenr = ref_path->nodes[level];
- else
- bytenr = ref_path->extent_start;
- BUG_ON(bytenr == 0);
+ ret = set_block_group_ro(cache);
+ if (!ret)
+ goto out;
+ alloc_flags = get_alloc_profile(root, cache->space_info->flags);
+ ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+ CHUNK_ALLOC_FORCE);
+ if (ret < 0)
+ goto out;
+ ret = set_block_group_ro(cache);
+out:
+ btrfs_end_transaction(trans, root);
+ return ret;
+}
- parent = ref_path->nodes[level + 1];
- ref_path->nodes[level + 1] = 0;
- ref_path->current_level = level;
- BUG_ON(parent == 0);
+/*
+ * Force the allocation of a chunk for block group type @type.
+ *
+ * The allocation flags are obtained from get_alloc_profile() and handed to
+ * do_chunk_alloc() with a 2MB byte count and CHUNK_ALLOC_FORCE.  The return
+ * value of do_chunk_alloc() is passed back unchanged.
+ */
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 type)
+{
+ u64 alloc_flags = get_alloc_profile(root, type);
+ return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+ CHUNK_ALLOC_FORCE);
+}
- key.objectid = bytenr;
- key.offset = parent + 1;
- key.type = BTRFS_EXTENT_REF_KEY;
+/*
+ * helper to account the unused space of all the readonly block group in the
+ * list. takes mirrors into account.
+ */
+static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
+{
+ struct btrfs_block_group_cache *block_group;
+ u64 free_bytes = 0;
+ int factor;
- ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- BUG_ON(ret == 0);
+ list_for_each_entry(block_group, groups_list, list) {
+ spin_lock(&block_group->lock);
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(extent_root, path);
- if (ret < 0)
- goto out;
- if (ret > 0)
- goto next;
- leaf = path->nodes[0];
+ if (!block_group->ro) {
+ spin_unlock(&block_group->lock);
+ continue;
}
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.objectid == bytenr &&
- found_key.type == BTRFS_EXTENT_REF_KEY) {
- if (level < ref_path->shared_level)
- ref_path->shared_level = level;
- goto found;
- }
-next:
- level--;
- btrfs_release_path(extent_root, path);
- cond_resched();
- }
- /* reached lowest level */
- ret = 1;
- goto out;
-walk_up:
- level = ref_path->current_level;
- while (level < BTRFS_MAX_LEVEL - 1) {
- u64 ref_objectid;
-
- if (level >= 0)
- bytenr = ref_path->nodes[level];
+ if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10 |
+ BTRFS_BLOCK_GROUP_DUP))
+ factor = 2;
else
- bytenr = ref_path->extent_start;
+ factor = 1;
- BUG_ON(bytenr == 0);
+ free_bytes += (block_group->key.offset -
+ btrfs_block_group_used(&block_group->item)) *
+ factor;
- key.objectid = bytenr;
- key.offset = 0;
- key.type = BTRFS_EXTENT_REF_KEY;
+ spin_unlock(&block_group->lock);
+ }
- ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
-
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(extent_root, path);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- /* the extent was freed by someone */
- if (ref_path->lowest_level == level)
- goto out;
- btrfs_release_path(extent_root, path);
- goto walk_down;
- }
- leaf = path->nodes[0];
- }
-
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.objectid != bytenr ||
- found_key.type != BTRFS_EXTENT_REF_KEY) {
- /* the extent was freed by someone */
- if (ref_path->lowest_level == level) {
- ret = 1;
- goto out;
- }
- btrfs_release_path(extent_root, path);
- goto walk_down;
- }
-found:
- ref = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref);
- ref_objectid = btrfs_ref_objectid(leaf, ref);
- if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
- if (first_time) {
- level = (int)ref_objectid;
- BUG_ON(level >= BTRFS_MAX_LEVEL);
- ref_path->lowest_level = level;
- ref_path->current_level = level;
- ref_path->nodes[level] = bytenr;
- } else {
- WARN_ON(ref_objectid != level);
- }
- } else {
- WARN_ON(level != -1);
- }
- first_time = 0;
-
- if (ref_path->lowest_level == level) {
- ref_path->owner_objectid = ref_objectid;
- ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
- }
-
- /*
- * the block is tree root or the block isn't in reference
- * counted tree.
- */
- if (found_key.objectid == found_key.offset ||
- is_cowonly_root(btrfs_ref_root(leaf, ref))) {
- ref_path->root_objectid = btrfs_ref_root(leaf, ref);
- ref_path->root_generation =
- btrfs_ref_generation(leaf, ref);
- if (level < 0) {
- /* special reference from the tree log */
- ref_path->nodes[0] = found_key.offset;
- ref_path->current_level = 0;
- }
- ret = 0;
- goto out;
- }
-
- level++;
- BUG_ON(ref_path->nodes[level] != 0);
- ref_path->nodes[level] = found_key.offset;
- ref_path->current_level = level;
-
- /*
- * the reference was created in the running transaction,
- * no need to continue walking up.
- */
- if (btrfs_ref_generation(leaf, ref) == trans->transid) {
- ref_path->root_objectid = btrfs_ref_root(leaf, ref);
- ref_path->root_generation =
- btrfs_ref_generation(leaf, ref);
- ret = 0;
- goto out;
- }
-
- btrfs_release_path(extent_root, path);
- cond_resched();
- }
- /* reached max tree level, but no tree root found. */
- BUG();
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root,
- struct btrfs_ref_path *ref_path,
- u64 extent_start)
-{
- memset(ref_path, 0, sizeof(*ref_path));
- ref_path->extent_start = extent_start;
-
- return __next_ref_path(trans, extent_root, ref_path, 1);
-}
-
-static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root,
- struct btrfs_ref_path *ref_path)
-{
- return __next_ref_path(trans, extent_root, ref_path, 0);
-}
-
-static noinline int get_new_locations(struct inode *reloc_inode,
- struct btrfs_key *extent_key,
- u64 offset, int no_fragment,
- struct disk_extent **extents,
- int *nr_extents)
-{
- struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
- struct btrfs_path *path;
- struct btrfs_file_extent_item *fi;
- struct extent_buffer *leaf;
- struct disk_extent *exts = *extents;
- struct btrfs_key found_key;
- u64 cur_pos;
- u64 last_byte;
- u32 nritems;
- int nr = 0;
- int max = *nr_extents;
- int ret;
-
- WARN_ON(!no_fragment && *extents);
- if (!exts) {
- max = 1;
- exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
- if (!exts)
- return -ENOMEM;
- }
-
- path = btrfs_alloc_path();
- if (!path) {
- if (exts != *extents)
- kfree(exts);
- return -ENOMEM;
- }
-
- cur_pos = extent_key->objectid - offset;
- last_byte = extent_key->objectid + extent_key->offset;
- ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
- cur_pos, 0);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
-
- while (1) {
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- goto out;
- if (ret > 0)
- break;
- leaf = path->nodes[0];
- }
-
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.offset != cur_pos ||
- found_key.type != BTRFS_EXTENT_DATA_KEY ||
- found_key.objectid != reloc_inode->i_ino)
- break;
-
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(leaf, fi) !=
- BTRFS_FILE_EXTENT_REG ||
- btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
- break;
-
- if (nr == max) {
- struct disk_extent *old = exts;
- max *= 2;
- exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
- if (!exts) {
- ret = -ENOMEM;
- goto out;
- }
- memcpy(exts, old, sizeof(*exts) * nr);
- if (old != *extents)
- kfree(old);
- }
-
- exts[nr].disk_bytenr =
- btrfs_file_extent_disk_bytenr(leaf, fi);
- exts[nr].disk_num_bytes =
- btrfs_file_extent_disk_num_bytes(leaf, fi);
- exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
- exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
- exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
- exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
- exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
- exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
- fi);
- BUG_ON(exts[nr].offset > 0);
- BUG_ON(exts[nr].compression || exts[nr].encryption);
- BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
-
- cur_pos += exts[nr].num_bytes;
- nr++;
-
- if (cur_pos + offset >= last_byte)
- break;
-
- if (no_fragment) {
- ret = 1;
- goto out;
- }
- path->slots[0]++;
- }
-
- BUG_ON(cur_pos + offset > last_byte);
- if (cur_pos + offset < last_byte) {
- ret = -ENOENT;
- goto out;
- }
- ret = 0;
-out:
- btrfs_free_path(path);
- if (ret) {
- if (exts != *extents)
- kfree(exts);
- } else {
- *extents = exts;
- *nr_extents = nr;
- }
- return ret;
-}
-
-static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *extent_key,
- struct btrfs_key *leaf_key,
- struct btrfs_ref_path *ref_path,
- struct disk_extent *new_extents,
- int nr_extents)
-{
- struct extent_buffer *leaf;
- struct btrfs_file_extent_item *fi;
- struct inode *inode = NULL;
- struct btrfs_key key;
- u64 lock_start = 0;
- u64 lock_end = 0;
- u64 num_bytes;
- u64 ext_offset;
- u64 search_end = (u64)-1;
- u32 nritems;
- int nr_scaned = 0;
- int extent_locked = 0;
- int extent_type;
- int ret;
-
- memcpy(&key, leaf_key, sizeof(key));
- if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
- if (key.objectid < ref_path->owner_objectid ||
- (key.objectid == ref_path->owner_objectid &&
- key.type < BTRFS_EXTENT_DATA_KEY)) {
- key.objectid = ref_path->owner_objectid;
- key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = 0;
- }
- }
-
- while (1) {
- ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
- if (ret < 0)
- goto out;
-
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
-next:
- if (extent_locked && ret > 0) {
- /*
- * the file extent item was modified by someone
- * before the extent got locked.
- */
- unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
- lock_end, GFP_NOFS);
- extent_locked = 0;
- }
-
- if (path->slots[0] >= nritems) {
- if (++nr_scaned > 2)
- break;
-
- BUG_ON(extent_locked);
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- goto out;
- if (ret > 0)
- break;
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- }
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-
- if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
- if ((key.objectid > ref_path->owner_objectid) ||
- (key.objectid == ref_path->owner_objectid &&
- key.type > BTRFS_EXTENT_DATA_KEY) ||
- key.offset >= search_end)
- break;
- }
-
- if (inode && key.objectid != inode->i_ino) {
- BUG_ON(extent_locked);
- btrfs_release_path(root, path);
- mutex_unlock(&inode->i_mutex);
- iput(inode);
- inode = NULL;
- continue;
- }
-
- if (key.type != BTRFS_EXTENT_DATA_KEY) {
- path->slots[0]++;
- ret = 1;
- goto next;
- }
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- extent_type = btrfs_file_extent_type(leaf, fi);
- if ((extent_type != BTRFS_FILE_EXTENT_REG &&
- extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
- (btrfs_file_extent_disk_bytenr(leaf, fi) !=
- extent_key->objectid)) {
- path->slots[0]++;
- ret = 1;
- goto next;
- }
-
- num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
- ext_offset = btrfs_file_extent_offset(leaf, fi);
-
- if (search_end == (u64)-1) {
- search_end = key.offset - ext_offset +
- btrfs_file_extent_ram_bytes(leaf, fi);
- }
-
- if (!extent_locked) {
- lock_start = key.offset;
- lock_end = lock_start + num_bytes - 1;
- } else {
- if (lock_start > key.offset ||
- lock_end + 1 < key.offset + num_bytes) {
- unlock_extent(&BTRFS_I(inode)->io_tree,
- lock_start, lock_end, GFP_NOFS);
- extent_locked = 0;
- }
- }
-
- if (!inode) {
- btrfs_release_path(root, path);
-
- inode = btrfs_iget_locked(root->fs_info->sb,
- key.objectid, root);
- if (inode->i_state & I_NEW) {
- BTRFS_I(inode)->root = root;
- BTRFS_I(inode)->location.objectid =
- key.objectid;
- BTRFS_I(inode)->location.type =
- BTRFS_INODE_ITEM_KEY;
- BTRFS_I(inode)->location.offset = 0;
- btrfs_read_locked_inode(inode);
- unlock_new_inode(inode);
- }
- /*
- * some code call btrfs_commit_transaction while
- * holding the i_mutex, so we can't use mutex_lock
- * here.
- */
- if (is_bad_inode(inode) ||
- !mutex_trylock(&inode->i_mutex)) {
- iput(inode);
- inode = NULL;
- key.offset = (u64)-1;
- goto skip;
- }
- }
-
- if (!extent_locked) {
- struct btrfs_ordered_extent *ordered;
-
- btrfs_release_path(root, path);
-
- lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
- lock_end, GFP_NOFS);
- ordered = btrfs_lookup_first_ordered_extent(inode,
- lock_end);
- if (ordered &&
- ordered->file_offset <= lock_end &&
- ordered->file_offset + ordered->len > lock_start) {
- unlock_extent(&BTRFS_I(inode)->io_tree,
- lock_start, lock_end, GFP_NOFS);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- key.offset += num_bytes;
- goto skip;
- }
- if (ordered)
- btrfs_put_ordered_extent(ordered);
-
- extent_locked = 1;
- continue;
- }
-
- if (nr_extents == 1) {
- /* update extent pointer in place */
- btrfs_set_file_extent_disk_bytenr(leaf, fi,
- new_extents[0].disk_bytenr);
- btrfs_set_file_extent_disk_num_bytes(leaf, fi,
- new_extents[0].disk_num_bytes);
- btrfs_mark_buffer_dirty(leaf);
-
- btrfs_drop_extent_cache(inode, key.offset,
- key.offset + num_bytes - 1, 0);
-
- ret = btrfs_inc_extent_ref(trans, root,
- new_extents[0].disk_bytenr,
- new_extents[0].disk_num_bytes,
- leaf->start,
- root->root_key.objectid,
- trans->transid,
- key.objectid);
- BUG_ON(ret);
-
- ret = btrfs_free_extent(trans, root,
- extent_key->objectid,
- extent_key->offset,
- leaf->start,
- btrfs_header_owner(leaf),
- btrfs_header_generation(leaf),
- key.objectid, 0);
- BUG_ON(ret);
-
- btrfs_release_path(root, path);
- key.offset += num_bytes;
- } else {
- BUG_ON(1);
-#if 0
- u64 alloc_hint;
- u64 extent_len;
- int i;
- /*
- * drop old extent pointer at first, then insert the
- * new pointers one bye one
- */
- btrfs_release_path(root, path);
- ret = btrfs_drop_extents(trans, root, inode, key.offset,
- key.offset + num_bytes,
- key.offset, &alloc_hint);
- BUG_ON(ret);
-
- for (i = 0; i < nr_extents; i++) {
- if (ext_offset >= new_extents[i].num_bytes) {
- ext_offset -= new_extents[i].num_bytes;
- continue;
- }
- extent_len = min(new_extents[i].num_bytes -
- ext_offset, num_bytes);
-
- ret = btrfs_insert_empty_item(trans, root,
- path, &key,
- sizeof(*fi));
- BUG_ON(ret);
-
- leaf = path->nodes[0];
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_generation(leaf, fi,
- trans->transid);
- btrfs_set_file_extent_type(leaf, fi,
- BTRFS_FILE_EXTENT_REG);
- btrfs_set_file_extent_disk_bytenr(leaf, fi,
- new_extents[i].disk_bytenr);
- btrfs_set_file_extent_disk_num_bytes(leaf, fi,
- new_extents[i].disk_num_bytes);
- btrfs_set_file_extent_ram_bytes(leaf, fi,
- new_extents[i].ram_bytes);
-
- btrfs_set_file_extent_compression(leaf, fi,
- new_extents[i].compression);
- btrfs_set_file_extent_encryption(leaf, fi,
- new_extents[i].encryption);
- btrfs_set_file_extent_other_encoding(leaf, fi,
- new_extents[i].other_encoding);
-
- btrfs_set_file_extent_num_bytes(leaf, fi,
- extent_len);
- ext_offset += new_extents[i].offset;
- btrfs_set_file_extent_offset(leaf, fi,
- ext_offset);
- btrfs_mark_buffer_dirty(leaf);
-
- btrfs_drop_extent_cache(inode, key.offset,
- key.offset + extent_len - 1, 0);
-
- ret = btrfs_inc_extent_ref(trans, root,
- new_extents[i].disk_bytenr,
- new_extents[i].disk_num_bytes,
- leaf->start,
- root->root_key.objectid,
- trans->transid, key.objectid);
- BUG_ON(ret);
- btrfs_release_path(root, path);
-
- inode_add_bytes(inode, extent_len);
-
- ext_offset = 0;
- num_bytes -= extent_len;
- key.offset += extent_len;
-
- if (num_bytes == 0)
- break;
- }
- BUG_ON(i >= nr_extents);
-#endif
- }
-
- if (extent_locked) {
- unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
- lock_end, GFP_NOFS);
- extent_locked = 0;
- }
-skip:
- if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
- key.offset >= search_end)
- break;
-
- cond_resched();
- }
- ret = 0;
-out:
- btrfs_release_path(root, path);
- if (inode) {
- mutex_unlock(&inode->i_mutex);
- if (extent_locked) {
- unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
- lock_end, GFP_NOFS);
- }
- iput(inode);
- }
- return ret;
-}
-
-int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf, u64 orig_start)
-{
- int level;
- int ret;
-
- BUG_ON(btrfs_header_generation(buf) != trans->transid);
- BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
-
- level = btrfs_header_level(buf);
- if (level == 0) {
- struct btrfs_leaf_ref *ref;
- struct btrfs_leaf_ref *orig_ref;
-
- orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
- if (!orig_ref)
- return -ENOENT;
-
- ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
- if (!ref) {
- btrfs_free_leaf_ref(root, orig_ref);
- return -ENOMEM;
- }
-
- ref->nritems = orig_ref->nritems;
- memcpy(ref->extents, orig_ref->extents,
- sizeof(ref->extents[0]) * ref->nritems);
-
- btrfs_free_leaf_ref(root, orig_ref);
-
- ref->root_gen = trans->transid;
- ref->bytenr = buf->start;
- ref->owner = btrfs_header_owner(buf);
- ref->generation = btrfs_header_generation(buf);
-
- ret = btrfs_add_leaf_ref(root, ref, 0);
- WARN_ON(ret);
- btrfs_free_leaf_ref(root, ref);
- }
- return 0;
-}
-
-static noinline int invalidate_extent_cache(struct btrfs_root *root,
- struct extent_buffer *leaf,
- struct btrfs_block_group_cache *group,
- struct btrfs_root *target_root)
-{
- struct btrfs_key key;
- struct inode *inode = NULL;
- struct btrfs_file_extent_item *fi;
- struct extent_state *cached_state = NULL;
- u64 num_bytes;
- u64 skip_objectid = 0;
- u32 nritems;
- u32 i;
-
- nritems = btrfs_header_nritems(leaf);
- for (i = 0; i < nritems; i++) {
- btrfs_item_key_to_cpu(leaf, &key, i);
- if (key.objectid == skip_objectid ||
- key.type != BTRFS_EXTENT_DATA_KEY)
- continue;
- fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(leaf, fi) ==
- BTRFS_FILE_EXTENT_INLINE)
- continue;
- if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
- continue;
- if (!inode || inode->i_ino != key.objectid) {
- iput(inode);
- inode = btrfs_ilookup(target_root->fs_info->sb,
- key.objectid, target_root, 1);
- }
- if (!inode) {
- skip_objectid = key.objectid;
- continue;
- }
- num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
-
- lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
- key.offset + num_bytes - 1, 0, &cached_state,
- GFP_NOFS);
- btrfs_drop_extent_cache(inode, key.offset,
- key.offset + num_bytes - 1, 1);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
- key.offset + num_bytes - 1, &cached_state,
- GFP_NOFS);
- cond_resched();
- }
- iput(inode);
- return 0;
-}
-
-static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *leaf,
- struct btrfs_block_group_cache *group,
- struct inode *reloc_inode)
-{
- struct btrfs_key key;
- struct btrfs_key extent_key;
- struct btrfs_file_extent_item *fi;
- struct btrfs_leaf_ref *ref;
- struct disk_extent *new_extent;
- u64 bytenr;
- u64 num_bytes;
- u32 nritems;
- u32 i;
- int ext_index;
- int nr_extent;
- int ret;
-
- new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
- if (!new_extent)
- return -ENOMEM;
-
- ref = btrfs_lookup_leaf_ref(root, leaf->start);
- BUG_ON(!ref);
-
- ext_index = -1;
- nritems = btrfs_header_nritems(leaf);
- for (i = 0; i < nritems; i++) {
- btrfs_item_key_to_cpu(leaf, &key, i);
- if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
- continue;
- fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(leaf, fi) ==
- BTRFS_FILE_EXTENT_INLINE)
- continue;
- bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
- num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
- if (bytenr == 0)
- continue;
-
- ext_index++;
- if (bytenr >= group->key.objectid + group->key.offset ||
- bytenr + num_bytes <= group->key.objectid)
- continue;
-
- extent_key.objectid = bytenr;
- extent_key.offset = num_bytes;
- extent_key.type = BTRFS_EXTENT_ITEM_KEY;
- nr_extent = 1;
- ret = get_new_locations(reloc_inode, &extent_key,
- group->key.objectid, 1,
- &new_extent, &nr_extent);
- if (ret > 0)
- continue;
- BUG_ON(ret < 0);
-
- BUG_ON(ref->extents[ext_index].bytenr != bytenr);
- BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
- ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
- ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
-
- btrfs_set_file_extent_disk_bytenr(leaf, fi,
- new_extent->disk_bytenr);
- btrfs_set_file_extent_disk_num_bytes(leaf, fi,
- new_extent->disk_num_bytes);
- btrfs_mark_buffer_dirty(leaf);
-
- ret = btrfs_inc_extent_ref(trans, root,
- new_extent->disk_bytenr,
- new_extent->disk_num_bytes,
- leaf->start,
- root->root_key.objectid,
- trans->transid, key.objectid);
- BUG_ON(ret);
-
- ret = btrfs_free_extent(trans, root,
- bytenr, num_bytes, leaf->start,
- btrfs_header_owner(leaf),
- btrfs_header_generation(leaf),
- key.objectid, 0);
- BUG_ON(ret);
- cond_resched();
- }
- kfree(new_extent);
- BUG_ON(ext_index + 1 != ref->nritems);
- btrfs_free_leaf_ref(root, ref);
- return 0;
-}
-
-int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_root *reloc_root;
- int ret;
-
- if (root->reloc_root) {
- reloc_root = root->reloc_root;
- root->reloc_root = NULL;
- list_add(&reloc_root->dead_list,
- &root->fs_info->dead_reloc_roots);
-
- btrfs_set_root_bytenr(&reloc_root->root_item,
- reloc_root->node->start);
- btrfs_set_root_level(&root->root_item,
- btrfs_header_level(reloc_root->node));
- memset(&reloc_root->root_item.drop_progress, 0,
- sizeof(struct btrfs_disk_key));
- reloc_root->root_item.drop_level = 0;
-
- ret = btrfs_update_root(trans, root->fs_info->tree_root,
- &reloc_root->root_key,
- &reloc_root->root_item);
- BUG_ON(ret);
- }
- return 0;
-}
-
-int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *reloc_root;
- struct btrfs_root *prev_root = NULL;
- struct list_head dead_roots;
- int ret;
- unsigned long nr;
-
- INIT_LIST_HEAD(&dead_roots);
- list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
-
- while (!list_empty(&dead_roots)) {
- reloc_root = list_entry(dead_roots.prev,
- struct btrfs_root, dead_list);
- list_del_init(&reloc_root->dead_list);
-
- BUG_ON(reloc_root->commit_root != NULL);
- while (1) {
- trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
-
- mutex_lock(&root->fs_info->drop_mutex);
- ret = btrfs_drop_snapshot(trans, reloc_root);
- if (ret != -EAGAIN)
- break;
- mutex_unlock(&root->fs_info->drop_mutex);
-
- nr = trans->blocks_used;
- ret = btrfs_end_transaction(trans, root);
- BUG_ON(ret);
- btrfs_btree_balance_dirty(root, nr);
- }
-
- free_extent_buffer(reloc_root->node);
-
- ret = btrfs_del_root(trans, root->fs_info->tree_root,
- &reloc_root->root_key);
- BUG_ON(ret);
- mutex_unlock(&root->fs_info->drop_mutex);
-
- nr = trans->blocks_used;
- ret = btrfs_end_transaction(trans, root);
- BUG_ON(ret);
- btrfs_btree_balance_dirty(root, nr);
-
- kfree(prev_root);
- prev_root = reloc_root;
- }
- if (prev_root) {
- btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
- kfree(prev_root);
- }
- return 0;
-}
-
-int btrfs_add_dead_reloc_root(struct btrfs_root *root)
-{
- list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
- return 0;
-}
-
-int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
-{
- struct btrfs_root *reloc_root;
- struct btrfs_trans_handle *trans;
- struct btrfs_key location;
- int found;
- int ret;
-
- mutex_lock(&root->fs_info->tree_reloc_mutex);
- ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
- BUG_ON(ret);
- found = !list_empty(&root->fs_info->dead_reloc_roots);
- mutex_unlock(&root->fs_info->tree_reloc_mutex);
-
- if (found) {
- trans = btrfs_start_transaction(root, 1);
- BUG_ON(IS_ERR(trans));
- ret = btrfs_commit_transaction(trans, root);
- BUG_ON(ret);
- }
-
- location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
- location.offset = (u64)-1;
- location.type = BTRFS_ROOT_ITEM_KEY;
-
- reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
- BUG_ON(!reloc_root);
- ret = btrfs_orphan_cleanup(reloc_root);
- BUG_ON(ret);
- return 0;
-}
-
-static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_root *reloc_root;
- struct extent_buffer *eb;
- struct btrfs_root_item *root_item;
- struct btrfs_key root_key;
- int ret;
-
- BUG_ON(!root->ref_cows);
- if (root->reloc_root)
- return 0;
-
- root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
- if (!root_item)
- return -ENOMEM;
-
- ret = btrfs_copy_root(trans, root, root->commit_root,
- &eb, BTRFS_TREE_RELOC_OBJECTID);
- BUG_ON(ret);
-
- root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
- root_key.offset = root->root_key.objectid;
- root_key.type = BTRFS_ROOT_ITEM_KEY;
-
- memcpy(root_item, &root->root_item, sizeof(root_item));
- btrfs_set_root_refs(root_item, 0);
- btrfs_set_root_bytenr(root_item, eb->start);
- btrfs_set_root_level(root_item, btrfs_header_level(eb));
- btrfs_set_root_generation(root_item, trans->transid);
-
- btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
-
- ret = btrfs_insert_root(trans, root->fs_info->tree_root,
- &root_key, root_item);
- BUG_ON(ret);
- kfree(root_item);
-
- reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
- &root_key);
- BUG_ON(IS_ERR(reloc_root));
- reloc_root->last_trans = trans->transid;
- reloc_root->commit_root = NULL;
- reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
-
- root->reloc_root = reloc_root;
- return 0;
-}
-
-/*
- * Core function of space balance.
- *
- * The idea is using reloc trees to relocate tree blocks in reference
- * counted roots. There is one reloc tree for each subvol, and all
- * reloc trees share same root key objectid. Reloc trees are snapshots
- * of the latest committed roots of subvols (root->commit_root).
- *
- * To relocate a tree block referenced by a subvol, there are two steps.
- * COW the block through subvol's reloc tree, then update block pointer
- * in the subvol to point to the new block. Since all reloc trees share
- * same root key objectid, doing special handing for tree blocks owned
- * by them is easy. Once a tree block has been COWed in one reloc tree,
- * we can use the resulting new block directly when the same block is
- * required to COW again through other reloc trees. By this way, relocated
- * tree blocks are shared between reloc trees, so they are also shared
- * between subvols.
- */
-static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *first_key,
- struct btrfs_ref_path *ref_path,
- struct btrfs_block_group_cache *group,
- struct inode *reloc_inode)
-{
- struct btrfs_root *reloc_root;
- struct extent_buffer *eb = NULL;
- struct btrfs_key *keys;
- u64 *nodes;
- int level;
- int shared_level;
- int lowest_level = 0;
- int ret;
-
- if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
- lowest_level = ref_path->owner_objectid;
-
- if (!root->ref_cows) {
- path->lowest_level = lowest_level;
- ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
- BUG_ON(ret < 0);
- path->lowest_level = 0;
- btrfs_release_path(root, path);
- return 0;
- }
-
- mutex_lock(&root->fs_info->tree_reloc_mutex);
- ret = init_reloc_tree(trans, root);
- BUG_ON(ret);
- reloc_root = root->reloc_root;
-
- shared_level = ref_path->shared_level;
- ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
-
- keys = ref_path->node_keys;
- nodes = ref_path->new_nodes;
- memset(&keys[shared_level + 1], 0,
- sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
- memset(&nodes[shared_level + 1], 0,
- sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
-
- if (nodes[lowest_level] == 0) {
- path->lowest_level = lowest_level;
- ret = btrfs_search_slot(trans, reloc_root, first_key, path,
- 0, 1);
- BUG_ON(ret);
- for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
- eb = path->nodes[level];
- if (!eb || eb == reloc_root->node)
- break;
- nodes[level] = eb->start;
- if (level == 0)
- btrfs_item_key_to_cpu(eb, &keys[level], 0);
- else
- btrfs_node_key_to_cpu(eb, &keys[level], 0);
- }
- if (nodes[0] &&
- ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
- eb = path->nodes[0];
- ret = replace_extents_in_leaf(trans, reloc_root, eb,
- group, reloc_inode);
- BUG_ON(ret);
- }
- btrfs_release_path(reloc_root, path);
- } else {
- ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
- lowest_level);
- BUG_ON(ret);
- }
-
- /*
- * replace tree blocks in the fs tree with tree blocks in
- * the reloc tree.
- */
- ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
- BUG_ON(ret < 0);
-
- if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_search_slot(trans, reloc_root, first_key, path,
- 0, 0);
- BUG_ON(ret);
- extent_buffer_get(path->nodes[0]);
- eb = path->nodes[0];
- btrfs_release_path(reloc_root, path);
- ret = invalidate_extent_cache(reloc_root, eb, group, root);
- BUG_ON(ret);
- free_extent_buffer(eb);
- }
-
- mutex_unlock(&root->fs_info->tree_reloc_mutex);
- path->lowest_level = 0;
- return 0;
-}
-
-static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *first_key,
- struct btrfs_ref_path *ref_path)
-{
- int ret;
-
- ret = relocate_one_path(trans, root, path, first_key,
- ref_path, NULL, NULL);
- BUG_ON(ret);
-
- return 0;
-}
-
-static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root,
- struct btrfs_path *path,
- struct btrfs_key *extent_key)
-{
- int ret;
-
- ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
- if (ret)
- goto out;
- ret = btrfs_del_item(trans, extent_root, path);
-out:
- btrfs_release_path(extent_root, path);
- return ret;
-}
-
-static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
- struct btrfs_ref_path *ref_path)
-{
- struct btrfs_key root_key;
-
- root_key.objectid = ref_path->root_objectid;
- root_key.type = BTRFS_ROOT_ITEM_KEY;
- if (is_cowonly_root(ref_path->root_objectid))
- root_key.offset = 0;
- else
- root_key.offset = (u64)-1;
-
- return btrfs_read_fs_root_no_name(fs_info, &root_key);
-}
-
-static noinline int relocate_one_extent(struct btrfs_root *extent_root,
- struct btrfs_path *path,
- struct btrfs_key *extent_key,
- struct btrfs_block_group_cache *group,
- struct inode *reloc_inode, int pass)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *found_root;
- struct btrfs_ref_path *ref_path = NULL;
- struct disk_extent *new_extents = NULL;
- int nr_extents = 0;
- int loops;
- int ret;
- int level;
- struct btrfs_key first_key;
- u64 prev_block = 0;
-
-
- trans = btrfs_start_transaction(extent_root, 1);
- BUG_ON(IS_ERR(trans));
-
- if (extent_key->objectid == 0) {
- ret = del_extent_zero(trans, extent_root, path, extent_key);
- goto out;
- }
-
- ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
- if (!ref_path) {
- ret = -ENOMEM;
- goto out;
- }
-
- for (loops = 0; ; loops++) {
- if (loops == 0) {
- ret = btrfs_first_ref_path(trans, extent_root, ref_path,
- extent_key->objectid);
- } else {
- ret = btrfs_next_ref_path(trans, extent_root, ref_path);
- }
- if (ret < 0)
- goto out;
- if (ret > 0)
- break;
-
- if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
- ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
- continue;
-
- found_root = read_ref_root(extent_root->fs_info, ref_path);
- BUG_ON(!found_root);
- /*
- * for reference counted tree, only process reference paths
- * rooted at the latest committed root.
- */
- if (found_root->ref_cows &&
- ref_path->root_generation != found_root->root_key.offset)
- continue;
-
- if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
- if (pass == 0) {
- /*
- * copy data extents to new locations
- */
- u64 group_start = group->key.objectid;
- ret = relocate_data_extent(reloc_inode,
- extent_key,
- group_start);
- if (ret < 0)
- goto out;
- break;
- }
- level = 0;
- } else {
- level = ref_path->owner_objectid;
- }
-
- if (prev_block != ref_path->nodes[level]) {
- struct extent_buffer *eb;
- u64 block_start = ref_path->nodes[level];
- u64 block_size = btrfs_level_size(found_root, level);
-
- eb = read_tree_block(found_root, block_start,
- block_size, 0);
- if (!eb) {
- ret = -EIO;
- goto out;
- }
- btrfs_tree_lock(eb);
- BUG_ON(level != btrfs_header_level(eb));
-
- if (level == 0)
- btrfs_item_key_to_cpu(eb, &first_key, 0);
- else
- btrfs_node_key_to_cpu(eb, &first_key, 0);
-
- btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
- prev_block = block_start;
- }
-
- mutex_lock(&extent_root->fs_info->trans_mutex);
- btrfs_record_root_in_trans(found_root);
- mutex_unlock(&extent_root->fs_info->trans_mutex);
- if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
- /*
- * try to update data extent references while
- * keeping metadata shared between snapshots.
- */
- if (pass == 1) {
- ret = relocate_one_path(trans, found_root,
- path, &first_key, ref_path,
- group, reloc_inode);
- if (ret < 0)
- goto out;
- continue;
- }
- /*
- * use fallback method to process the remaining
- * references.
- */
- if (!new_extents) {
- u64 group_start = group->key.objectid;
- new_extents = kmalloc(sizeof(*new_extents),
- GFP_NOFS);
- if (!new_extents) {
- ret = -ENOMEM;
- goto out;
- }
- nr_extents = 1;
- ret = get_new_locations(reloc_inode,
- extent_key,
- group_start, 1,
- &new_extents,
- &nr_extents);
- if (ret)
- goto out;
- }
- ret = replace_one_extent(trans, found_root,
- path, extent_key,
- &first_key, ref_path,
- new_extents, nr_extents);
- } else {
- ret = relocate_tree_block(trans, found_root, path,
- &first_key, ref_path);
- }
- if (ret < 0)
- goto out;
- }
- ret = 0;
-out:
- btrfs_end_transaction(trans, extent_root);
- kfree(new_extents);
- kfree(ref_path);
- return ret;
-}
-#endif
-
-static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
-{
- u64 num_devices;
- u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
-
- /*
- * we add in the count of missing devices because we want
- * to make sure that any RAID levels on a degraded FS
- * continue to be honored.
- */
- num_devices = root->fs_info->fs_devices->rw_devices +
- root->fs_info->fs_devices->missing_devices;
-
- if (num_devices == 1) {
- stripped |= BTRFS_BLOCK_GROUP_DUP;
- stripped = flags & ~stripped;
-
- /* turn raid0 into single device chunks */
- if (flags & BTRFS_BLOCK_GROUP_RAID0)
- return stripped;
-
- /* turn mirroring into duplication */
- if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- return stripped | BTRFS_BLOCK_GROUP_DUP;
- return flags;
- } else {
- /* they already had raid on here, just return */
- if (flags & stripped)
- return flags;
-
- stripped |= BTRFS_BLOCK_GROUP_DUP;
- stripped = flags & ~stripped;
-
- /* switch duplicated blocks with raid1 */
- if (flags & BTRFS_BLOCK_GROUP_DUP)
- return stripped | BTRFS_BLOCK_GROUP_RAID1;
-
- /* turn single device chunks into raid0 */
- return stripped | BTRFS_BLOCK_GROUP_RAID0;
- }
- return flags;
-}
-
-static int set_block_group_ro(struct btrfs_block_group_cache *cache)
-{
- struct btrfs_space_info *sinfo = cache->space_info;
- u64 num_bytes;
- int ret = -ENOSPC;
-
- if (cache->ro)
- return 0;
-
- spin_lock(&sinfo->lock);
- spin_lock(&cache->lock);
- num_bytes = cache->key.offset - cache->reserved - cache->pinned -
- cache->bytes_super - btrfs_block_group_used(&cache->item);
-
- if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
- sinfo->bytes_may_use + sinfo->bytes_readonly +
- cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
- sinfo->bytes_readonly += num_bytes;
- sinfo->bytes_reserved += cache->reserved_pinned;
- cache->reserved_pinned = 0;
- cache->ro = 1;
- ret = 0;
- }
-
- spin_unlock(&cache->lock);
- spin_unlock(&sinfo->lock);
- return ret;
-}
-
-int btrfs_set_block_group_ro(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache)
-
-{
- struct btrfs_trans_handle *trans;
- u64 alloc_flags;
- int ret;
-
- BUG_ON(cache->ro);
-
- trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
-
- alloc_flags = update_block_group_flags(root, cache->flags);
- if (alloc_flags != cache->flags)
- do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
- CHUNK_ALLOC_FORCE);
-
- ret = set_block_group_ro(cache);
- if (!ret)
- goto out;
- alloc_flags = get_alloc_profile(root, cache->space_info->flags);
- ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
- CHUNK_ALLOC_FORCE);
- if (ret < 0)
- goto out;
- ret = set_block_group_ro(cache);
-out:
- btrfs_end_transaction(trans, root);
- return ret;
-}
-
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 type)
-{
- u64 alloc_flags = get_alloc_profile(root, type);
- return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
- CHUNK_ALLOC_FORCE);
-}
-
-/*
- * helper to account the unused space of all the readonly block group in the
- * list. takes mirrors into account.
- */
-static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
-{
- struct btrfs_block_group_cache *block_group;
- u64 free_bytes = 0;
- int factor;
-
- list_for_each_entry(block_group, groups_list, list) {
- spin_lock(&block_group->lock);
-
- if (!block_group->ro) {
- spin_unlock(&block_group->lock);
- continue;
- }
-
- if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_DUP))
- factor = 2;
- else
- factor = 1;
-
- free_bytes += (block_group->key.offset -
- btrfs_block_group_used(&block_group->item)) *
- factor;
-
- spin_unlock(&block_group->lock);
- }
-
- return free_bytes;
-}
+ return free_bytes;
+}
/*
* helper to account the unused space of all the readonly block group in the
}
if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
- spin_lock(&block_group->tree_lock);
- ret = link_free_space(block_group, e);
- spin_unlock(&block_group->tree_lock);
+ spin_lock(&ctl->tree_lock);
+ ret = link_free_space(ctl, e);
+ spin_unlock(&ctl->tree_lock);
- BUG_ON(ret);
+ if (ret) {
+ printk(KERN_ERR "Duplicate entries in "
+ "free space cache, dumping\n");
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
} else {
e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
if (!e->bitmap) {
page_cache_release(page);
goto free_cache;
}
- spin_lock(&block_group->tree_lock);
- ret = link_free_space(block_group, e);
- block_group->total_bitmaps++;
- recalculate_thresholds(block_group);
- spin_unlock(&block_group->tree_lock);
+ spin_lock(&ctl->tree_lock);
+ ret2 = link_free_space(ctl, e);
+ ctl->total_bitmaps++;
+ ctl->op->recalc_thresholds(ctl);
+ spin_unlock(&ctl->tree_lock);
list_add_tail(&e->list, &bitmaps);
+ if (ret) {
+ printk(KERN_ERR "Duplicate entries in "
+ "free space cache, dumping\n");
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
}
num_entries--;
return -ENOMEM;
path->leave_spinning = 1;
- btrfs_set_trans_block_group(trans, inode);
- key.objectid = inode->i_ino;
+ key.objectid = btrfs_ino(inode);
key.offset = start;
btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
datasize = btrfs_file_extent_calc_inline_size(cur_size);
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
- BUG_ON(root == root->fs_info->tree_root);
+ BUG_ON(is_free_space_inode(root, inode));
- trans = btrfs_join_transaction(root, 1);
+ trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
- btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
path = btrfs_alloc_path();
BUG_ON(!path);
- if (root == root->fs_info->tree_root) {
- nolock = true;
+
+ nolock = is_free_space_inode(root, inode);
+
+ if (nolock)
- trans = btrfs_join_transaction_nolock(root, 1);
+ trans = btrfs_join_transaction_nolock(root);
- } else {
+ else
- trans = btrfs_join_transaction(root, 1);
+ trans = btrfs_join_transaction(root);
- }
++
BUG_ON(IS_ERR(trans));
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
cow_start = (u64)-1;
cur_offset = start;
if (IS_ERR(trans))
return PTR_ERR(trans);
- btrfs_set_trans_block_group(trans, dir);
-
- if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+ if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
err = btrfs_unlink_subvol(trans, root, dir,
BTRFS_I(inode)->location.objectid,
dentry->d_name.name,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = 2;
++
+ path->reada = 1;
+ if (key_type == BTRFS_DIR_INDEX_KEY) {
+ INIT_LIST_HEAD(&ins_list);
+ INIT_LIST_HEAD(&del_list);
+ btrfs_get_delayed_items(inode, &ins_list, &del_list);
+ }
+
btrfs_set_key_type(&key, key_type);
key.offset = filp->f_pos;
- key.objectid = inode->i_ino;
+ key.objectid = btrfs_ino(inode);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
btrfs_end_transaction(trans, root);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
- if (printk_ratelimit()) {
- printk(KERN_ERR "btrfs: fail to "
- "dirty inode %lu error %ld\n",
- inode->i_ino, PTR_ERR(trans));
- }
+ printk_ratelimited(KERN_ERR "btrfs: fail to "
+ "dirty inode %llu error %ld\n",
+ (unsigned long long)btrfs_ino(inode),
+ PTR_ERR(trans));
return;
}
- btrfs_set_trans_block_group(trans, inode);
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
if (IS_ERR(trans))
return PTR_ERR(trans);
- btrfs_set_trans_block_group(trans, dir);
-
+ err = btrfs_find_free_ino(root, &objectid);
+ if (err)
+ goto out_unlock;
+
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, dir->i_ino, objectid,
+ dentry->d_name.len, btrfs_ino(dir), objectid,
- BTRFS_I(dir)->block_group, mode, &index);
+ mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_unlock;
if (IS_ERR(trans))
return PTR_ERR(trans);
- btrfs_set_trans_block_group(trans, dir);
-
+ err = btrfs_find_free_ino(root, &objectid);
+ if (err)
+ goto out_unlock;
+
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, dir->i_ino, objectid,
+ dentry->d_name.len, btrfs_ino(dir), objectid,
- BTRFS_I(dir)->block_group, mode, &index);
+ mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_unlock;
trans = btrfs_start_transaction(root, 5);
if (IS_ERR(trans))
return PTR_ERR(trans);
- btrfs_set_trans_block_group(trans, dir);
+ err = btrfs_find_free_ino(root, &objectid);
+ if (err)
+ goto out_fail;
+
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, dir->i_ino, objectid,
+ dentry->d_name.len, btrfs_ino(dir), objectid,
- BTRFS_I(dir)->block_group, S_IFDIR | mode,
- &index);
+ S_IFDIR | mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_fail;
kunmap(page);
free_extent_map(em);
em = NULL;
- btrfs_release_path(root, path);
++
+ btrfs_release_path(path);
- trans = btrfs_join_transaction(root, 1);
+ trans = btrfs_join_transaction(root);
++
if (IS_ERR(trans))
return ERR_CAST(trans);
goto again;
spin_unlock(&root->fs_info->ordered_extent_lock);
}
- if (root == root->fs_info->tree_root) {
- struct btrfs_block_group_cache *block_group;
-
- block_group = btrfs_lookup_block_group(root->fs_info,
- BTRFS_I(inode)->block_group);
- if (block_group && block_group->inode == inode) {
- spin_lock(&block_group->lock);
- block_group->inode = NULL;
- spin_unlock(&block_group->lock);
- btrfs_put_block_group(block_group);
- } else if (block_group) {
- btrfs_put_block_group(block_group);
- }
- }
-
spin_lock(&root->orphan_lock);
if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
- printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
- inode->i_ino);
+ printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
+ (unsigned long long)btrfs_ino(inode));
list_del_init(&BTRFS_I(inode)->i_orphan);
}
spin_unlock(&root->orphan_lock);
if (IS_ERR(trans))
return PTR_ERR(trans);
- btrfs_set_trans_block_group(trans, dir);
-
+ err = btrfs_find_free_ino(root, &objectid);
+ if (err)
+ goto out_unlock;
+
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, dir->i_ino, objectid,
+ dentry->d_name.len, btrfs_ino(dir), objectid,
- BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
- &index);
+ S_IFLNK|S_IRWXUGO, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_unlock;
/*
 * either allocate a new transaction or hop into the existing one
 *
 * Takes root->fs_info->trans_lock for the checks and updates.  When
 * fs_info->trans_no_join is set and @nofail is zero, returns -EBUSY
 * without joining.  If a running transaction exists, takes a use_count
 * reference, bumps num_writers and num_joined on it and returns 0.
 * Otherwise the lock is dropped while a new transaction is allocated
 * (GFP_NOFS; -ENOMEM on failure) and then retaken: if another
 * transaction became the running one in the meantime, the fresh
 * allocation is freed and that one is joined instead; if not, the new
 * transaction is initialized, added to fs_info->trans_list, given the
 * next generation as its transid, installed as
 * fs_info->running_transaction, and 0 is returned.
 */
- static noinline int join_transaction(struct btrfs_root *root)
+ static noinline int join_transaction(struct btrfs_root *root, int nofail)
{
struct btrfs_transaction *cur_trans;
+
+ spin_lock(&root->fs_info->trans_lock);
+ if (root->fs_info->trans_no_join) {
+ if (!nofail) {
+ spin_unlock(&root->fs_info->trans_lock);
+ return -EBUSY;
+ }
+ }
+
cur_trans = root->fs_info->running_transaction;
- if (!cur_trans) {
- cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
- GFP_NOFS);
- if (!cur_trans)
- return -ENOMEM;
- root->fs_info->generation++;
- atomic_set(&cur_trans->num_writers, 1);
- cur_trans->num_joined = 0;
- cur_trans->transid = root->fs_info->generation;
- init_waitqueue_head(&cur_trans->writer_wait);
- init_waitqueue_head(&cur_trans->commit_wait);
- cur_trans->in_commit = 0;
- cur_trans->blocked = 0;
- atomic_set(&cur_trans->use_count, 1);
- cur_trans->commit_done = 0;
- cur_trans->start_time = get_seconds();
-
- cur_trans->delayed_refs.root = RB_ROOT;
- cur_trans->delayed_refs.num_entries = 0;
- cur_trans->delayed_refs.num_heads_ready = 0;
- cur_trans->delayed_refs.num_heads = 0;
- cur_trans->delayed_refs.flushing = 0;
- cur_trans->delayed_refs.run_delayed_start = 0;
- spin_lock_init(&cur_trans->delayed_refs.lock);
-
- INIT_LIST_HEAD(&cur_trans->pending_snapshots);
- list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
- extent_io_tree_init(&cur_trans->dirty_pages,
- root->fs_info->btree_inode->i_mapping);
- spin_lock(&root->fs_info->new_trans_lock);
- root->fs_info->running_transaction = cur_trans;
- spin_unlock(&root->fs_info->new_trans_lock);
- } else {
+ if (cur_trans) {
+ atomic_inc(&cur_trans->use_count);
atomic_inc(&cur_trans->num_writers);
cur_trans->num_joined++;
+ spin_unlock(&root->fs_info->trans_lock);
+ return 0;
}
- root->fs_info->btree_inode->i_mapping,
- GFP_NOFS);
+ spin_unlock(&root->fs_info->trans_lock);
+
+ cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
+ if (!cur_trans)
+ return -ENOMEM;
+ spin_lock(&root->fs_info->trans_lock);
+ if (root->fs_info->running_transaction) {
+ kmem_cache_free(btrfs_transaction_cachep, cur_trans);
+ cur_trans = root->fs_info->running_transaction;
+ atomic_inc(&cur_trans->use_count);
+ atomic_inc(&cur_trans->num_writers);
+ cur_trans->num_joined++;
+ spin_unlock(&root->fs_info->trans_lock);
+ return 0;
+ }
+ atomic_set(&cur_trans->num_writers, 1);
+ cur_trans->num_joined = 0;
+ init_waitqueue_head(&cur_trans->writer_wait);
+ init_waitqueue_head(&cur_trans->commit_wait);
+ cur_trans->in_commit = 0;
+ cur_trans->blocked = 0;
+ /*
+ * One for this trans handle, one so it will live on until we
+ * commit the transaction.
+ */
+ atomic_set(&cur_trans->use_count, 2);
+ cur_trans->commit_done = 0;
+ cur_trans->start_time = get_seconds();
+
+ cur_trans->delayed_refs.root = RB_ROOT;
+ cur_trans->delayed_refs.num_entries = 0;
+ cur_trans->delayed_refs.num_heads_ready = 0;
+ cur_trans->delayed_refs.num_heads = 0;
+ cur_trans->delayed_refs.flushing = 0;
+ cur_trans->delayed_refs.run_delayed_start = 0;
+ spin_lock_init(&cur_trans->commit_lock);
+ spin_lock_init(&cur_trans->delayed_refs.lock);
+
+ INIT_LIST_HEAD(&cur_trans->pending_snapshots);
+ list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
+ extent_io_tree_init(&cur_trans->dirty_pages,
++ root->fs_info->btree_inode->i_mapping);
+ root->fs_info->generation++;
+ cur_trans->transid = root->fs_info->generation;
+ root->fs_info->running_transaction = cur_trans;
+ spin_unlock(&root->fs_info->trans_lock);
return 0;
}
return ret;
}
-#if 0
-/*
- * rate limit against the drop_snapshot code. This helps to slow down new
- * operations if the drop_snapshot code isn't able to keep up.
- */
-static void throttle_on_drops(struct btrfs_root *root)
-{
- struct btrfs_fs_info *info = root->fs_info;
- int harder_count = 0;
-
-harder:
- if (atomic_read(&info->throttles)) {
- DEFINE_WAIT(wait);
- int thr;
- thr = atomic_read(&info->throttle_gen);
-
- do {
- prepare_to_wait(&info->transaction_throttle,
- &wait, TASK_UNINTERRUPTIBLE);
- if (!atomic_read(&info->throttles)) {
- finish_wait(&info->transaction_throttle, &wait);
- break;
- }
- schedule();
- finish_wait(&info->transaction_throttle, &wait);
- } while (thr == atomic_read(&info->throttle_gen));
- harder_count++;
-
- if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
- harder_count < 2)
- goto harder;
-
- if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
- harder_count < 10)
- goto harder;
-
- if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
- harder_count < 20)
- goto harder;
- }
-}
-#endif
-
/*
 * Calls wait_current_trans() on @root unless
 * root->fs_info->open_ioctl_trans reads as non-zero.
 */
void btrfs_throttle(struct btrfs_root *root)
{
- mutex_lock(&root->fs_info->trans_mutex);
- if (!root->fs_info->open_ioctl_trans)
+ if (!atomic_read(&root->fs_info->open_ioctl_trans))
wait_current_trans(root);
- mutex_unlock(&root->fs_info->trans_mutex);
}
static int should_end_transaction(struct btrfs_trans_handle *trans,
trace_btrfs_transaction_commit(root);
- mutex_unlock(&root->fs_info->trans_mutex);
-
+ btrfs_scrub_continue(root);
+
if (current->journal_info == trans)
current->journal_info = NULL;