Btrfs: Lower contention on the csum mutex
[mirror_ubuntu-artful-kernel.git] / fs/btrfs/extent-tree.c
index 4765248000fd2be988078a2850e876202d30f490..fff219ed61d9a7be5d76cbb8cff7771341fa5c3b 100644
@@ -893,10 +893,10 @@ out:
        return ret;
 }
 
-int btrfs_cross_ref_exists(struct btrfs_root *root,
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root,
                           struct btrfs_key *key, u64 bytenr)
 {
-       struct btrfs_trans_handle *trans;
        struct btrfs_root *old_root;
        struct btrfs_path *path = NULL;
        struct extent_buffer *eb;
@@ -908,6 +908,7 @@ int btrfs_cross_ref_exists(struct btrfs_root *root,
        int level;
        int ret;
 
+       BUG_ON(trans == NULL);
        BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
        ret = get_reference_status(root, bytenr, 0, key->objectid,
                                   &min_generation, &ref_count);
@@ -917,7 +918,6 @@ int btrfs_cross_ref_exists(struct btrfs_root *root,
        if (ref_count != 1)
                return 1;
 
-       trans = btrfs_start_transaction(root, 0);
        old_root = root->dirty_root->root;
        ref_generation = old_root->root_key.offset;
 
@@ -973,7 +973,6 @@ int btrfs_cross_ref_exists(struct btrfs_root *root,
 out:
        if (path)
                btrfs_free_path(path);
-       btrfs_end_transaction(trans, root);
        return ret;
 }
 
@@ -1230,7 +1229,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                found->total_bytes += total_bytes;
                found->bytes_used += bytes_used;
                found->full = 0;
-               WARN_ON(found->total_bytes < found->bytes_used);
                *space_info = found;
                return 0;
        }
@@ -2118,6 +2116,15 @@ again:
        return 0;
 }
 
+int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
+{
+       maybe_lock_mutex(root);
+       set_extent_dirty(&root->fs_info->free_space_cache,
+                        start, start + len - 1, GFP_NOFS);
+       maybe_unlock_mutex(root);
+       return 0;
+}
+
 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
                                  u64 num_bytes, u64 min_alloc_size,
@@ -2267,6 +2274,26 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
        maybe_unlock_mutex(root);
        return ret;
 }
+
+struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
+                                           struct btrfs_root *root,
+                                           u64 bytenr, u32 blocksize)
+{
+       struct extent_buffer *buf;
+
+       buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+       if (!buf)
+               return ERR_PTR(-ENOMEM);
+       btrfs_set_header_generation(buf, trans->transid);
+       btrfs_tree_lock(buf);
+       clean_tree_block(trans, root, buf);
+       btrfs_set_buffer_uptodate(buf);
+       set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
+                        buf->start + buf->len - 1, GFP_NOFS);
+       trans->blocks_used++;
+       return buf;
+}
+
 /*
  * helper function to allocate a block for a given tree
  * returns the tree buffer or NULL.
@@ -2293,26 +2320,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                BUG_ON(ret > 0);
                return ERR_PTR(ret);
        }
-       buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
-       if (!buf) {
-               btrfs_free_extent(trans, root, ins.objectid, blocksize,
-                                 root->root_key.objectid, ref_generation,
-                                 0, 0, 0);
-               return ERR_PTR(-ENOMEM);
-       }
-       btrfs_set_header_generation(buf, trans->transid);
-       btrfs_tree_lock(buf);
-       clean_tree_block(trans, root, buf);
-       btrfs_set_buffer_uptodate(buf);
 
-       if (PageDirty(buf->first_page)) {
-               printk("page %lu dirty\n", buf->first_page->index);
-               WARN_ON(1);
-       }
-
-       set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
-                        buf->start + buf->len - 1, GFP_NOFS);
-       trans->blocks_used++;
+       buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
        return buf;
 }
 
@@ -2333,8 +2342,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
        leaf_owner = btrfs_header_owner(leaf);
        leaf_generation = btrfs_header_generation(leaf);
 
-       mutex_unlock(&root->fs_info->alloc_mutex);
-
        for (i = 0; i < nritems; i++) {
                u64 disk_bytenr;
                cond_resched();
@@ -2360,10 +2367,13 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
                                leaf_owner, leaf_generation,
                                key.objectid, key.offset, 0);
                mutex_unlock(&root->fs_info->alloc_mutex);
+
+               atomic_inc(&root->fs_info->throttle_gen);
+               wake_up(&root->fs_info->transaction_throttle);
+               cond_resched();
+
                BUG_ON(ret);
        }
-
-       mutex_lock(&root->fs_info->alloc_mutex);
        return 0;
 }
 
@@ -2375,7 +2385,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
        int ret;
        struct btrfs_extent_info *info = ref->extents;
 
-       mutex_unlock(&root->fs_info->alloc_mutex);
        for (i = 0; i < ref->nritems; i++) {
                mutex_lock(&root->fs_info->alloc_mutex);
                ret = __btrfs_free_extent(trans, root,
@@ -2383,67 +2392,55 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
                                        ref->owner, ref->generation,
                                        info->objectid, info->offset, 0);
                mutex_unlock(&root->fs_info->alloc_mutex);
+
+               atomic_inc(&root->fs_info->throttle_gen);
+               wake_up(&root->fs_info->transaction_throttle);
+               cond_resched();
+
                BUG_ON(ret);
                info++;
        }
-       mutex_lock(&root->fs_info->alloc_mutex);
 
        return 0;
 }
 
-static void noinline reada_walk_down(struct btrfs_root *root,
-                                    struct extent_buffer *node,
-                                    int slot)
+int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
+                             u32 *refs)
 {
-       u64 bytenr;
-       u64 last = 0;
-       u32 nritems;
-       u32 refs;
-       u32 blocksize;
        int ret;
-       int i;
-       int level;
-       int skipped = 0;
-
-       nritems = btrfs_header_nritems(node);
-       level = btrfs_header_level(node);
-       if (level)
-               return;
-
-       for (i = slot; i < nritems && skipped < 32; i++) {
-               bytenr = btrfs_node_blockptr(node, i);
-               if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
-                            (last > bytenr && last - bytenr > 32 * 1024))) {
-                       skipped++;
-                       continue;
+
+       ret = lookup_extent_ref(NULL, root, start, len, refs);
+       BUG_ON(ret);
+
+#if 0 // some debugging code in case we see problems here
+       /* if the refs count is one, it won't get increased again.  But
+        * if the ref count is > 1, someone may be decreasing it at
+        * the same time we are.
+        */
+       if (*refs != 1) {
+               struct extent_buffer *eb = NULL;
+               eb = btrfs_find_create_tree_block(root, start, len);
+               if (eb)
+                       btrfs_tree_lock(eb);
+
+               mutex_lock(&root->fs_info->alloc_mutex);
+               ret = lookup_extent_ref(NULL, root, start, len, refs);
+               BUG_ON(ret);
+               mutex_unlock(&root->fs_info->alloc_mutex);
+
+               if (eb) {
+                       btrfs_tree_unlock(eb);
+                       free_extent_buffer(eb);
                }
-               blocksize = btrfs_level_size(root, level - 1);
-               if (i != slot) {
-                       ret = lookup_extent_ref(NULL, root, bytenr,
-                                               blocksize, &refs);
-                       BUG_ON(ret);
-                       if (refs != 1) {
-                               skipped++;
-                               continue;
-                       }
+               if (*refs == 1) {
+                       printk("block %llu went down to one during drop_snap\n",
+                              (unsigned long long)start);
                }
-               ret = readahead_tree_block(root, bytenr, blocksize,
-                                          btrfs_node_ptr_generation(node, i));
-               last = bytenr + blocksize;
-               cond_resched();
-               if (ret)
-                       break;
+
        }
-}
+#endif
 
-int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
-                             u32 *refs)
-{
-       int ret;
-       mutex_unlock(&root->fs_info->alloc_mutex);
-       ret = lookup_extent_ref(NULL, root, start, len, refs);
        cond_resched();
-       mutex_lock(&root->fs_info->alloc_mutex);
        return ret;
 }
 
@@ -2467,8 +2464,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
        int ret;
        u32 refs;
 
-       mutex_lock(&root->fs_info->alloc_mutex);
-
        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);
        ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
@@ -2507,13 +2502,26 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                        root_owner = btrfs_header_owner(parent);
                        root_gen = btrfs_header_generation(parent);
                        path->slots[*level]++;
+
+                       mutex_lock(&root->fs_info->alloc_mutex);
                        ret = __btrfs_free_extent(trans, root, bytenr,
                                                blocksize, root_owner,
                                                root_gen, 0, 0, 1);
                        BUG_ON(ret);
+                       mutex_unlock(&root->fs_info->alloc_mutex);
+
+                       atomic_inc(&root->fs_info->throttle_gen);
+                       wake_up(&root->fs_info->transaction_throttle);
+                       cond_resched();
+
                        continue;
                }
-
+               /*
+                * at this point, we have a single ref, and since the
+                * only place referencing this extent is a dead root
+                * the reference count should never go higher.
+                * So, we don't need to check it again
+                */
                if (*level == 1) {
                        struct btrfs_key key;
                        btrfs_node_key_to_cpu(cur, &key, path->slots[*level]);
@@ -2526,37 +2534,28 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                                *level = 0;
                                break;
                        }
+                       if (printk_ratelimit())
+                               printk("leaf ref miss for bytenr %llu\n",
+                                      (unsigned long long)bytenr);
                }
                next = btrfs_find_tree_block(root, bytenr, blocksize);
                if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
                        free_extent_buffer(next);
-                       mutex_unlock(&root->fs_info->alloc_mutex);
 
-                       if (path->slots[*level] == 0)
-                               reada_walk_down(root, cur, path->slots[*level]);
                        next = read_tree_block(root, bytenr, blocksize,
                                               ptr_gen);
                        cond_resched();
-                       mutex_lock(&root->fs_info->alloc_mutex);
-
-                       /* we've dropped the lock, double check */
+#if 0
+                       /*
+                        * this is a debugging check and can go away
+                        * the ref should never go all the way down to 1
+                        * at this point
+                        */
                        ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
                                                &refs);
                        BUG_ON(ret);
-                       if (refs != 1) {
-                               parent = path->nodes[*level];
-                               root_owner = btrfs_header_owner(parent);
-                               root_gen = btrfs_header_generation(parent);
-
-                               path->slots[*level]++;
-                               free_extent_buffer(next);
-                               ret = __btrfs_free_extent(trans, root, bytenr,
-                                                       blocksize,
-                                                       root_owner,
-                                                       root_gen, 0, 0, 1);
-                               BUG_ON(ret);
-                               continue;
-                       }
+                       WARN_ON(refs != 1);
+#endif
                }
                WARN_ON(*level <= 0);
                if (path->nodes[*level-1])
@@ -2564,6 +2563,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                path->nodes[*level-1] = next;
                *level = btrfs_header_level(next);
                path->slots[*level] = 0;
+               cond_resched();
        }
 out:
        WARN_ON(*level < 0);
@@ -2581,6 +2581,7 @@ out:
        root_owner = btrfs_header_owner(parent);
        root_gen = btrfs_header_generation(parent);
 
+       mutex_lock(&root->fs_info->alloc_mutex);
        ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
                                  root_owner, root_gen, 0, 0, 1);
        free_extent_buffer(path->nodes[*level]);
@@ -2588,6 +2589,7 @@ out:
        *level += 1;
        BUG_ON(ret);
        mutex_unlock(&root->fs_info->alloc_mutex);
+
        cond_resched();
        return 0;
 }
@@ -2701,7 +2703,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
                }
        }
        while(1) {
-               atomic_inc(&root->fs_info->throttle_gen);
                wret = walk_down_tree(trans, root, path, &level);
                if (wret > 0)
                        break;
@@ -2717,6 +2718,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
                        ret = -EAGAIN;
                        break;
                }
+               atomic_inc(&root->fs_info->throttle_gen);
                wake_up(&root->fs_info->transaction_throttle);
        }
        for (i = 0; i <= orig_level; i++) {
@@ -2831,9 +2833,13 @@ again:
                }
                set_page_extent_mapped(page);
 
+               /*
+                * make sure page_mkwrite is called for this page if userland
+                * wants to change it from mmap
+                */
+               clear_page_dirty_for_io(page);
 
-               set_extent_delalloc(io_tree, page_start,
-                                   page_end, GFP_NOFS);
+               btrfs_set_extent_delalloc(inode, page_start, page_end);
                set_page_dirty(page);
 
                unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
@@ -3310,6 +3316,13 @@ again:
        key.type = 0;
        cur_byte = key.objectid;
 
+       mutex_unlock(&root->fs_info->alloc_mutex);
+
+       btrfs_start_delalloc_inodes(root);
+       btrfs_wait_ordered_extents(tree_root, 0);
+
+       mutex_lock(&root->fs_info->alloc_mutex);
+
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
@@ -3392,7 +3405,8 @@ next:
 
                btrfs_clean_old_snapshots(tree_root);
 
-               btrfs_wait_ordered_extents(tree_root);
+               btrfs_start_delalloc_inodes(root);
+               btrfs_wait_ordered_extents(tree_root, 0);
 
                trans = btrfs_start_transaction(tree_root, 1);
                btrfs_commit_transaction(trans, tree_root);
@@ -3428,8 +3442,10 @@ next:
                           key.objectid, key.objectid + key.offset - 1,
                           (unsigned int)-1, GFP_NOFS);
 
+       /*
        memset(shrink_block_group, 0, sizeof(*shrink_block_group));
        kfree(shrink_block_group);
+       */
 
        btrfs_del_item(trans, root, path);
        btrfs_release_path(root, path);
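
Several hunks above repeat the same sequence after each __btrfs_free_extent() call: drop alloc_mutex around the expensive work, then bump throttle_gen, wake the transaction throttle, and reschedule. A minimal illustrative sketch of that repeated pattern, factored into a hypothetical helper (throttle_and_resched() is not part of the patch; it assumes the struct btrfs_fs_info fields from ctree.h shown in the diff):

/* Sketch only: mirrors the added lines in drop_leaf_ref_no_cache(),
 * drop_leaf_ref() and walk_down_tree(); the helper name is invented.
 */
static void throttle_and_resched(struct btrfs_fs_info *fs_info)
{
	/* let waiters in btrfs_throttle() see that progress was made */
	atomic_inc(&fs_info->throttle_gen);
	wake_up(&fs_info->transaction_throttle);
	/* give other tasks a chance to run between extent frees */
	cond_resched();
}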