2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
22 #include <linux/version.h>
28 #include "print-tree.h"
29 #include "transaction.h"
32 #include "ref-cache.h"
35 #define PENDING_EXTENT_INSERT 0
36 #define PENDING_EXTENT_DELETE 1
37 #define PENDING_BACKREF_UPDATE 2
39 struct pending_extent_op
{
48 struct list_head list
;
52 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
53 btrfs_root
*extent_root
, int all
);
54 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
55 btrfs_root
*extent_root
, int all
);
56 static struct btrfs_block_group_cache
*
57 __btrfs_find_block_group(struct btrfs_root
*root
,
58 struct btrfs_block_group_cache
*hint
,
59 u64 search_start
, int data
, int owner
);
60 static int pin_down_bytes(struct btrfs_trans_handle
*trans
,
61 struct btrfs_root
*root
,
62 u64 bytenr
, u64 num_bytes
, int is_data
);
63 static int update_block_group(struct btrfs_trans_handle
*trans
,
64 struct btrfs_root
*root
,
65 u64 bytenr
, u64 num_bytes
, int alloc
,
68 static int block_group_bits(struct btrfs_block_group_cache
*cache
, u64 bits
)
70 return (cache
->flags
& bits
) == bits
;
74 * this adds the block group to the fs_info rb tree for the block group
77 static int btrfs_add_block_group_cache(struct btrfs_fs_info
*info
,
78 struct btrfs_block_group_cache
*block_group
)
81 struct rb_node
*parent
= NULL
;
82 struct btrfs_block_group_cache
*cache
;
84 spin_lock(&info
->block_group_cache_lock
);
85 p
= &info
->block_group_cache_tree
.rb_node
;
89 cache
= rb_entry(parent
, struct btrfs_block_group_cache
,
91 if (block_group
->key
.objectid
< cache
->key
.objectid
) {
93 } else if (block_group
->key
.objectid
> cache
->key
.objectid
) {
96 spin_unlock(&info
->block_group_cache_lock
);
101 rb_link_node(&block_group
->cache_node
, parent
, p
);
102 rb_insert_color(&block_group
->cache_node
,
103 &info
->block_group_cache_tree
);
104 spin_unlock(&info
->block_group_cache_lock
);
110 * This will return the block group at or after bytenr if contains is 0, else
111 * it will return the block group that contains the bytenr
113 static struct btrfs_block_group_cache
*
114 block_group_cache_tree_search(struct btrfs_fs_info
*info
, u64 bytenr
,
117 struct btrfs_block_group_cache
*cache
, *ret
= NULL
;
121 spin_lock(&info
->block_group_cache_lock
);
122 n
= info
->block_group_cache_tree
.rb_node
;
125 cache
= rb_entry(n
, struct btrfs_block_group_cache
,
127 end
= cache
->key
.objectid
+ cache
->key
.offset
- 1;
128 start
= cache
->key
.objectid
;
130 if (bytenr
< start
) {
131 if (!contains
&& (!ret
|| start
< ret
->key
.objectid
))
134 } else if (bytenr
> start
) {
135 if (contains
&& bytenr
<= end
) {
145 spin_unlock(&info
->block_group_cache_lock
);
151 * this is only called by cache_block_group, since we could have freed extents
152 * we need to check the pinned_extents for any extents that can't be used yet
153 * since their free space will be released as soon as the transaction commits.
155 static int add_new_free_space(struct btrfs_block_group_cache
*block_group
,
156 struct btrfs_fs_info
*info
, u64 start
, u64 end
)
158 u64 extent_start
, extent_end
, size
;
161 mutex_lock(&info
->pinned_mutex
);
162 while (start
< end
) {
163 ret
= find_first_extent_bit(&info
->pinned_extents
, start
,
164 &extent_start
, &extent_end
,
169 if (extent_start
== start
) {
170 start
= extent_end
+ 1;
171 } else if (extent_start
> start
&& extent_start
< end
) {
172 size
= extent_start
- start
;
173 ret
= btrfs_add_free_space(block_group
, start
,
176 start
= extent_end
+ 1;
184 ret
= btrfs_add_free_space(block_group
, start
, size
);
187 mutex_unlock(&info
->pinned_mutex
);
192 static int cache_block_group(struct btrfs_root
*root
,
193 struct btrfs_block_group_cache
*block_group
)
195 struct btrfs_path
*path
;
197 struct btrfs_key key
;
198 struct extent_buffer
*leaf
;
207 root
= root
->fs_info
->extent_root
;
209 if (block_group
->cached
)
212 path
= btrfs_alloc_path();
218 * we get into deadlocks with paths held by callers of this function.
219 * since the alloc_mutex is protecting things right now, just
220 * skip the locking here
222 path
->skip_locking
= 1;
223 first_free
= max_t(u64
, block_group
->key
.objectid
,
224 BTRFS_SUPER_INFO_OFFSET
+ BTRFS_SUPER_INFO_SIZE
);
225 key
.objectid
= block_group
->key
.objectid
;
227 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
228 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
231 ret
= btrfs_previous_item(root
, path
, 0, BTRFS_EXTENT_ITEM_KEY
);
235 leaf
= path
->nodes
[0];
236 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
237 if (key
.objectid
+ key
.offset
> first_free
)
238 first_free
= key
.objectid
+ key
.offset
;
241 leaf
= path
->nodes
[0];
242 slot
= path
->slots
[0];
243 if (slot
>= btrfs_header_nritems(leaf
)) {
244 ret
= btrfs_next_leaf(root
, path
);
252 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
253 if (key
.objectid
< block_group
->key
.objectid
)
256 if (key
.objectid
>= block_group
->key
.objectid
+
257 block_group
->key
.offset
)
260 if (btrfs_key_type(&key
) == BTRFS_EXTENT_ITEM_KEY
) {
266 add_new_free_space(block_group
, root
->fs_info
, last
,
269 last
= key
.objectid
+ key
.offset
;
278 add_new_free_space(block_group
, root
->fs_info
, last
,
279 block_group
->key
.objectid
+
280 block_group
->key
.offset
);
282 block_group
->cached
= 1;
285 btrfs_free_path(path
);
290 * return the block group that starts at or after bytenr
292 static struct btrfs_block_group_cache
*btrfs_lookup_first_block_group(struct
296 struct btrfs_block_group_cache
*cache
;
298 cache
= block_group_cache_tree_search(info
, bytenr
, 0);
304 * return the block group that contains the given bytenr
306 struct btrfs_block_group_cache
*btrfs_lookup_block_group(struct
310 struct btrfs_block_group_cache
*cache
;
312 cache
= block_group_cache_tree_search(info
, bytenr
, 1);
317 static struct btrfs_space_info
*__find_space_info(struct btrfs_fs_info
*info
,
320 struct list_head
*head
= &info
->space_info
;
321 struct list_head
*cur
;
322 struct btrfs_space_info
*found
;
323 list_for_each(cur
, head
) {
324 found
= list_entry(cur
, struct btrfs_space_info
, list
);
325 if (found
->flags
== flags
)
331 static u64
div_factor(u64 num
, int factor
)
340 static struct btrfs_block_group_cache
*
341 __btrfs_find_block_group(struct btrfs_root
*root
,
342 struct btrfs_block_group_cache
*hint
,
343 u64 search_start
, int data
, int owner
)
345 struct btrfs_block_group_cache
*cache
;
346 struct btrfs_block_group_cache
*found_group
= NULL
;
347 struct btrfs_fs_info
*info
= root
->fs_info
;
355 if (data
& BTRFS_BLOCK_GROUP_METADATA
)
359 struct btrfs_block_group_cache
*shint
;
360 shint
= btrfs_lookup_first_block_group(info
, search_start
);
361 if (shint
&& block_group_bits(shint
, data
)) {
362 spin_lock(&shint
->lock
);
363 used
= btrfs_block_group_used(&shint
->item
);
364 if (used
+ shint
->pinned
+ shint
->reserved
<
365 div_factor(shint
->key
.offset
, factor
)) {
366 spin_unlock(&shint
->lock
);
369 spin_unlock(&shint
->lock
);
372 if (hint
&& block_group_bits(hint
, data
)) {
373 spin_lock(&hint
->lock
);
374 used
= btrfs_block_group_used(&hint
->item
);
375 if (used
+ hint
->pinned
+ hint
->reserved
<
376 div_factor(hint
->key
.offset
, factor
)) {
377 spin_unlock(&hint
->lock
);
380 spin_unlock(&hint
->lock
);
381 last
= hint
->key
.objectid
+ hint
->key
.offset
;
384 last
= max(hint
->key
.objectid
, search_start
);
390 cache
= btrfs_lookup_first_block_group(root
->fs_info
, last
);
394 spin_lock(&cache
->lock
);
395 last
= cache
->key
.objectid
+ cache
->key
.offset
;
396 used
= btrfs_block_group_used(&cache
->item
);
398 if (block_group_bits(cache
, data
)) {
399 free_check
= div_factor(cache
->key
.offset
, factor
);
400 if (used
+ cache
->pinned
+ cache
->reserved
<
403 spin_unlock(&cache
->lock
);
407 spin_unlock(&cache
->lock
);
415 if (!full_search
&& factor
< 10) {
425 struct btrfs_block_group_cache
*btrfs_find_block_group(struct btrfs_root
*root
,
426 struct btrfs_block_group_cache
427 *hint
, u64 search_start
,
431 struct btrfs_block_group_cache
*ret
;
432 ret
= __btrfs_find_block_group(root
, hint
, search_start
, data
, owner
);
436 /* simple helper to search for an existing extent at a given offset */
437 int btrfs_lookup_extent(struct btrfs_root
*root
, u64 start
, u64 len
)
440 struct btrfs_key key
;
441 struct btrfs_path
*path
;
443 path
= btrfs_alloc_path();
445 key
.objectid
= start
;
447 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
448 ret
= btrfs_search_slot(NULL
, root
->fs_info
->extent_root
, &key
, path
,
450 btrfs_free_path(path
);
455 * Back reference rules. Back refs have three main goals:
457 * 1) differentiate between all holders of references to an extent so that
458 * when a reference is dropped we can make sure it was a valid reference
459 * before freeing the extent.
461 * 2) Provide enough information to quickly find the holders of an extent
462 * if we notice a given block is corrupted or bad.
464 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
465 * maintenance. This is actually the same as #2, but with a slightly
466 * different use case.
468 * File extents can be referenced by:
470 * - multiple snapshots, subvolumes, or different generations in one subvol
471 * - different files inside a single subvolume
472 * - different offsets inside a file (bookend extents in file.c)
474 * The extent ref structure has fields for:
476 * - Objectid of the subvolume root
477 * - Generation number of the tree holding the reference
478 * - objectid of the file holding the reference
479 * - number of references held by parent node (always 1 for tree blocks)
481 * Btree leaf may hold multiple references to a file extent. In most cases,
482 * these references are from same file and the corresponding offsets inside
483 * the file are close together.
485 * When a file extent is allocated the fields are filled in:
486 * (root_key.objectid, trans->transid, inode objectid, 1)
488 * When a leaf is cow'd new references are added for every file extent found
489 * in the leaf. It looks similar to the create case, but trans->transid will
490 * be different when the block is cow'd.
492 * (root_key.objectid, trans->transid, inode objectid,
493 * number of references in the leaf)
495 * When a file extent is removed either during snapshot deletion or
496 * file truncation, we find the corresponding back reference and check
497 * the following fields:
499 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
502 * Btree extents can be referenced by:
504 * - Different subvolumes
505 * - Different generations of the same subvolume
507 * When a tree block is created, back references are inserted:
509 * (root->root_key.objectid, trans->transid, level, 1)
511 * When a tree block is cow'd, new back references are added for all the
512 * blocks it points to. If the tree block isn't in reference counted root,
513 * the old back references are removed. These new back references are of
514 * the form (trans->transid will have increased since creation):
516 * (root->root_key.objectid, trans->transid, level, 1)
518 * When a backref is in deleting, the following fields are checked:
520 * if backref was for a tree root:
521 * (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
523 * (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
525 * Back Reference Key composing:
527 * The key objectid corresponds to the first byte in the extent, the key
528 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
529 * byte of parent extent. If a extent is tree root, the key offset is set
530 * to the key objectid.
533 static int noinline
lookup_extent_backref(struct btrfs_trans_handle
*trans
,
534 struct btrfs_root
*root
,
535 struct btrfs_path
*path
,
536 u64 bytenr
, u64 parent
,
537 u64 ref_root
, u64 ref_generation
,
538 u64 owner_objectid
, int del
)
540 struct btrfs_key key
;
541 struct btrfs_extent_ref
*ref
;
542 struct extent_buffer
*leaf
;
546 key
.objectid
= bytenr
;
547 key
.type
= BTRFS_EXTENT_REF_KEY
;
550 ret
= btrfs_search_slot(trans
, root
, &key
, path
, del
? -1 : 0, 1);
558 leaf
= path
->nodes
[0];
559 ref
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_ref
);
560 ref_objectid
= btrfs_ref_objectid(leaf
, ref
);
561 if (btrfs_ref_root(leaf
, ref
) != ref_root
||
562 btrfs_ref_generation(leaf
, ref
) != ref_generation
||
563 (ref_objectid
!= owner_objectid
&&
564 ref_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
)) {
575 * updates all the backrefs that are pending on update_list for the
578 static int noinline
update_backrefs(struct btrfs_trans_handle
*trans
,
579 struct btrfs_root
*extent_root
,
580 struct btrfs_path
*path
,
581 struct list_head
*update_list
)
583 struct btrfs_key key
;
584 struct btrfs_extent_ref
*ref
;
585 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
586 struct pending_extent_op
*op
;
587 struct extent_buffer
*leaf
;
589 struct list_head
*cur
= update_list
->next
;
591 u64 ref_root
= extent_root
->root_key
.objectid
;
593 op
= list_entry(cur
, struct pending_extent_op
, list
);
596 key
.objectid
= op
->bytenr
;
597 key
.type
= BTRFS_EXTENT_REF_KEY
;
598 key
.offset
= op
->orig_parent
;
600 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 1);
603 leaf
= path
->nodes
[0];
606 ref
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_ref
);
608 ref_objectid
= btrfs_ref_objectid(leaf
, ref
);
610 if (btrfs_ref_root(leaf
, ref
) != ref_root
||
611 btrfs_ref_generation(leaf
, ref
) != op
->orig_generation
||
612 (ref_objectid
!= op
->level
&&
613 ref_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
)) {
614 printk(KERN_ERR
"couldn't find %Lu, parent %Lu, root %Lu, "
615 "owner %u\n", op
->bytenr
, op
->orig_parent
,
616 ref_root
, op
->level
);
617 btrfs_print_leaf(extent_root
, leaf
);
621 key
.objectid
= op
->bytenr
;
622 key
.offset
= op
->parent
;
623 key
.type
= BTRFS_EXTENT_REF_KEY
;
624 ret
= btrfs_set_item_key_safe(trans
, extent_root
, path
, &key
);
626 ref
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_ref
);
627 btrfs_set_ref_generation(leaf
, ref
, op
->generation
);
631 list_del_init(&op
->list
);
632 unlock_extent(&info
->extent_ins
, op
->bytenr
,
633 op
->bytenr
+ op
->num_bytes
- 1, GFP_NOFS
);
636 if (cur
== update_list
) {
637 btrfs_mark_buffer_dirty(path
->nodes
[0]);
638 btrfs_release_path(extent_root
, path
);
642 op
= list_entry(cur
, struct pending_extent_op
, list
);
645 while (path
->slots
[0] < btrfs_header_nritems(leaf
)) {
646 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
647 if (key
.objectid
== op
->bytenr
&&
648 key
.type
== BTRFS_EXTENT_REF_KEY
)
653 btrfs_mark_buffer_dirty(path
->nodes
[0]);
654 btrfs_release_path(extent_root
, path
);
661 static int noinline
insert_extents(struct btrfs_trans_handle
*trans
,
662 struct btrfs_root
*extent_root
,
663 struct btrfs_path
*path
,
664 struct list_head
*insert_list
, int nr
)
666 struct btrfs_key
*keys
;
668 struct pending_extent_op
*op
;
669 struct extent_buffer
*leaf
;
670 struct list_head
*cur
= insert_list
->next
;
671 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
672 u64 ref_root
= extent_root
->root_key
.objectid
;
673 int i
= 0, last
= 0, ret
;
679 keys
= kzalloc(total
* sizeof(struct btrfs_key
), GFP_NOFS
);
683 data_size
= kzalloc(total
* sizeof(u32
), GFP_NOFS
);
689 list_for_each_entry(op
, insert_list
, list
) {
690 keys
[i
].objectid
= op
->bytenr
;
691 keys
[i
].offset
= op
->num_bytes
;
692 keys
[i
].type
= BTRFS_EXTENT_ITEM_KEY
;
693 data_size
[i
] = sizeof(struct btrfs_extent_item
);
696 keys
[i
].objectid
= op
->bytenr
;
697 keys
[i
].offset
= op
->parent
;
698 keys
[i
].type
= BTRFS_EXTENT_REF_KEY
;
699 data_size
[i
] = sizeof(struct btrfs_extent_ref
);
703 op
= list_entry(cur
, struct pending_extent_op
, list
);
707 ret
= btrfs_insert_some_items(trans
, extent_root
, path
,
708 keys
+i
, data_size
+i
, total
-i
);
714 leaf
= path
->nodes
[0];
715 for (c
= 0; c
< ret
; c
++) {
716 int ref_first
= keys
[i
].type
== BTRFS_EXTENT_REF_KEY
;
719 * if the first item we inserted was a backref, then
720 * the EXTENT_ITEM will be the odd c's, else it will
723 if ((ref_first
&& (c
% 2)) ||
724 (!ref_first
&& !(c
% 2))) {
725 struct btrfs_extent_item
*itm
;
727 itm
= btrfs_item_ptr(leaf
, path
->slots
[0] + c
,
728 struct btrfs_extent_item
);
729 btrfs_set_extent_refs(path
->nodes
[0], itm
, 1);
732 struct btrfs_extent_ref
*ref
;
734 ref
= btrfs_item_ptr(leaf
, path
->slots
[0] + c
,
735 struct btrfs_extent_ref
);
736 btrfs_set_ref_root(leaf
, ref
, ref_root
);
737 btrfs_set_ref_generation(leaf
, ref
,
739 btrfs_set_ref_objectid(leaf
, ref
, op
->level
);
740 btrfs_set_ref_num_refs(leaf
, ref
, 1);
745 * using del to see when it's ok to free up the
746 * pending_extent_op. In the case where we insert the
747 * last item on the list in order to help do batching
748 * we need to not free the extent op until we actually
749 * insert the extent_item
752 unlock_extent(&info
->extent_ins
, op
->bytenr
,
753 op
->bytenr
+ op
->num_bytes
- 1,
756 list_del_init(&op
->list
);
758 if (cur
!= insert_list
)
760 struct pending_extent_op
,
764 btrfs_mark_buffer_dirty(leaf
);
765 btrfs_release_path(extent_root
, path
);
768 * Ok backrefs and items usually go right next to each other,
769 * but if we could only insert 1 item that means that we
770 * inserted on the end of a leaf, and we have no idea what may
771 * be on the next leaf so we just play it safe. In order to
772 * try and help this case we insert the last thing on our
773 * insert list so hopefully it will end up being the last
774 * thing on the leaf and everything else will be before it,
775 * which will let us insert a whole bunch of items at the same
778 if (ret
== 1 && !last
&& (i
+ ret
< total
)) {
780 * last: where we will pick up the next time around
781 * i: our current key to insert, will be total - 1
782 * cur: the current op we are screwing with
787 cur
= insert_list
->prev
;
788 op
= list_entry(cur
, struct pending_extent_op
, list
);
791 * ok we successfully inserted the last item on the
792 * list, lets reset everything
794 * i: our current key to insert, so where we left off
796 * last: done with this
797 * cur: the op we are messing with
799 * total: since we inserted the last key, we need to
800 * decrement total so we don't overflow
806 cur
= insert_list
->next
;
807 op
= list_entry(cur
, struct pending_extent_op
,
822 static int noinline
insert_extent_backref(struct btrfs_trans_handle
*trans
,
823 struct btrfs_root
*root
,
824 struct btrfs_path
*path
,
825 u64 bytenr
, u64 parent
,
826 u64 ref_root
, u64 ref_generation
,
829 struct btrfs_key key
;
830 struct extent_buffer
*leaf
;
831 struct btrfs_extent_ref
*ref
;
835 key
.objectid
= bytenr
;
836 key
.type
= BTRFS_EXTENT_REF_KEY
;
839 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, sizeof(*ref
));
841 leaf
= path
->nodes
[0];
842 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
843 struct btrfs_extent_ref
);
844 btrfs_set_ref_root(leaf
, ref
, ref_root
);
845 btrfs_set_ref_generation(leaf
, ref
, ref_generation
);
846 btrfs_set_ref_objectid(leaf
, ref
, owner_objectid
);
847 btrfs_set_ref_num_refs(leaf
, ref
, 1);
848 } else if (ret
== -EEXIST
) {
850 BUG_ON(owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
);
851 leaf
= path
->nodes
[0];
852 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
853 struct btrfs_extent_ref
);
854 if (btrfs_ref_root(leaf
, ref
) != ref_root
||
855 btrfs_ref_generation(leaf
, ref
) != ref_generation
) {
861 num_refs
= btrfs_ref_num_refs(leaf
, ref
);
862 BUG_ON(num_refs
== 0);
863 btrfs_set_ref_num_refs(leaf
, ref
, num_refs
+ 1);
865 existing_owner
= btrfs_ref_objectid(leaf
, ref
);
866 if (existing_owner
!= owner_objectid
&&
867 existing_owner
!= BTRFS_MULTIPLE_OBJECTIDS
) {
868 btrfs_set_ref_objectid(leaf
, ref
,
869 BTRFS_MULTIPLE_OBJECTIDS
);
875 btrfs_mark_buffer_dirty(path
->nodes
[0]);
877 btrfs_release_path(root
, path
);
881 static int noinline
remove_extent_backref(struct btrfs_trans_handle
*trans
,
882 struct btrfs_root
*root
,
883 struct btrfs_path
*path
)
885 struct extent_buffer
*leaf
;
886 struct btrfs_extent_ref
*ref
;
890 leaf
= path
->nodes
[0];
891 ref
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_ref
);
892 num_refs
= btrfs_ref_num_refs(leaf
, ref
);
893 BUG_ON(num_refs
== 0);
896 ret
= btrfs_del_item(trans
, root
, path
);
898 btrfs_set_ref_num_refs(leaf
, ref
, num_refs
);
899 btrfs_mark_buffer_dirty(leaf
);
901 btrfs_release_path(root
, path
);
#ifdef BIO_RW_DISCARD
/*
 * Ask the underlying block device to discard (TRIM) the byte range
 * [start, start + len).  blkdev_issue_discard() works in 512-byte
 * sectors, hence the >> 9 conversions; the GFP_KERNEL argument is
 * only taken by kernels >= 2.6.28.
 */
static void btrfs_issue_discard(struct block_device *bdev,
				u64 start, u64 len)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28)
	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
#else
	blkdev_issue_discard(bdev, start >> 9, len >> 9);
#endif
}
#endif
917 static int noinline
free_extents(struct btrfs_trans_handle
*trans
,
918 struct btrfs_root
*extent_root
,
919 struct list_head
*del_list
)
921 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
922 struct btrfs_path
*path
;
923 struct btrfs_key key
, found_key
;
924 struct extent_buffer
*leaf
;
925 struct list_head
*cur
;
926 struct pending_extent_op
*op
;
927 struct btrfs_extent_item
*ei
;
928 int ret
, num_to_del
, extent_slot
= 0, found_extent
= 0;
932 path
= btrfs_alloc_path();
938 /* search for the backref for the current ref we want to delete */
939 cur
= del_list
->next
;
940 op
= list_entry(cur
, struct pending_extent_op
, list
);
941 ret
= lookup_extent_backref(trans
, extent_root
, path
, op
->bytenr
,
943 extent_root
->root_key
.objectid
,
944 op
->orig_generation
, op
->level
, 1);
946 printk("Unable to find backref byte nr %Lu root %Lu gen %Lu "
947 "owner %u\n", op
->bytenr
,
948 extent_root
->root_key
.objectid
, op
->orig_generation
,
950 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
955 extent_slot
= path
->slots
[0];
960 * if we aren't the first item on the leaf we can move back one and see
961 * if our ref is right next to our extent item
963 if (likely(extent_slot
)) {
965 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
967 if (found_key
.objectid
== op
->bytenr
&&
968 found_key
.type
== BTRFS_EXTENT_ITEM_KEY
&&
969 found_key
.offset
== op
->num_bytes
) {
976 * if we didn't find the extent we need to delete the backref and then
977 * search for the extent item key so we can update its ref count
980 key
.objectid
= op
->bytenr
;
981 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
982 key
.offset
= op
->num_bytes
;
984 ret
= remove_extent_backref(trans
, extent_root
, path
);
986 btrfs_release_path(extent_root
, path
);
987 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, -1, 1);
989 extent_slot
= path
->slots
[0];
992 /* this is where we update the ref count for the extent */
993 leaf
= path
->nodes
[0];
994 ei
= btrfs_item_ptr(leaf
, extent_slot
, struct btrfs_extent_item
);
995 refs
= btrfs_extent_refs(leaf
, ei
);
998 btrfs_set_extent_refs(leaf
, ei
, refs
);
1000 btrfs_mark_buffer_dirty(leaf
);
1003 * This extent needs deleting. The reason cur_slot is extent_slot +
1004 * num_to_del is because extent_slot points to the slot where the extent
1005 * is, and if the backref was not right next to the extent we will be
1006 * deleting at least 1 item, and will want to start searching at the
1007 * slot directly next to extent_slot. However if we did find the
1008 * backref next to the extent item then we will be deleting at least 2
1009 * items and will want to start searching directly after the ref slot
1012 struct list_head
*pos
, *n
, *end
;
1013 int cur_slot
= extent_slot
+num_to_del
;
1017 path
->slots
[0] = extent_slot
;
1018 bytes_freed
= op
->num_bytes
;
1020 mutex_lock(&info
->pinned_mutex
);
1021 ret
= pin_down_bytes(trans
, extent_root
, op
->bytenr
,
1022 op
->num_bytes
, op
->level
>=
1023 BTRFS_FIRST_FREE_OBJECTID
);
1024 mutex_unlock(&info
->pinned_mutex
);
1029 * we need to see if we can delete multiple things at once, so
1030 * start looping through the list of extents we are wanting to
1031 * delete and see if their extent/backref's are right next to
1032 * each other and the extents only have 1 ref
1034 for (pos
= cur
->next
; pos
!= del_list
; pos
= pos
->next
) {
1035 struct pending_extent_op
*tmp
;
1037 tmp
= list_entry(pos
, struct pending_extent_op
, list
);
1039 /* we only want to delete extent+ref at this stage */
1040 if (cur_slot
>= btrfs_header_nritems(leaf
) - 1)
1043 btrfs_item_key_to_cpu(leaf
, &found_key
, cur_slot
);
1044 if (found_key
.objectid
!= tmp
->bytenr
||
1045 found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
||
1046 found_key
.offset
!= tmp
->num_bytes
)
1049 /* check to make sure this extent only has one ref */
1050 ei
= btrfs_item_ptr(leaf
, cur_slot
,
1051 struct btrfs_extent_item
);
1052 if (btrfs_extent_refs(leaf
, ei
) != 1)
1055 btrfs_item_key_to_cpu(leaf
, &found_key
, cur_slot
+1);
1056 if (found_key
.objectid
!= tmp
->bytenr
||
1057 found_key
.type
!= BTRFS_EXTENT_REF_KEY
||
1058 found_key
.offset
!= tmp
->orig_parent
)
1062 * the ref is right next to the extent, we can set the
1063 * ref count to 0 since we will delete them both now
1065 btrfs_set_extent_refs(leaf
, ei
, 0);
1067 /* pin down the bytes for this extent */
1068 mutex_lock(&info
->pinned_mutex
);
1069 ret
= pin_down_bytes(trans
, extent_root
, tmp
->bytenr
,
1070 tmp
->num_bytes
, tmp
->level
>=
1071 BTRFS_FIRST_FREE_OBJECTID
);
1072 mutex_unlock(&info
->pinned_mutex
);
1076 * use the del field to tell if we need to go ahead and
1077 * free up the extent when we delete the item or not.
1080 bytes_freed
+= tmp
->num_bytes
;
1087 /* update the free space counters */
1088 spin_lock_irq(&info
->delalloc_lock
);
1089 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
1090 btrfs_set_super_bytes_used(&info
->super_copy
,
1091 super_used
- bytes_freed
);
1092 spin_unlock_irq(&info
->delalloc_lock
);
1094 root_used
= btrfs_root_used(&extent_root
->root_item
);
1095 btrfs_set_root_used(&extent_root
->root_item
,
1096 root_used
- bytes_freed
);
1098 /* delete the items */
1099 ret
= btrfs_del_items(trans
, extent_root
, path
,
1100 path
->slots
[0], num_to_del
);
1104 * loop through the extents we deleted and do the cleanup work
1107 for (pos
= cur
, n
= pos
->next
; pos
!= end
;
1108 pos
= n
, n
= pos
->next
) {
1109 struct pending_extent_op
*tmp
;
1110 #ifdef BIO_RW_DISCARD
1112 struct btrfs_multi_bio
*multi
= NULL
;
1114 tmp
= list_entry(pos
, struct pending_extent_op
, list
);
1117 * remember tmp->del tells us whether or not we pinned
1120 ret
= update_block_group(trans
, extent_root
,
1121 tmp
->bytenr
, tmp
->num_bytes
, 0,
1125 #ifdef BIO_RW_DISCARD
1126 map_length
= tmp
->num_bytes
;
1127 ret
= btrfs_map_block(&info
->mapping_tree
, READ
,
1128 tmp
->bytenr
, &map_length
, &multi
,
1131 struct btrfs_bio_stripe
*stripe
;
1134 stripe
= multi
->stripes
;
1136 if (map_length
> tmp
->num_bytes
)
1137 map_length
= tmp
->num_bytes
;
1139 for (i
= 0; i
< multi
->num_stripes
;
1141 btrfs_issue_discard(stripe
->dev
->bdev
,
1147 list_del_init(&tmp
->list
);
1148 unlock_extent(&info
->extent_ins
, tmp
->bytenr
,
1149 tmp
->bytenr
+ tmp
->num_bytes
- 1,
1153 } else if (refs
&& found_extent
) {
1155 * the ref and extent were right next to each other, but the
1156 * extent still has a ref, so just free the backref and keep
1159 ret
= remove_extent_backref(trans
, extent_root
, path
);
1162 list_del_init(&op
->list
);
1163 unlock_extent(&info
->extent_ins
, op
->bytenr
,
1164 op
->bytenr
+ op
->num_bytes
- 1, GFP_NOFS
);
1168 * the extent has multiple refs and the backref we were looking
1169 * for was not right next to it, so just unlock and go next,
1172 list_del_init(&op
->list
);
1173 unlock_extent(&info
->extent_ins
, op
->bytenr
,
1174 op
->bytenr
+ op
->num_bytes
- 1, GFP_NOFS
);
1178 btrfs_release_path(extent_root
, path
);
1179 if (!list_empty(del_list
))
1183 btrfs_free_path(path
);
1187 static int __btrfs_update_extent_ref(struct btrfs_trans_handle
*trans
,
1188 struct btrfs_root
*root
, u64 bytenr
,
1189 u64 orig_parent
, u64 parent
,
1190 u64 orig_root
, u64 ref_root
,
1191 u64 orig_generation
, u64 ref_generation
,
1195 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
1196 struct btrfs_path
*path
;
1198 if (root
== root
->fs_info
->extent_root
) {
1199 struct pending_extent_op
*extent_op
;
1202 BUG_ON(owner_objectid
>= BTRFS_MAX_LEVEL
);
1203 num_bytes
= btrfs_level_size(root
, (int)owner_objectid
);
1204 mutex_lock(&root
->fs_info
->extent_ins_mutex
);
1205 if (test_range_bit(&root
->fs_info
->extent_ins
, bytenr
,
1206 bytenr
+ num_bytes
- 1, EXTENT_WRITEBACK
, 0)) {
1208 ret
= get_state_private(&root
->fs_info
->extent_ins
,
1211 extent_op
= (struct pending_extent_op
*)
1212 (unsigned long)priv
;
1213 BUG_ON(extent_op
->parent
!= orig_parent
);
1214 BUG_ON(extent_op
->generation
!= orig_generation
);
1216 extent_op
->parent
= parent
;
1217 extent_op
->generation
= ref_generation
;
1219 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
1222 extent_op
->type
= PENDING_BACKREF_UPDATE
;
1223 extent_op
->bytenr
= bytenr
;
1224 extent_op
->num_bytes
= num_bytes
;
1225 extent_op
->parent
= parent
;
1226 extent_op
->orig_parent
= orig_parent
;
1227 extent_op
->generation
= ref_generation
;
1228 extent_op
->orig_generation
= orig_generation
;
1229 extent_op
->level
= (int)owner_objectid
;
1230 INIT_LIST_HEAD(&extent_op
->list
);
1233 set_extent_bits(&root
->fs_info
->extent_ins
,
1234 bytenr
, bytenr
+ num_bytes
- 1,
1235 EXTENT_WRITEBACK
, GFP_NOFS
);
1236 set_state_private(&root
->fs_info
->extent_ins
,
1237 bytenr
, (unsigned long)extent_op
);
1239 mutex_unlock(&root
->fs_info
->extent_ins_mutex
);
1243 path
= btrfs_alloc_path();
1246 ret
= lookup_extent_backref(trans
, extent_root
, path
,
1247 bytenr
, orig_parent
, orig_root
,
1248 orig_generation
, owner_objectid
, 1);
1251 ret
= remove_extent_backref(trans
, extent_root
, path
);
1254 ret
= insert_extent_backref(trans
, extent_root
, path
, bytenr
,
1255 parent
, ref_root
, ref_generation
,
1258 finish_current_insert(trans
, extent_root
, 0);
1259 del_pending_extents(trans
, extent_root
, 0);
1261 btrfs_free_path(path
);
1265 int btrfs_update_extent_ref(struct btrfs_trans_handle
*trans
,
1266 struct btrfs_root
*root
, u64 bytenr
,
1267 u64 orig_parent
, u64 parent
,
1268 u64 ref_root
, u64 ref_generation
,
1272 if (ref_root
== BTRFS_TREE_LOG_OBJECTID
&&
1273 owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
1275 ret
= __btrfs_update_extent_ref(trans
, root
, bytenr
, orig_parent
,
1276 parent
, ref_root
, ref_root
,
1277 ref_generation
, ref_generation
,
1282 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
1283 struct btrfs_root
*root
, u64 bytenr
,
1284 u64 orig_parent
, u64 parent
,
1285 u64 orig_root
, u64 ref_root
,
1286 u64 orig_generation
, u64 ref_generation
,
1289 struct btrfs_path
*path
;
1291 struct btrfs_key key
;
1292 struct extent_buffer
*l
;
1293 struct btrfs_extent_item
*item
;
1296 path
= btrfs_alloc_path();
1301 key
.objectid
= bytenr
;
1302 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1303 key
.offset
= (u64
)-1;
1305 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
1309 BUG_ON(ret
== 0 || path
->slots
[0] == 0);
1314 btrfs_item_key_to_cpu(l
, &key
, path
->slots
[0]);
1315 if (key
.objectid
!= bytenr
) {
1316 btrfs_print_leaf(root
->fs_info
->extent_root
, path
->nodes
[0]);
1317 printk("wanted %Lu found %Lu\n", bytenr
, key
.objectid
);
1320 BUG_ON(key
.type
!= BTRFS_EXTENT_ITEM_KEY
);
1322 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
1323 refs
= btrfs_extent_refs(l
, item
);
1324 btrfs_set_extent_refs(l
, item
, refs
+ 1);
1325 btrfs_mark_buffer_dirty(path
->nodes
[0]);
1327 btrfs_release_path(root
->fs_info
->extent_root
, path
);
1330 ret
= insert_extent_backref(trans
, root
->fs_info
->extent_root
,
1331 path
, bytenr
, parent
,
1332 ref_root
, ref_generation
,
1335 finish_current_insert(trans
, root
->fs_info
->extent_root
, 0);
1336 del_pending_extents(trans
, root
->fs_info
->extent_root
, 0);
1338 btrfs_free_path(path
);
1342 int btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
1343 struct btrfs_root
*root
,
1344 u64 bytenr
, u64 num_bytes
, u64 parent
,
1345 u64 ref_root
, u64 ref_generation
,
1349 if (ref_root
== BTRFS_TREE_LOG_OBJECTID
&&
1350 owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
1352 ret
= __btrfs_inc_extent_ref(trans
, root
, bytenr
, 0, parent
,
1353 0, ref_root
, 0, ref_generation
,
1358 int btrfs_extent_post_op(struct btrfs_trans_handle
*trans
,
1359 struct btrfs_root
*root
)
1361 finish_current_insert(trans
, root
->fs_info
->extent_root
, 1);
1362 del_pending_extents(trans
, root
->fs_info
->extent_root
, 1);
1366 int btrfs_lookup_extent_ref(struct btrfs_trans_handle
*trans
,
1367 struct btrfs_root
*root
, u64 bytenr
,
1368 u64 num_bytes
, u32
*refs
)
1370 struct btrfs_path
*path
;
1372 struct btrfs_key key
;
1373 struct extent_buffer
*l
;
1374 struct btrfs_extent_item
*item
;
1376 WARN_ON(num_bytes
< root
->sectorsize
);
1377 path
= btrfs_alloc_path();
1379 key
.objectid
= bytenr
;
1380 key
.offset
= num_bytes
;
1381 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
1382 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
1387 btrfs_print_leaf(root
, path
->nodes
[0]);
1388 printk("failed to find block number %Lu\n", bytenr
);
1392 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
1393 *refs
= btrfs_extent_refs(l
, item
);
1395 btrfs_free_path(path
);
1399 int btrfs_cross_ref_exist(struct btrfs_trans_handle
*trans
,
1400 struct btrfs_root
*root
, u64 bytenr
)
1402 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
1403 struct btrfs_path
*path
;
1404 struct extent_buffer
*leaf
;
1405 struct btrfs_extent_ref
*ref_item
;
1406 struct btrfs_key key
;
1407 struct btrfs_key found_key
;
1413 key
.objectid
= bytenr
;
1414 key
.offset
= (u64
)-1;
1415 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1417 path
= btrfs_alloc_path();
1418 ret
= btrfs_search_slot(NULL
, extent_root
, &key
, path
, 0, 0);
1424 if (path
->slots
[0] == 0)
1428 leaf
= path
->nodes
[0];
1429 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
1431 if (found_key
.objectid
!= bytenr
||
1432 found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
)
1435 last_snapshot
= btrfs_root_last_snapshot(&root
->root_item
);
1437 leaf
= path
->nodes
[0];
1438 nritems
= btrfs_header_nritems(leaf
);
1439 if (path
->slots
[0] >= nritems
) {
1440 ret
= btrfs_next_leaf(extent_root
, path
);
1447 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
1448 if (found_key
.objectid
!= bytenr
)
1451 if (found_key
.type
!= BTRFS_EXTENT_REF_KEY
) {
1456 ref_item
= btrfs_item_ptr(leaf
, path
->slots
[0],
1457 struct btrfs_extent_ref
);
1458 ref_root
= btrfs_ref_root(leaf
, ref_item
);
1459 if (ref_root
!= root
->root_key
.objectid
&&
1460 ref_root
!= BTRFS_TREE_LOG_OBJECTID
) {
1464 if (btrfs_ref_generation(leaf
, ref_item
) <= last_snapshot
) {
1473 btrfs_free_path(path
);
1477 int btrfs_cache_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
1478 struct extent_buffer
*buf
, u32 nr_extents
)
1480 struct btrfs_key key
;
1481 struct btrfs_file_extent_item
*fi
;
1489 if (!root
->ref_cows
)
1492 if (root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
1494 root_gen
= root
->root_key
.offset
;
1497 root_gen
= trans
->transid
- 1;
1500 level
= btrfs_header_level(buf
);
1501 nritems
= btrfs_header_nritems(buf
);
1504 struct btrfs_leaf_ref
*ref
;
1505 struct btrfs_extent_info
*info
;
1507 ref
= btrfs_alloc_leaf_ref(root
, nr_extents
);
1513 ref
->root_gen
= root_gen
;
1514 ref
->bytenr
= buf
->start
;
1515 ref
->owner
= btrfs_header_owner(buf
);
1516 ref
->generation
= btrfs_header_generation(buf
);
1517 ref
->nritems
= nr_extents
;
1518 info
= ref
->extents
;
1520 for (i
= 0; nr_extents
> 0 && i
< nritems
; i
++) {
1522 btrfs_item_key_to_cpu(buf
, &key
, i
);
1523 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1525 fi
= btrfs_item_ptr(buf
, i
,
1526 struct btrfs_file_extent_item
);
1527 if (btrfs_file_extent_type(buf
, fi
) ==
1528 BTRFS_FILE_EXTENT_INLINE
)
1530 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1531 if (disk_bytenr
== 0)
1534 info
->bytenr
= disk_bytenr
;
1536 btrfs_file_extent_disk_num_bytes(buf
, fi
);
1537 info
->objectid
= key
.objectid
;
1538 info
->offset
= key
.offset
;
1542 ret
= btrfs_add_leaf_ref(root
, ref
, shared
);
1543 if (ret
== -EEXIST
&& shared
) {
1544 struct btrfs_leaf_ref
*old
;
1545 old
= btrfs_lookup_leaf_ref(root
, ref
->bytenr
);
1547 btrfs_remove_leaf_ref(root
, old
);
1548 btrfs_free_leaf_ref(root
, old
);
1549 ret
= btrfs_add_leaf_ref(root
, ref
, shared
);
1552 btrfs_free_leaf_ref(root
, ref
);
1558 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
1559 struct extent_buffer
*orig_buf
, struct extent_buffer
*buf
,
1566 u64 orig_generation
;
1568 u32 nr_file_extents
= 0;
1569 struct btrfs_key key
;
1570 struct btrfs_file_extent_item
*fi
;
1575 int (*process_func
)(struct btrfs_trans_handle
*, struct btrfs_root
*,
1576 u64
, u64
, u64
, u64
, u64
, u64
, u64
, u64
);
1578 ref_root
= btrfs_header_owner(buf
);
1579 ref_generation
= btrfs_header_generation(buf
);
1580 orig_root
= btrfs_header_owner(orig_buf
);
1581 orig_generation
= btrfs_header_generation(orig_buf
);
1583 nritems
= btrfs_header_nritems(buf
);
1584 level
= btrfs_header_level(buf
);
1586 if (root
->ref_cows
) {
1587 process_func
= __btrfs_inc_extent_ref
;
1590 root
->root_key
.objectid
!= BTRFS_TREE_LOG_OBJECTID
)
1593 root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
)
1595 process_func
= __btrfs_update_extent_ref
;
1598 for (i
= 0; i
< nritems
; i
++) {
1601 btrfs_item_key_to_cpu(buf
, &key
, i
);
1602 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1604 fi
= btrfs_item_ptr(buf
, i
,
1605 struct btrfs_file_extent_item
);
1606 if (btrfs_file_extent_type(buf
, fi
) ==
1607 BTRFS_FILE_EXTENT_INLINE
)
1609 bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1615 ret
= process_func(trans
, root
, bytenr
,
1616 orig_buf
->start
, buf
->start
,
1617 orig_root
, ref_root
,
1618 orig_generation
, ref_generation
,
1627 bytenr
= btrfs_node_blockptr(buf
, i
);
1628 ret
= process_func(trans
, root
, bytenr
,
1629 orig_buf
->start
, buf
->start
,
1630 orig_root
, ref_root
,
1631 orig_generation
, ref_generation
,
1643 *nr_extents
= nr_file_extents
;
1645 *nr_extents
= nritems
;
1653 int btrfs_update_ref(struct btrfs_trans_handle
*trans
,
1654 struct btrfs_root
*root
, struct extent_buffer
*orig_buf
,
1655 struct extent_buffer
*buf
, int start_slot
, int nr
)
1662 u64 orig_generation
;
1663 struct btrfs_key key
;
1664 struct btrfs_file_extent_item
*fi
;
1670 BUG_ON(start_slot
< 0);
1671 BUG_ON(start_slot
+ nr
> btrfs_header_nritems(buf
));
1673 ref_root
= btrfs_header_owner(buf
);
1674 ref_generation
= btrfs_header_generation(buf
);
1675 orig_root
= btrfs_header_owner(orig_buf
);
1676 orig_generation
= btrfs_header_generation(orig_buf
);
1677 level
= btrfs_header_level(buf
);
1679 if (!root
->ref_cows
) {
1681 root
->root_key
.objectid
!= BTRFS_TREE_LOG_OBJECTID
)
1684 root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
)
1688 for (i
= 0, slot
= start_slot
; i
< nr
; i
++, slot
++) {
1691 btrfs_item_key_to_cpu(buf
, &key
, slot
);
1692 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1694 fi
= btrfs_item_ptr(buf
, slot
,
1695 struct btrfs_file_extent_item
);
1696 if (btrfs_file_extent_type(buf
, fi
) ==
1697 BTRFS_FILE_EXTENT_INLINE
)
1699 bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1702 ret
= __btrfs_update_extent_ref(trans
, root
, bytenr
,
1703 orig_buf
->start
, buf
->start
,
1704 orig_root
, ref_root
,
1705 orig_generation
, ref_generation
,
1710 bytenr
= btrfs_node_blockptr(buf
, slot
);
1711 ret
= __btrfs_update_extent_ref(trans
, root
, bytenr
,
1712 orig_buf
->start
, buf
->start
,
1713 orig_root
, ref_root
,
1714 orig_generation
, ref_generation
,
1726 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
1727 struct btrfs_root
*root
,
1728 struct btrfs_path
*path
,
1729 struct btrfs_block_group_cache
*cache
)
1733 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
1735 struct extent_buffer
*leaf
;
1737 ret
= btrfs_search_slot(trans
, extent_root
, &cache
->key
, path
, 0, 1);
1742 leaf
= path
->nodes
[0];
1743 bi
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
1744 write_extent_buffer(leaf
, &cache
->item
, bi
, sizeof(cache
->item
));
1745 btrfs_mark_buffer_dirty(leaf
);
1746 btrfs_release_path(extent_root
, path
);
1748 finish_current_insert(trans
, extent_root
, 0);
1749 pending_ret
= del_pending_extents(trans
, extent_root
, 0);
1758 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
1759 struct btrfs_root
*root
)
1761 struct btrfs_block_group_cache
*cache
, *entry
;
1765 struct btrfs_path
*path
;
1768 path
= btrfs_alloc_path();
1774 spin_lock(&root
->fs_info
->block_group_cache_lock
);
1775 for (n
= rb_first(&root
->fs_info
->block_group_cache_tree
);
1776 n
; n
= rb_next(n
)) {
1777 entry
= rb_entry(n
, struct btrfs_block_group_cache
,
1784 spin_unlock(&root
->fs_info
->block_group_cache_lock
);
1790 last
+= cache
->key
.offset
;
1792 err
= write_one_cache_group(trans
, root
,
1795 * if we fail to write the cache group, we want
1796 * to keep it marked dirty in hopes that a later
1804 btrfs_free_path(path
);
1808 static int update_space_info(struct btrfs_fs_info
*info
, u64 flags
,
1809 u64 total_bytes
, u64 bytes_used
,
1810 struct btrfs_space_info
**space_info
)
1812 struct btrfs_space_info
*found
;
1814 found
= __find_space_info(info
, flags
);
1816 spin_lock(&found
->lock
);
1817 found
->total_bytes
+= total_bytes
;
1818 found
->bytes_used
+= bytes_used
;
1820 spin_unlock(&found
->lock
);
1821 *space_info
= found
;
1824 found
= kzalloc(sizeof(*found
), GFP_NOFS
);
1828 list_add(&found
->list
, &info
->space_info
);
1829 INIT_LIST_HEAD(&found
->block_groups
);
1830 init_rwsem(&found
->groups_sem
);
1831 spin_lock_init(&found
->lock
);
1832 found
->flags
= flags
;
1833 found
->total_bytes
= total_bytes
;
1834 found
->bytes_used
= bytes_used
;
1835 found
->bytes_pinned
= 0;
1836 found
->bytes_reserved
= 0;
1837 found
->bytes_readonly
= 0;
1839 found
->force_alloc
= 0;
1840 *space_info
= found
;
1844 static void set_avail_alloc_bits(struct btrfs_fs_info
*fs_info
, u64 flags
)
1846 u64 extra_flags
= flags
& (BTRFS_BLOCK_GROUP_RAID0
|
1847 BTRFS_BLOCK_GROUP_RAID1
|
1848 BTRFS_BLOCK_GROUP_RAID10
|
1849 BTRFS_BLOCK_GROUP_DUP
);
1851 if (flags
& BTRFS_BLOCK_GROUP_DATA
)
1852 fs_info
->avail_data_alloc_bits
|= extra_flags
;
1853 if (flags
& BTRFS_BLOCK_GROUP_METADATA
)
1854 fs_info
->avail_metadata_alloc_bits
|= extra_flags
;
1855 if (flags
& BTRFS_BLOCK_GROUP_SYSTEM
)
1856 fs_info
->avail_system_alloc_bits
|= extra_flags
;
1860 static void set_block_group_readonly(struct btrfs_block_group_cache
*cache
)
1862 spin_lock(&cache
->space_info
->lock
);
1863 spin_lock(&cache
->lock
);
1865 cache
->space_info
->bytes_readonly
+= cache
->key
.offset
-
1866 btrfs_block_group_used(&cache
->item
);
1869 spin_unlock(&cache
->lock
);
1870 spin_unlock(&cache
->space_info
->lock
);
1873 u64
btrfs_reduce_alloc_profile(struct btrfs_root
*root
, u64 flags
)
1875 u64 num_devices
= root
->fs_info
->fs_devices
->rw_devices
;
1877 if (num_devices
== 1)
1878 flags
&= ~(BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_RAID0
);
1879 if (num_devices
< 4)
1880 flags
&= ~BTRFS_BLOCK_GROUP_RAID10
;
1882 if ((flags
& BTRFS_BLOCK_GROUP_DUP
) &&
1883 (flags
& (BTRFS_BLOCK_GROUP_RAID1
|
1884 BTRFS_BLOCK_GROUP_RAID10
))) {
1885 flags
&= ~BTRFS_BLOCK_GROUP_DUP
;
1888 if ((flags
& BTRFS_BLOCK_GROUP_RAID1
) &&
1889 (flags
& BTRFS_BLOCK_GROUP_RAID10
)) {
1890 flags
&= ~BTRFS_BLOCK_GROUP_RAID1
;
1893 if ((flags
& BTRFS_BLOCK_GROUP_RAID0
) &&
1894 ((flags
& BTRFS_BLOCK_GROUP_RAID1
) |
1895 (flags
& BTRFS_BLOCK_GROUP_RAID10
) |
1896 (flags
& BTRFS_BLOCK_GROUP_DUP
)))
1897 flags
&= ~BTRFS_BLOCK_GROUP_RAID0
;
1901 static int do_chunk_alloc(struct btrfs_trans_handle
*trans
,
1902 struct btrfs_root
*extent_root
, u64 alloc_bytes
,
1903 u64 flags
, int force
)
1905 struct btrfs_space_info
*space_info
;
1909 mutex_lock(&extent_root
->fs_info
->chunk_mutex
);
1911 flags
= btrfs_reduce_alloc_profile(extent_root
, flags
);
1913 space_info
= __find_space_info(extent_root
->fs_info
, flags
);
1915 ret
= update_space_info(extent_root
->fs_info
, flags
,
1919 BUG_ON(!space_info
);
1921 spin_lock(&space_info
->lock
);
1922 if (space_info
->force_alloc
) {
1924 space_info
->force_alloc
= 0;
1926 if (space_info
->full
) {
1927 spin_unlock(&space_info
->lock
);
1931 thresh
= space_info
->total_bytes
- space_info
->bytes_readonly
;
1932 thresh
= div_factor(thresh
, 6);
1934 (space_info
->bytes_used
+ space_info
->bytes_pinned
+
1935 space_info
->bytes_reserved
+ alloc_bytes
) < thresh
) {
1936 spin_unlock(&space_info
->lock
);
1939 spin_unlock(&space_info
->lock
);
1941 ret
= btrfs_alloc_chunk(trans
, extent_root
, flags
);
1943 printk("space info full %Lu\n", flags
);
1944 space_info
->full
= 1;
1947 mutex_unlock(&extent_root
->fs_info
->chunk_mutex
);
1951 static int update_block_group(struct btrfs_trans_handle
*trans
,
1952 struct btrfs_root
*root
,
1953 u64 bytenr
, u64 num_bytes
, int alloc
,
1956 struct btrfs_block_group_cache
*cache
;
1957 struct btrfs_fs_info
*info
= root
->fs_info
;
1958 u64 total
= num_bytes
;
1963 cache
= btrfs_lookup_block_group(info
, bytenr
);
1966 byte_in_group
= bytenr
- cache
->key
.objectid
;
1967 WARN_ON(byte_in_group
> cache
->key
.offset
);
1969 spin_lock(&cache
->space_info
->lock
);
1970 spin_lock(&cache
->lock
);
1972 old_val
= btrfs_block_group_used(&cache
->item
);
1973 num_bytes
= min(total
, cache
->key
.offset
- byte_in_group
);
1975 old_val
+= num_bytes
;
1976 cache
->space_info
->bytes_used
+= num_bytes
;
1978 cache
->space_info
->bytes_readonly
-= num_bytes
;
1981 btrfs_set_block_group_used(&cache
->item
, old_val
);
1982 spin_unlock(&cache
->lock
);
1983 spin_unlock(&cache
->space_info
->lock
);
1985 old_val
-= num_bytes
;
1986 cache
->space_info
->bytes_used
-= num_bytes
;
1988 cache
->space_info
->bytes_readonly
+= num_bytes
;
1989 btrfs_set_block_group_used(&cache
->item
, old_val
);
1990 spin_unlock(&cache
->lock
);
1991 spin_unlock(&cache
->space_info
->lock
);
1994 ret
= btrfs_add_free_space(cache
, bytenr
,
2001 bytenr
+= num_bytes
;
2006 static u64
first_logical_byte(struct btrfs_root
*root
, u64 search_start
)
2008 struct btrfs_block_group_cache
*cache
;
2010 cache
= btrfs_lookup_first_block_group(root
->fs_info
, search_start
);
2014 return cache
->key
.objectid
;
2017 int btrfs_update_pinned_extents(struct btrfs_root
*root
,
2018 u64 bytenr
, u64 num
, int pin
)
2021 struct btrfs_block_group_cache
*cache
;
2022 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
2024 WARN_ON(!mutex_is_locked(&root
->fs_info
->pinned_mutex
));
2026 set_extent_dirty(&fs_info
->pinned_extents
,
2027 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
2029 clear_extent_dirty(&fs_info
->pinned_extents
,
2030 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
2033 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
2035 len
= min(num
, cache
->key
.offset
-
2036 (bytenr
- cache
->key
.objectid
));
2038 spin_lock(&cache
->space_info
->lock
);
2039 spin_lock(&cache
->lock
);
2040 cache
->pinned
+= len
;
2041 cache
->space_info
->bytes_pinned
+= len
;
2042 spin_unlock(&cache
->lock
);
2043 spin_unlock(&cache
->space_info
->lock
);
2044 fs_info
->total_pinned
+= len
;
2046 spin_lock(&cache
->space_info
->lock
);
2047 spin_lock(&cache
->lock
);
2048 cache
->pinned
-= len
;
2049 cache
->space_info
->bytes_pinned
-= len
;
2050 spin_unlock(&cache
->lock
);
2051 spin_unlock(&cache
->space_info
->lock
);
2052 fs_info
->total_pinned
-= len
;
2054 btrfs_add_free_space(cache
, bytenr
, len
);
2062 static int update_reserved_extents(struct btrfs_root
*root
,
2063 u64 bytenr
, u64 num
, int reserve
)
2066 struct btrfs_block_group_cache
*cache
;
2067 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
2070 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
2072 len
= min(num
, cache
->key
.offset
-
2073 (bytenr
- cache
->key
.objectid
));
2075 spin_lock(&cache
->space_info
->lock
);
2076 spin_lock(&cache
->lock
);
2078 cache
->reserved
+= len
;
2079 cache
->space_info
->bytes_reserved
+= len
;
2081 cache
->reserved
-= len
;
2082 cache
->space_info
->bytes_reserved
-= len
;
2084 spin_unlock(&cache
->lock
);
2085 spin_unlock(&cache
->space_info
->lock
);
2092 int btrfs_copy_pinned(struct btrfs_root
*root
, struct extent_io_tree
*copy
)
2097 struct extent_io_tree
*pinned_extents
= &root
->fs_info
->pinned_extents
;
2100 mutex_lock(&root
->fs_info
->pinned_mutex
);
2102 ret
= find_first_extent_bit(pinned_extents
, last
,
2103 &start
, &end
, EXTENT_DIRTY
);
2106 set_extent_dirty(copy
, start
, end
, GFP_NOFS
);
2109 mutex_unlock(&root
->fs_info
->pinned_mutex
);
2113 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
,
2114 struct btrfs_root
*root
,
2115 struct extent_io_tree
*unpin
)
2121 mutex_lock(&root
->fs_info
->pinned_mutex
);
2123 ret
= find_first_extent_bit(unpin
, 0, &start
, &end
,
2127 btrfs_update_pinned_extents(root
, start
, end
+ 1 - start
, 0);
2128 clear_extent_dirty(unpin
, start
, end
, GFP_NOFS
);
2129 if (need_resched()) {
2130 mutex_unlock(&root
->fs_info
->pinned_mutex
);
2132 mutex_lock(&root
->fs_info
->pinned_mutex
);
2135 mutex_unlock(&root
->fs_info
->pinned_mutex
);
2139 static int finish_current_insert(struct btrfs_trans_handle
*trans
,
2140 struct btrfs_root
*extent_root
, int all
)
2147 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
2148 struct btrfs_path
*path
;
2149 struct pending_extent_op
*extent_op
, *tmp
;
2150 struct list_head insert_list
, update_list
;
2152 int num_inserts
= 0, max_inserts
;
2154 path
= btrfs_alloc_path();
2155 INIT_LIST_HEAD(&insert_list
);
2156 INIT_LIST_HEAD(&update_list
);
2158 max_inserts
= extent_root
->leafsize
/
2159 (2 * sizeof(struct btrfs_key
) + 2 * sizeof(struct btrfs_item
) +
2160 sizeof(struct btrfs_extent_ref
) +
2161 sizeof(struct btrfs_extent_item
));
2163 mutex_lock(&info
->extent_ins_mutex
);
2165 ret
= find_first_extent_bit(&info
->extent_ins
, search
, &start
,
2166 &end
, EXTENT_WRITEBACK
);
2168 if (skipped
&& all
&& !num_inserts
) {
2173 mutex_unlock(&info
->extent_ins_mutex
);
2177 ret
= try_lock_extent(&info
->extent_ins
, start
, end
, GFP_NOFS
);
2181 if (need_resched()) {
2182 mutex_unlock(&info
->extent_ins_mutex
);
2184 mutex_lock(&info
->extent_ins_mutex
);
2189 ret
= get_state_private(&info
->extent_ins
, start
, &priv
);
2191 extent_op
= (struct pending_extent_op
*)(unsigned long) priv
;
2193 if (extent_op
->type
== PENDING_EXTENT_INSERT
) {
2195 list_add_tail(&extent_op
->list
, &insert_list
);
2197 if (num_inserts
== max_inserts
) {
2198 mutex_unlock(&info
->extent_ins_mutex
);
2201 } else if (extent_op
->type
== PENDING_BACKREF_UPDATE
) {
2202 list_add_tail(&extent_op
->list
, &update_list
);
2210 * process the update list, clear the writeback bit for it, and if
2211 * somebody marked this thing for deletion then just unlock it and be
2212 * done, the free_extents will handle it
2214 mutex_lock(&info
->extent_ins_mutex
);
2215 list_for_each_entry_safe(extent_op
, tmp
, &update_list
, list
) {
2216 clear_extent_bits(&info
->extent_ins
, extent_op
->bytenr
,
2217 extent_op
->bytenr
+ extent_op
->num_bytes
- 1,
2218 EXTENT_WRITEBACK
, GFP_NOFS
);
2219 if (extent_op
->del
) {
2220 list_del_init(&extent_op
->list
);
2221 unlock_extent(&info
->extent_ins
, extent_op
->bytenr
,
2222 extent_op
->bytenr
+ extent_op
->num_bytes
2227 mutex_unlock(&info
->extent_ins_mutex
);
2230 * still have things left on the update list, go ahead an update
2233 if (!list_empty(&update_list
)) {
2234 ret
= update_backrefs(trans
, extent_root
, path
, &update_list
);
2239 * if no inserts need to be done, but we skipped some extents and we
2240 * need to make sure everything is cleaned then reset everything and
2241 * go back to the beginning
2243 if (!num_inserts
&& all
&& skipped
) {
2246 INIT_LIST_HEAD(&update_list
);
2247 INIT_LIST_HEAD(&insert_list
);
2249 } else if (!num_inserts
) {
2254 * process the insert extents list. Again if we are deleting this
2255 * extent, then just unlock it, pin down the bytes if need be, and be
2256 * done with it. Saves us from having to actually insert the extent
2257 * into the tree and then subsequently come along and delete it
2259 mutex_lock(&info
->extent_ins_mutex
);
2260 list_for_each_entry_safe(extent_op
, tmp
, &insert_list
, list
) {
2261 clear_extent_bits(&info
->extent_ins
, extent_op
->bytenr
,
2262 extent_op
->bytenr
+ extent_op
->num_bytes
- 1,
2263 EXTENT_WRITEBACK
, GFP_NOFS
);
2264 if (extent_op
->del
) {
2265 list_del_init(&extent_op
->list
);
2266 unlock_extent(&info
->extent_ins
, extent_op
->bytenr
,
2267 extent_op
->bytenr
+ extent_op
->num_bytes
2270 mutex_lock(&extent_root
->fs_info
->pinned_mutex
);
2271 ret
= pin_down_bytes(trans
, extent_root
,
2273 extent_op
->num_bytes
, 0);
2274 mutex_unlock(&extent_root
->fs_info
->pinned_mutex
);
2276 ret
= update_block_group(trans
, extent_root
,
2278 extent_op
->num_bytes
,
2285 mutex_unlock(&info
->extent_ins_mutex
);
2287 ret
= insert_extents(trans
, extent_root
, path
, &insert_list
,
2292 * if we broke out of the loop in order to insert stuff because we hit
2293 * the maximum number of inserts at a time we can handle, then loop
2294 * back and pick up where we left off
2296 if (num_inserts
== max_inserts
) {
2297 INIT_LIST_HEAD(&insert_list
);
2298 INIT_LIST_HEAD(&update_list
);
2304 * again, if we need to make absolutely sure there are no more pending
2305 * extent operations left and we know that we skipped some, go back to
2306 * the beginning and do it all again
2308 if (all
&& skipped
) {
2309 INIT_LIST_HEAD(&insert_list
);
2310 INIT_LIST_HEAD(&update_list
);
2317 btrfs_free_path(path
);
2321 static int pin_down_bytes(struct btrfs_trans_handle
*trans
,
2322 struct btrfs_root
*root
,
2323 u64 bytenr
, u64 num_bytes
, int is_data
)
2326 struct extent_buffer
*buf
;
2331 buf
= btrfs_find_tree_block(root
, bytenr
, num_bytes
);
2335 /* we can reuse a block if it hasn't been written
2336 * and it is from this transaction. We can't
2337 * reuse anything from the tree log root because
2338 * it has tiny sub-transactions.
2340 if (btrfs_buffer_uptodate(buf
, 0) &&
2341 btrfs_try_tree_lock(buf
)) {
2342 u64 header_owner
= btrfs_header_owner(buf
);
2343 u64 header_transid
= btrfs_header_generation(buf
);
2344 if (header_owner
!= BTRFS_TREE_LOG_OBJECTID
&&
2345 header_owner
!= BTRFS_TREE_RELOC_OBJECTID
&&
2346 header_transid
== trans
->transid
&&
2347 !btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_WRITTEN
)) {
2348 clean_tree_block(NULL
, root
, buf
);
2349 btrfs_tree_unlock(buf
);
2350 free_extent_buffer(buf
);
2353 btrfs_tree_unlock(buf
);
2355 free_extent_buffer(buf
);
2357 btrfs_update_pinned_extents(root
, bytenr
, num_bytes
, 1);
2364 * remove an extent from the root, returns 0 on success
2366 static int __free_extent(struct btrfs_trans_handle
*trans
,
2367 struct btrfs_root
*root
,
2368 u64 bytenr
, u64 num_bytes
, u64 parent
,
2369 u64 root_objectid
, u64 ref_generation
,
2370 u64 owner_objectid
, int pin
, int mark_free
)
2372 struct btrfs_path
*path
;
2373 struct btrfs_key key
;
2374 struct btrfs_fs_info
*info
= root
->fs_info
;
2375 struct btrfs_root
*extent_root
= info
->extent_root
;
2376 struct extent_buffer
*leaf
;
2378 int extent_slot
= 0;
2379 int found_extent
= 0;
2381 struct btrfs_extent_item
*ei
;
2384 key
.objectid
= bytenr
;
2385 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
2386 key
.offset
= num_bytes
;
2387 path
= btrfs_alloc_path();
2392 ret
= lookup_extent_backref(trans
, extent_root
, path
,
2393 bytenr
, parent
, root_objectid
,
2394 ref_generation
, owner_objectid
, 1);
2396 struct btrfs_key found_key
;
2397 extent_slot
= path
->slots
[0];
2398 while(extent_slot
> 0) {
2400 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
2402 if (found_key
.objectid
!= bytenr
)
2404 if (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
&&
2405 found_key
.offset
== num_bytes
) {
2409 if (path
->slots
[0] - extent_slot
> 5)
2412 if (!found_extent
) {
2413 ret
= remove_extent_backref(trans
, extent_root
, path
);
2415 btrfs_release_path(extent_root
, path
);
2416 ret
= btrfs_search_slot(trans
, extent_root
,
2419 printk(KERN_ERR
"umm, got %d back from search"
2420 ", was looking for %Lu\n", ret
,
2422 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
2425 extent_slot
= path
->slots
[0];
2428 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
2430 printk("Unable to find ref byte nr %Lu root %Lu "
2431 "gen %Lu owner %Lu\n", bytenr
,
2432 root_objectid
, ref_generation
, owner_objectid
);
2435 leaf
= path
->nodes
[0];
2436 ei
= btrfs_item_ptr(leaf
, extent_slot
,
2437 struct btrfs_extent_item
);
2438 refs
= btrfs_extent_refs(leaf
, ei
);
2441 btrfs_set_extent_refs(leaf
, ei
, refs
);
2443 btrfs_mark_buffer_dirty(leaf
);
2445 if (refs
== 0 && found_extent
&& path
->slots
[0] == extent_slot
+ 1) {
2446 struct btrfs_extent_ref
*ref
;
2447 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
2448 struct btrfs_extent_ref
);
2449 BUG_ON(btrfs_ref_num_refs(leaf
, ref
) != 1);
2450 /* if the back ref and the extent are next to each other
2451 * they get deleted below in one shot
2453 path
->slots
[0] = extent_slot
;
2455 } else if (found_extent
) {
2456 /* otherwise delete the extent back ref */
2457 ret
= remove_extent_backref(trans
, extent_root
, path
);
2459 /* if refs are 0, we need to setup the path for deletion */
2461 btrfs_release_path(extent_root
, path
);
2462 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
,
2471 #ifdef BIO_RW_DISCARD
2472 u64 map_length
= num_bytes
;
2473 struct btrfs_multi_bio
*multi
= NULL
;
2477 mutex_lock(&root
->fs_info
->pinned_mutex
);
2478 ret
= pin_down_bytes(trans
, root
, bytenr
, num_bytes
,
2479 owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
);
2480 mutex_unlock(&root
->fs_info
->pinned_mutex
);
2486 /* block accounting for super block */
2487 spin_lock_irq(&info
->delalloc_lock
);
2488 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
2489 btrfs_set_super_bytes_used(&info
->super_copy
,
2490 super_used
- num_bytes
);
2491 spin_unlock_irq(&info
->delalloc_lock
);
2493 /* block accounting for root item */
2494 root_used
= btrfs_root_used(&root
->root_item
);
2495 btrfs_set_root_used(&root
->root_item
,
2496 root_used
- num_bytes
);
2497 ret
= btrfs_del_items(trans
, extent_root
, path
, path
->slots
[0],
2500 btrfs_release_path(extent_root
, path
);
2501 ret
= update_block_group(trans
, root
, bytenr
, num_bytes
, 0,
2505 #ifdef BIO_RW_DISCARD
2506 /* Tell the block device(s) that the sectors can be discarded */
2507 ret
= btrfs_map_block(&root
->fs_info
->mapping_tree
, READ
,
2508 bytenr
, &map_length
, &multi
, 0);
2510 struct btrfs_bio_stripe
*stripe
= multi
->stripes
;
2513 if (map_length
> num_bytes
)
2514 map_length
= num_bytes
;
2516 for (i
= 0; i
< multi
->num_stripes
; i
++, stripe
++) {
2517 btrfs_issue_discard(stripe
->dev
->bdev
,
2525 btrfs_free_path(path
);
2526 finish_current_insert(trans
, extent_root
, 0);
2531 * find all the blocks marked as pending in the radix tree and remove
2532 * them from the extent map
2534 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
2535 btrfs_root
*extent_root
, int all
)
2543 int nr
= 0, skipped
= 0;
2544 struct extent_io_tree
*pending_del
;
2545 struct extent_io_tree
*extent_ins
;
2546 struct pending_extent_op
*extent_op
;
2547 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
2548 struct list_head delete_list
;
2550 INIT_LIST_HEAD(&delete_list
);
2551 extent_ins
= &extent_root
->fs_info
->extent_ins
;
2552 pending_del
= &extent_root
->fs_info
->pending_del
;
2555 mutex_lock(&info
->extent_ins_mutex
);
2557 ret
= find_first_extent_bit(pending_del
, search
, &start
, &end
,
2560 if (all
&& skipped
&& !nr
) {
2564 mutex_unlock(&info
->extent_ins_mutex
);
2568 ret
= try_lock_extent(extent_ins
, start
, end
, GFP_NOFS
);
2573 if (need_resched()) {
2574 mutex_unlock(&info
->extent_ins_mutex
);
2576 mutex_lock(&info
->extent_ins_mutex
);
2583 ret
= get_state_private(pending_del
, start
, &priv
);
2585 extent_op
= (struct pending_extent_op
*)(unsigned long)priv
;
2587 clear_extent_bits(pending_del
, start
, end
, EXTENT_WRITEBACK
,
2589 if (!test_range_bit(extent_ins
, start
, end
,
2590 EXTENT_WRITEBACK
, 0)) {
2591 list_add_tail(&extent_op
->list
, &delete_list
);
2596 ret
= get_state_private(&info
->extent_ins
, start
,
2599 extent_op
= (struct pending_extent_op
*)
2600 (unsigned long)priv
;
2602 clear_extent_bits(&info
->extent_ins
, start
, end
,
2603 EXTENT_WRITEBACK
, GFP_NOFS
);
2605 if (extent_op
->type
== PENDING_BACKREF_UPDATE
) {
2606 list_add_tail(&extent_op
->list
, &delete_list
);
2612 mutex_lock(&extent_root
->fs_info
->pinned_mutex
);
2613 ret
= pin_down_bytes(trans
, extent_root
, start
,
2614 end
+ 1 - start
, 0);
2615 mutex_unlock(&extent_root
->fs_info
->pinned_mutex
);
2617 ret
= update_block_group(trans
, extent_root
, start
,
2618 end
+ 1 - start
, 0, ret
> 0);
2620 unlock_extent(extent_ins
, start
, end
, GFP_NOFS
);
2629 if (need_resched()) {
2630 mutex_unlock(&info
->extent_ins_mutex
);
2632 mutex_lock(&info
->extent_ins_mutex
);
2637 ret
= free_extents(trans
, extent_root
, &delete_list
);
2641 if (all
&& skipped
) {
2642 INIT_LIST_HEAD(&delete_list
);
/*
 * __btrfs_free_extent - drop one reference on an extent, freeing it when
 * the last reference goes away.
 *
 * If @root is the extent tree itself, the delete cannot be done inline
 * (we are already modifying the extent tree); instead a pending_extent_op
 * of type PENDING_EXTENT_DELETE is queued on fs_info->pending_del under
 * extent_ins_mutex, to be processed later by del_pending_extents().
 * A pending INSERT for the same range cancels out against this delete.
 *
 * NOTE(review): this extraction dropped interior lines (braces, local
 * declarations, returns); treat the text below as a partial transcript,
 * not compilable code.
 */
2652 * remove an extent from the root, returns 0 on success
2654 static int __btrfs_free_extent(struct btrfs_trans_handle
*trans
,
2655 struct btrfs_root
*root
,
2656 u64 bytenr
, u64 num_bytes
, u64 parent
,
2657 u64 root_objectid
, u64 ref_generation
,
2658 u64 owner_objectid
, int pin
)
2660 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
2664 WARN_ON(num_bytes
< root
->sectorsize
);
/* Special case: freeing blocks that belong to the extent tree itself. */
2665 if (root
== extent_root
) {
2666 struct pending_extent_op
*extent_op
= NULL
;
2668 mutex_lock(&root
->fs_info
->extent_ins_mutex
);
/* Is there already a pending op recorded for this byte range? */
2669 if (test_range_bit(&root
->fs_info
->extent_ins
, bytenr
,
2670 bytenr
+ num_bytes
- 1, EXTENT_WRITEBACK
, 0)) {
2672 ret
= get_state_private(&root
->fs_info
->extent_ins
,
/* The private value stored per-range is the pointer to the op. */
2675 extent_op
= (struct pending_extent_op
*)
2676 (unsigned long)priv
;
/*
 * A pending INSERT for this range means the extent was never
 * committed; the insert and this delete annihilate each other.
 */
2679 if (extent_op
->type
== PENDING_EXTENT_INSERT
) {
2680 mutex_unlock(&root
->fs_info
->extent_ins_mutex
);
/* Reuse the original backref info recorded when the op was queued. */
2686 ref_generation
= extent_op
->orig_generation
;
2687 parent
= extent_op
->orig_parent
;
/* No pending op yet: queue a new DELETE op for later processing. */
2690 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
2693 extent_op
->type
= PENDING_EXTENT_DELETE
;
2694 extent_op
->bytenr
= bytenr
;
2695 extent_op
->num_bytes
= num_bytes
;
2696 extent_op
->parent
= parent
;
2697 extent_op
->orig_parent
= parent
;
2698 extent_op
->generation
= ref_generation
;
2699 extent_op
->orig_generation
= ref_generation
;
/* For metadata, owner_objectid encodes the tree level. */
2700 extent_op
->level
= (int)owner_objectid
;
2701 INIT_LIST_HEAD(&extent_op
->list
);
/* Mark the range and stash the op pointer in pending_del. */
2704 set_extent_bits(&root
->fs_info
->pending_del
,
2705 bytenr
, bytenr
+ num_bytes
- 1,
2706 EXTENT_WRITEBACK
, GFP_NOFS
);
2707 set_state_private(&root
->fs_info
->pending_del
,
2708 bytenr
, (unsigned long)extent_op
);
2709 mutex_unlock(&root
->fs_info
->extent_ins_mutex
);
2712 /* if metadata always pin */
2713 if (owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
/* Log-tree blocks bypass pinning: return the space directly. */
2714 if (root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
) {
2715 struct btrfs_block_group_cache
*cache
;
2717 /* btrfs_free_reserved_extent */
2718 cache
= btrfs_lookup_block_group(root
->fs_info
, bytenr
);
2720 btrfs_add_free_space(cache
, bytenr
, num_bytes
);
2721 update_reserved_extents(root
, bytenr
, num_bytes
, 0);
2727 /* if data pin when any transaction has committed this */
2728 if (ref_generation
!= trans
->transid
)
/* pin==0 here means the space may be marked immediately reusable. */
2731 ret
= __free_extent(trans
, root
, bytenr
, num_bytes
, parent
,
2732 root_objectid
, ref_generation
,
2733 owner_objectid
, pin
, pin
== 0);
/* Flush any extent-tree ops this free may itself have queued. */
2735 finish_current_insert(trans
, root
->fs_info
->extent_root
, 0);
2736 pending_ret
= del_pending_extents(trans
, root
->fs_info
->extent_root
, 0);
2737 return ret
? ret
: pending_ret
;
/*
 * btrfs_free_extent - public wrapper around __btrfs_free_extent with the
 * same argument list.  (Local declarations and the return statement were
 * dropped by the extraction; presumably it just returns the helper's
 * result — TODO confirm against the full source.)
 */
2740 int btrfs_free_extent(struct btrfs_trans_handle
*trans
,
2741 struct btrfs_root
*root
,
2742 u64 bytenr
, u64 num_bytes
, u64 parent
,
2743 u64 root_objectid
, u64 ref_generation
,
2744 u64 owner_objectid
, int pin
)
2748 ret
= __btrfs_free_extent(trans
, root
, bytenr
, num_bytes
, parent
,
2749 root_objectid
, ref_generation
,
2750 owner_objectid
, pin
);
2754 static u64
stripe_align(struct btrfs_root
*root
, u64 val
)
2756 u64 mask
= ((u64
)root
->stripesize
- 1);
2757 u64 ret
= (val
+ mask
) & ~mask
;
/*
 * find_free_extent - core allocator: walk the block groups of the matching
 * space_info looking for a free hole of @num_bytes (plus clustering slack).
 *
 * Result is returned through @ins (objectid = start, offset = length).
 * The loop counter drives three passes (hinted group, all groups, all
 * groups again after allocating a new chunk) — see the comment at "loop"
 * below.  last_ptr caches the end of the previous allocation so metadata
 * allocations cluster together.
 *
 * NOTE(review): the extraction dropped many interior lines (braces, decls,
 * gotos, labels such as new_group/new_group_no_lock); this is a partial
 * transcript, not compilable code.
 */
2762 * walks the btree of allocated extents and find a hole of a given size.
2763 * The key ins is changed to record the hole:
2764 * ins->objectid == block start
2765 * ins->flags = BTRFS_EXTENT_ITEM_KEY
2766 * ins->offset == number of blocks
2767 * Any available blocks before search_start are skipped.
2769 static int noinline
find_free_extent(struct btrfs_trans_handle
*trans
,
2770 struct btrfs_root
*orig_root
,
2771 u64 num_bytes
, u64 empty_size
,
2772 u64 search_start
, u64 search_end
,
2773 u64 hint_byte
, struct btrfs_key
*ins
,
2774 u64 exclude_start
, u64 exclude_nr
,
2778 struct btrfs_root
* root
= orig_root
->fs_info
->extent_root
;
2779 u64 total_needed
= num_bytes
;
2780 u64
*last_ptr
= NULL
;
2781 u64 last_wanted
= 0;
2782 struct btrfs_block_group_cache
*block_group
= NULL
;
2783 int chunk_alloc_done
= 0;
2784 int empty_cluster
= 2 * 1024 * 1024;
2785 int allowed_chunk_alloc
= 0;
2786 struct list_head
*head
= NULL
, *cur
= NULL
;
2789 struct btrfs_space_info
*space_info
;
2791 WARN_ON(num_bytes
< root
->sectorsize
);
2792 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
/* Only COW roots (or callers passing empty_size) may grow new chunks. */
2796 if (orig_root
->ref_cows
|| empty_size
)
2797 allowed_chunk_alloc
= 1;
/* Metadata clusters around the last metadata allocation. */
2799 if (data
& BTRFS_BLOCK_GROUP_METADATA
) {
2800 last_ptr
= &root
->fs_info
->last_alloc
;
2801 empty_cluster
= 64 * 1024;
/* On SSDs, data allocations cluster too. */
2804 if ((data
& BTRFS_BLOCK_GROUP_DATA
) && btrfs_test_opt(root
, SSD
))
2805 last_ptr
= &root
->fs_info
->last_data_alloc
;
2809 hint_byte
= *last_ptr
;
2810 last_wanted
= *last_ptr
;
2812 empty_size
+= empty_cluster
;
2816 search_start
= max(search_start
, first_logical_byte(root
, 0));
2817 search_start
= max(search_start
, hint_byte
);
/* If we can't continue the previous cluster, demand extra slack. */
2819 if (last_wanted
&& search_start
!= last_wanted
) {
2821 empty_size
+= empty_cluster
;
2824 total_needed
+= empty_size
;
2825 block_group
= btrfs_lookup_block_group(root
->fs_info
, search_start
);
2827 block_group
= btrfs_lookup_first_block_group(root
->fs_info
,
2829 space_info
= __find_space_info(root
->fs_info
, data
);
2831 down_read(&space_info
->groups_sem
);
2833 struct btrfs_free_space
*free_space
;
2835 * the only way this happens if our hint points to a block
2836 * group thats not of the proper type, while looping this
2837 * should never happen
2843 goto new_group_no_lock
;
/* Lazily populate the block group's free-space cache. */
2845 if (unlikely(!block_group
->cached
)) {
2846 mutex_lock(&block_group
->cache_mutex
);
2847 ret
= cache_block_group(root
, block_group
);
2848 mutex_unlock(&block_group
->cache_mutex
);
2853 mutex_lock(&block_group
->alloc_mutex
);
2854 if (unlikely(!block_group_bits(block_group
, data
)))
/* Read-only groups (e.g. being relocated) are skipped. */
2857 if (unlikely(block_group
->ro
))
2860 free_space
= btrfs_find_free_space(block_group
, search_start
,
2863 u64 start
= block_group
->key
.objectid
;
2864 u64 end
= block_group
->key
.objectid
+
2865 block_group
->key
.offset
;
2867 search_start
= stripe_align(root
, free_space
->offset
);
2869 /* move on to the next group */
2870 if (search_start
+ num_bytes
>= search_end
)
2873 /* move on to the next group */
2874 if (search_start
+ num_bytes
> end
)
/* Cluster broken: require the extra slack from here on. */
2877 if (last_wanted
&& search_start
!= last_wanted
) {
2878 total_needed
+= empty_cluster
;
2879 empty_size
+= empty_cluster
;
2882 * if search_start is still in this block group
2883 * then we just re-search this block group
2885 if (search_start
>= start
&&
2886 search_start
< end
) {
2887 mutex_unlock(&block_group
->alloc_mutex
);
2891 /* else we go to the next block group */
/* Avoid the caller's excluded byte range (in-flight allocation). */
2895 if (exclude_nr
> 0 &&
2896 (search_start
+ num_bytes
> exclude_start
&&
2897 search_start
< exclude_start
+ exclude_nr
)) {
2898 search_start
= exclude_start
+ exclude_nr
;
2900 * if search_start is still in this block group
2901 * then we just re-search this block group
2903 if (search_start
>= start
&&
2904 search_start
< end
) {
2905 mutex_unlock(&block_group
->alloc_mutex
);
2910 /* else we go to the next block group */
/* Found a usable hole: record it and claim the space. */
2914 ins
->objectid
= search_start
;
2915 ins
->offset
= num_bytes
;
2917 btrfs_remove_free_space_lock(block_group
, search_start
,
2919 /* we are all good, lets return */
2920 mutex_unlock(&block_group
->alloc_mutex
);
2924 mutex_unlock(&block_group
->alloc_mutex
);
2926 /* don't try to compare new allocations against the
2927 * last allocation any more
2932 * Here's how this works.
2933 * loop == 0: we were searching a block group via a hint
2934 * and didn't find anything, so we start at
2935 * the head of the block groups and keep searching
2936 * loop == 1: we're searching through all of the block groups
2937 * if we hit the head again we have searched
2938 * all of the block groups for this space and we
2939 * need to try and allocate, if we cant error out.
2940 * loop == 2: we allocated more space and are looping through
2941 * all of the block groups again.
2944 head
= &space_info
->block_groups
;
2947 } else if (loop
== 1 && cur
== head
) {
2950 /* at this point we give up on the empty_size
2951 * allocations and just try to allocate the min
2954 * The extra_loop field was set if an empty_size
2955 * allocation was attempted above, and if this
2956 * is try we need to try the loop again without
2957 * the additional empty_size.
2959 total_needed
-= empty_size
;
2961 keep_going
= extra_loop
;
/* Last resort: grow a new chunk and rescan. */
2964 if (allowed_chunk_alloc
&& !chunk_alloc_done
) {
2965 up_read(&space_info
->groups_sem
);
2966 ret
= do_chunk_alloc(trans
, root
, num_bytes
+
2967 2 * 1024 * 1024, data
, 1);
2968 down_read(&space_info
->groups_sem
);
2971 head
= &space_info
->block_groups
;
2973 * we've allocated a new chunk, keep
2977 chunk_alloc_done
= 1;
2978 } else if (!allowed_chunk_alloc
) {
/* Remember someone wanted a chunk so a later caller allocates one. */
2979 space_info
->force_alloc
= 1;
2988 } else if (cur
== head
) {
2992 block_group
= list_entry(cur
, struct btrfs_block_group_cache
,
2994 search_start
= block_group
->key
.objectid
;
2998 /* we found what we needed */
2999 if (ins
->objectid
) {
3000 if (!(data
& BTRFS_BLOCK_GROUP_DATA
))
3001 trans
->block_group
= block_group
;
/* Remember where this allocation ended for future clustering. */
3004 *last_ptr
= ins
->objectid
+ ins
->offset
;
3007 printk(KERN_ERR
"we were searching for %Lu bytes, num_bytes %Lu,"
3008 " loop %d, allowed_alloc %d\n", total_needed
, num_bytes
,
3009 loop
, allowed_chunk_alloc
);
3013 up_read(&space_info
->groups_sem
);
/*
 * dump_space_info - debug helper: print a summary of @info (free bytes and
 * fullness) followed by one line per block group (usage, pinned, reserved)
 * plus its free-space dump.  Called on allocation failure (-ENOSPC paths).
 * Takes groups_sem for reading and each group's spinlock while printing.
 */
3017 static void dump_space_info(struct btrfs_space_info
*info
, u64 bytes
)
3019 struct btrfs_block_group_cache
*cache
;
3020 struct list_head
*l
;
3022 printk(KERN_INFO
"space_info has %Lu free, is %sfull\n",
/* free = total - used - pinned - reserved */
3023 info
->total_bytes
- info
->bytes_used
- info
->bytes_pinned
-
3024 info
->bytes_reserved
, (info
->full
) ? "" : "not ");
3026 down_read(&info
->groups_sem
);
3027 list_for_each(l
, &info
->block_groups
) {
3028 cache
= list_entry(l
, struct btrfs_block_group_cache
, list
);
3029 spin_lock(&cache
->lock
);
3030 printk(KERN_INFO
"block group %Lu has %Lu bytes, %Lu used "
3031 "%Lu pinned %Lu reserved\n",
3032 cache
->key
.objectid
, cache
->key
.offset
,
3033 btrfs_block_group_used(&cache
->item
),
3034 cache
->pinned
, cache
->reserved
);
3035 btrfs_dump_free_space(cache
, bytes
);
3036 spin_unlock(&cache
->lock
);
3038 up_read(&info
->groups_sem
);
/*
 * __btrfs_reserve_extent - pick the allocation profile for the request
 * (data / system / metadata), optionally pre-grow chunks, then call
 * find_free_extent.  On -ENOSPC it retries with progressively halved
 * sizes down to @min_alloc_size; on final failure it dumps space info.
 *
 * NOTE(review): extraction dropped interior lines (braces, decls, the
 * data/metadata if-condition heads, returns); partial transcript only.
 */
3041 static int __btrfs_reserve_extent(struct btrfs_trans_handle
*trans
,
3042 struct btrfs_root
*root
,
3043 u64 num_bytes
, u64 min_alloc_size
,
3044 u64 empty_size
, u64 hint_byte
,
3045 u64 search_end
, struct btrfs_key
*ins
,
3049 u64 search_start
= 0;
3051 struct btrfs_fs_info
*info
= root
->fs_info
;
/* Select the RAID/dup profile matching the requested space type. */
3054 alloc_profile
= info
->avail_data_alloc_bits
&
3055 info
->data_alloc_profile
;
3056 data
= BTRFS_BLOCK_GROUP_DATA
| alloc_profile
;
3057 } else if (root
== root
->fs_info
->chunk_root
) {
3058 alloc_profile
= info
->avail_system_alloc_bits
&
3059 info
->system_alloc_profile
;
3060 data
= BTRFS_BLOCK_GROUP_SYSTEM
| alloc_profile
;
3062 alloc_profile
= info
->avail_metadata_alloc_bits
&
3063 info
->metadata_alloc_profile
;
3064 data
= BTRFS_BLOCK_GROUP_METADATA
| alloc_profile
;
/* Drop profile bits that can't be satisfied by current devices. */
3067 data
= btrfs_reduce_alloc_profile(root
, data
);
3069 * the only place that sets empty_size is btrfs_realloc_node, which
3070 * is not called recursively on allocations
3072 if (empty_size
|| root
->ref_cows
) {
/* Data request: opportunistically top up metadata chunks too. */
3073 if (!(data
& BTRFS_BLOCK_GROUP_METADATA
)) {
3074 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
3076 BTRFS_BLOCK_GROUP_METADATA
|
3077 (info
->metadata_alloc_profile
&
3078 info
->avail_metadata_alloc_bits
), 0);
3080 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
3081 num_bytes
+ 2 * 1024 * 1024, data
, 0);
3084 WARN_ON(num_bytes
< root
->sectorsize
);
3085 ret
= find_free_extent(trans
, root
, num_bytes
, empty_size
,
3086 search_start
, search_end
, hint_byte
, ins
,
/* Exclude the range this transaction is currently inserting. */
3087 trans
->alloc_exclude_start
,
3088 trans
->alloc_exclude_nr
, data
);
/* ENOSPC retry path: halve the request (sector-aligned, >= min). */
3090 if (ret
== -ENOSPC
&& num_bytes
> min_alloc_size
) {
3091 num_bytes
= num_bytes
>> 1;
3092 num_bytes
= num_bytes
& ~(root
->sectorsize
- 1);
3093 num_bytes
= max(num_bytes
, min_alloc_size
);
3094 do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
3095 num_bytes
, data
, 1);
3099 struct btrfs_space_info
*sinfo
;
/* Final failure: complain loudly with a space-info dump. */
3101 sinfo
= __find_space_info(root
->fs_info
, data
);
3102 printk("allocation failed flags %Lu, wanted %Lu\n",
3104 dump_space_info(sinfo
, num_bytes
);
/*
 * btrfs_free_reserved_extent - return a reserved-but-unused extent to the
 * free-space cache of its block group and clear its reserved accounting.
 * Logs an error if the block group lookup fails (error return dropped by
 * the extraction — TODO confirm).
 */
3111 int btrfs_free_reserved_extent(struct btrfs_root
*root
, u64 start
, u64 len
)
3113 struct btrfs_block_group_cache
*cache
;
3115 cache
= btrfs_lookup_block_group(root
->fs_info
, start
);
3117 printk(KERN_ERR
"Unable to find block group for %Lu\n", start
);
3120 btrfs_add_free_space(cache
, start
, len
);
/* Final 0 argument = decrement the reserved byte counters. */
3121 update_reserved_extents(root
, start
, len
, 0);
/*
 * btrfs_reserve_extent - public entry: reserve an extent via
 * __btrfs_reserve_extent, then mark the returned range as reserved
 * (final argument 1 = increment reserved accounting).
 */
3125 int btrfs_reserve_extent(struct btrfs_trans_handle
*trans
,
3126 struct btrfs_root
*root
,
3127 u64 num_bytes
, u64 min_alloc_size
,
3128 u64 empty_size
, u64 hint_byte
,
3129 u64 search_end
, struct btrfs_key
*ins
,
3133 ret
= __btrfs_reserve_extent(trans
, root
, num_bytes
, min_alloc_size
,
3134 empty_size
, hint_byte
, search_end
, ins
,
3136 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 1);
/*
 * __btrfs_alloc_reserved_extent - turn a reserved range (@ins) into a real
 * allocation: update super/root byte accounting, then insert the
 * EXTENT_ITEM and its EXTENT_REF into the extent tree in one
 * btrfs_insert_empty_items call.  When allocating for the extent root
 * itself, the insert is deferred as a PENDING_EXTENT_INSERT op instead.
 * Finishes by flushing pending inserts/deletes and updating the block
 * group usage counters.
 *
 * NOTE(review): extraction dropped interior lines (braces, decls, error
 * checks, returns); partial transcript only.
 */
3140 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle
*trans
,
3141 struct btrfs_root
*root
, u64 parent
,
3142 u64 root_objectid
, u64 ref_generation
,
3143 u64 owner
, struct btrfs_key
*ins
)
3149 u64 num_bytes
= ins
->offset
;
3151 struct btrfs_fs_info
*info
= root
->fs_info
;
3152 struct btrfs_root
*extent_root
= info
->extent_root
;
3153 struct btrfs_extent_item
*extent_item
;
3154 struct btrfs_extent_ref
*ref
;
3155 struct btrfs_path
*path
;
/* keys[0] = extent item key, keys[1] = backref key. */
3156 struct btrfs_key keys
[2];
/* parent == 0 means the extent is its own parent (tree root case). */
3159 parent
= ins
->objectid
;
3161 /* block accounting for super block */
3162 spin_lock_irq(&info
->delalloc_lock
);
3163 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
3164 btrfs_set_super_bytes_used(&info
->super_copy
, super_used
+ num_bytes
);
3165 spin_unlock_irq(&info
->delalloc_lock
);
3167 /* block accounting for root item */
3168 root_used
= btrfs_root_used(&root
->root_item
);
3169 btrfs_set_root_used(&root
->root_item
, root_used
+ num_bytes
);
/* Allocating for the extent tree itself: defer via a pending op. */
3171 if (root
== extent_root
) {
3172 struct pending_extent_op
*extent_op
;
3174 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
3177 extent_op
->type
= PENDING_EXTENT_INSERT
;
3178 extent_op
->bytenr
= ins
->objectid
;
3179 extent_op
->num_bytes
= ins
->offset
;
3180 extent_op
->parent
= parent
;
3181 extent_op
->orig_parent
= 0;
3182 extent_op
->generation
= ref_generation
;
3183 extent_op
->orig_generation
= 0;
/* For metadata the owner objectid doubles as the tree level. */
3184 extent_op
->level
= (int)owner
;
3185 INIT_LIST_HEAD(&extent_op
->list
);
3188 mutex_lock(&root
->fs_info
->extent_ins_mutex
);
3189 set_extent_bits(&root
->fs_info
->extent_ins
, ins
->objectid
,
3190 ins
->objectid
+ ins
->offset
- 1,
3191 EXTENT_WRITEBACK
, GFP_NOFS
);
3192 set_state_private(&root
->fs_info
->extent_ins
,
3193 ins
->objectid
, (unsigned long)extent_op
);
3194 mutex_unlock(&root
->fs_info
->extent_ins_mutex
);
/* Normal path: insert extent item + backref in one go. */
3198 memcpy(&keys
[0], ins
, sizeof(*ins
));
3199 keys
[1].objectid
= ins
->objectid
;
3200 keys
[1].type
= BTRFS_EXTENT_REF_KEY
;
3201 keys
[1].offset
= parent
;
3202 sizes
[0] = sizeof(*extent_item
);
3203 sizes
[1] = sizeof(*ref
);
3205 path
= btrfs_alloc_path();
3208 ret
= btrfs_insert_empty_items(trans
, extent_root
, path
, keys
,
3212 extent_item
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
3213 struct btrfs_extent_item
);
/* New extent starts with a single reference. */
3214 btrfs_set_extent_refs(path
->nodes
[0], extent_item
, 1);
3215 ref
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0] + 1,
3216 struct btrfs_extent_ref
);
3218 btrfs_set_ref_root(path
->nodes
[0], ref
, root_objectid
);
3219 btrfs_set_ref_generation(path
->nodes
[0], ref
, ref_generation
);
3220 btrfs_set_ref_objectid(path
->nodes
[0], ref
, owner
);
3221 btrfs_set_ref_num_refs(path
->nodes
[0], ref
, 1);
3223 btrfs_mark_buffer_dirty(path
->nodes
[0]);
/* The range is committed now; stop excluding it from searches. */
3225 trans
->alloc_exclude_start
= 0;
3226 trans
->alloc_exclude_nr
= 0;
3227 btrfs_free_path(path
);
3228 finish_current_insert(trans
, extent_root
, 0);
3229 pending_ret
= del_pending_extents(trans
, extent_root
, 0);
3239 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
, 1, 0);
3241 printk("update block group failed for %Lu %Lu\n",
3242 ins
->objectid
, ins
->offset
);
/*
 * btrfs_alloc_reserved_extent - public wrapper: skip log-tree extents
 * (they are handled at log replay), otherwise finish the allocation and
 * clear the reserved accounting (final 0 argument).
 */
3249 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle
*trans
,
3250 struct btrfs_root
*root
, u64 parent
,
3251 u64 root_objectid
, u64 ref_generation
,
3252 u64 owner
, struct btrfs_key
*ins
)
3256 if (root_objectid
== BTRFS_TREE_LOG_OBJECTID
)
3258 ret
= __btrfs_alloc_reserved_extent(trans
, root
, parent
, root_objectid
,
3259 ref_generation
, owner
, ins
);
3260 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 0);
/*
 * btrfs_alloc_logged_extent - log-replay variant: the extent was already
 * chosen by the logged transaction, so just remove it from the block
 * group's free-space cache (after making sure the cache is populated)
 * and insert the extent/backref items.
 */
3265 * this is used by the tree logging recovery code. It records that
3266 * an extent has been allocated and makes sure to clear the free
3267 * space cache bits as well
3269 int btrfs_alloc_logged_extent(struct btrfs_trans_handle
*trans
,
3270 struct btrfs_root
*root
, u64 parent
,
3271 u64 root_objectid
, u64 ref_generation
,
3272 u64 owner
, struct btrfs_key
*ins
)
3275 struct btrfs_block_group_cache
*block_group
;
3277 block_group
= btrfs_lookup_block_group(root
->fs_info
, ins
->objectid
);
3278 mutex_lock(&block_group
->cache_mutex
);
3279 cache_block_group(root
, block_group
);
3280 mutex_unlock(&block_group
->cache_mutex
);
3282 ret
= btrfs_remove_free_space(block_group
, ins
->objectid
,
3285 ret
= __btrfs_alloc_reserved_extent(trans
, root
, parent
, root_objectid
,
3286 ref_generation
, owner
, ins
);
/*
 * btrfs_alloc_extent - reserve + allocate in one call: find free space
 * via __btrfs_reserve_extent, then (except for log-tree extents) insert
 * the extent records.  For log-tree extents the reservation is kept
 * (final 1 argument to update_reserved_extents).
 */
3291 * finds a free extent and does all the dirty work required for allocation
3292 * returns the key for the extent through ins, and a tree buffer for
3293 * the first block of the extent through buf.
3295 * returns 0 if everything worked, non-zero otherwise.
3297 int btrfs_alloc_extent(struct btrfs_trans_handle
*trans
,
3298 struct btrfs_root
*root
,
3299 u64 num_bytes
, u64 parent
, u64 min_alloc_size
,
3300 u64 root_objectid
, u64 ref_generation
,
3301 u64 owner_objectid
, u64 empty_size
, u64 hint_byte
,
3302 u64 search_end
, struct btrfs_key
*ins
, u64 data
)
3306 ret
= __btrfs_reserve_extent(trans
, root
, num_bytes
,
3307 min_alloc_size
, empty_size
, hint_byte
,
3308 search_end
, ins
, data
);
3310 if (root_objectid
!= BTRFS_TREE_LOG_OBJECTID
) {
3311 ret
= __btrfs_alloc_reserved_extent(trans
, root
, parent
,
3312 root_objectid
, ref_generation
,
3313 owner_objectid
, ins
);
3317 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 1);
/*
 * btrfs_init_new_buffer - wrap a freshly allocated tree block in an
 * extent_buffer: stamp the transid, lock it, zero stale contents, mark
 * uptodate, and track it as dirty — in dirty_log_pages for log-tree
 * blocks, otherwise in the transaction's dirty_pages.  Returns the
 * locked buffer, or ERR_PTR(-ENOMEM) on allocation failure.
 */
3322 struct extent_buffer
*btrfs_init_new_buffer(struct btrfs_trans_handle
*trans
,
3323 struct btrfs_root
*root
,
3324 u64 bytenr
, u32 blocksize
)
3326 struct extent_buffer
*buf
;
3328 buf
= btrfs_find_create_tree_block(root
, bytenr
, blocksize
);
3330 return ERR_PTR(-ENOMEM
);
3331 btrfs_set_header_generation(buf
, trans
->transid
);
3332 btrfs_tree_lock(buf
);
3333 clean_tree_block(trans
, root
, buf
);
3334 btrfs_set_buffer_uptodate(buf
);
3335 if (root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
) {
3336 set_extent_dirty(&root
->dirty_log_pages
, buf
->start
,
3337 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
3339 set_extent_dirty(&trans
->transaction
->dirty_pages
, buf
->start
,
3340 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
/* Accounting used by the transaction throttling code. */
3342 trans
->blocks_used
++;
/*
 * btrfs_alloc_free_block - allocate one tree block of @blocksize and
 * return it as an initialized, locked extent_buffer (or ERR_PTR on
 * failure).  min_alloc_size == num_bytes == blocksize, so no split
 * allocations; search_end is unbounded ((u64)-1).
 */
3347 * helper function to allocate a block for a given tree
3348 * returns the tree buffer or NULL.
3350 struct extent_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
3351 struct btrfs_root
*root
,
3352 u32 blocksize
, u64 parent
,
3359 struct btrfs_key ins
;
3361 struct extent_buffer
*buf
;
3363 ret
= btrfs_alloc_extent(trans
, root
, blocksize
, parent
, blocksize
,
/* level is passed as the owner objectid for metadata. */
3364 root_objectid
, ref_generation
, level
,
3365 empty_size
, hint
, (u64
)-1, &ins
, 0);
3368 return ERR_PTR(ret
);
3371 buf
= btrfs_init_new_buffer(trans
, root
, ins
.objectid
, blocksize
);
/*
 * btrfs_drop_leaf_ref - walk every EXTENT_DATA item in @leaf and drop one
 * reference on each referenced file extent (inline extents and holes,
 * i.e. disk_bytenr == 0, are skipped).  Pokes the transaction throttle
 * after each free so committers can make progress.
 */
3375 int btrfs_drop_leaf_ref(struct btrfs_trans_handle
*trans
,
3376 struct btrfs_root
*root
, struct extent_buffer
*leaf
)
3379 u64 leaf_generation
;
3380 struct btrfs_key key
;
3381 struct btrfs_file_extent_item
*fi
;
3386 BUG_ON(!btrfs_is_leaf(leaf
));
3387 nritems
= btrfs_header_nritems(leaf
);
3388 leaf_owner
= btrfs_header_owner(leaf
);
3389 leaf_generation
= btrfs_header_generation(leaf
);
3391 for (i
= 0; i
< nritems
; i
++) {
3395 btrfs_item_key_to_cpu(leaf
, &key
, i
);
3396 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
3398 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
/* Inline extents store data in the leaf itself: nothing to free. */
3399 if (btrfs_file_extent_type(leaf
, fi
) ==
3400 BTRFS_FILE_EXTENT_INLINE
)
3403 * FIXME make sure to insert a trans record that
3404 * repeats the snapshot del on crash
3406 disk_bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
/* disk_bytenr == 0 marks a hole: nothing allocated. */
3407 if (disk_bytenr
== 0)
3410 ret
= __btrfs_free_extent(trans
, root
, disk_bytenr
,
3411 btrfs_file_extent_disk_num_bytes(leaf
, fi
),
3412 leaf
->start
, leaf_owner
, leaf_generation
,
3416 atomic_inc(&root
->fs_info
->throttle_gen
);
3417 wake_up(&root
->fs_info
->transaction_throttle
);
/*
 * cache_drop_leaf_ref - like btrfs_drop_leaf_ref, but driven by a cached
 * btrfs_leaf_ref instead of re-reading the leaf: drop one reference on
 * each extent recorded in ref->extents.  Throttles after each free.
 */
3423 static int noinline
cache_drop_leaf_ref(struct btrfs_trans_handle
*trans
,
3424 struct btrfs_root
*root
,
3425 struct btrfs_leaf_ref
*ref
)
3429 struct btrfs_extent_info
*info
= ref
->extents
;
3431 for (i
= 0; i
< ref
->nritems
; i
++) {
3432 ret
= __btrfs_free_extent(trans
, root
, info
->bytenr
,
3433 info
->num_bytes
, ref
->bytenr
,
3434 ref
->owner
, ref
->generation
,
3437 atomic_inc(&root
->fs_info
->throttle_gen
);
3438 wake_up(&root
->fs_info
->transaction_throttle
);
/*
 * drop_snap_lookup_refcount - look up the reference count of the extent
 * [start, start+len) during snapshot deletion.  The #if 0 section is
 * disabled debug code that re-checks a refcount of 1 under the tree
 * lock; it is dead and kept only for future debugging.
 */
3448 static int drop_snap_lookup_refcount(struct btrfs_root
*root
, u64 start
, u64 len
,
3453 ret
= btrfs_lookup_extent_ref(NULL
, root
, start
, len
, refs
);
3456 #if 0 // some debugging code in case we see problems here
3457 /* if the refs count is one, it won't get increased again. But
3458 * if the ref count is > 1, someone may be decreasing it at
3459 * the same time we are.
3462 struct extent_buffer
*eb
= NULL
;
3463 eb
= btrfs_find_create_tree_block(root
, start
, len
);
3465 btrfs_tree_lock(eb
);
3467 mutex_lock(&root
->fs_info
->alloc_mutex
);
3468 ret
= lookup_extent_ref(NULL
, root
, start
, len
, refs
);
3470 mutex_unlock(&root
->fs_info
->alloc_mutex
);
3473 btrfs_tree_unlock(eb
);
3474 free_extent_buffer(eb
);
3477 printk("block %llu went down to one during drop_snap\n",
3478 (unsigned long long)start
);
/*
 * walk_down_tree - descend from path->nodes[*level], dropping references
 * as we go.  Children with extra refs just get their ref dropped; children
 * whose only ref is ours are descended into (using the leaf-ref cache to
 * avoid reading leaves when possible).  On return, *level points at the
 * first node whose subtree could not be freed wholesale.
 *
 * NOTE(review): extraction dropped interior lines (braces, decls, gotos
 * and the out/ out_nolock labels); partial transcript only.
 */
3489 * helper function for drop_snapshot, this walks down the tree dropping ref
3490 * counts as it goes.
3492 static int noinline
walk_down_tree(struct btrfs_trans_handle
*trans
,
3493 struct btrfs_root
*root
,
3494 struct btrfs_path
*path
, int *level
)
3500 struct extent_buffer
*next
;
3501 struct extent_buffer
*cur
;
3502 struct extent_buffer
*parent
;
3503 struct btrfs_leaf_ref
*ref
;
3508 WARN_ON(*level
< 0);
3509 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
3510 ret
= drop_snap_lookup_refcount(root
, path
->nodes
[*level
]->start
,
3511 path
->nodes
[*level
]->len
, &refs
);
3517 * walk down to the last node level and free all the leaves
3519 while(*level
>= 0) {
3520 WARN_ON(*level
< 0);
3521 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
3522 cur
= path
->nodes
[*level
];
3524 if (btrfs_header_level(cur
) != *level
)
/* Exhausted this node's slots: pop back up a level. */
3527 if (path
->slots
[*level
] >=
3528 btrfs_header_nritems(cur
))
/* At a leaf: drop refs on all its file extents. */
3531 ret
= btrfs_drop_leaf_ref(trans
, root
, cur
);
3535 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
3536 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
3537 blocksize
= btrfs_level_size(root
, *level
- 1);
3539 ret
= drop_snap_lookup_refcount(root
, bytenr
, blocksize
, &refs
);
/* Child is shared (refs > 1): just drop our reference and move on. */
3542 parent
= path
->nodes
[*level
];
3543 root_owner
= btrfs_header_owner(parent
);
3544 root_gen
= btrfs_header_generation(parent
);
3545 path
->slots
[*level
]++;
3547 ret
= __btrfs_free_extent(trans
, root
, bytenr
,
3548 blocksize
, parent
->start
,
3549 root_owner
, root_gen
,
3553 atomic_inc(&root
->fs_info
->throttle_gen
);
3554 wake_up(&root
->fs_info
->transaction_throttle
);
3560 * at this point, we have a single ref, and since the
3561 * only place referencing this extent is a dead root
3562 * the reference count should never go higher.
3563 * So, we don't need to check it again
/* Try the cached leaf-ref to free the leaf without reading it. */
3566 ref
= btrfs_lookup_leaf_ref(root
, bytenr
);
3567 if (ref
&& ref
->generation
!= ptr_gen
) {
/* Stale cache entry: discard it and fall back to reading. */
3568 btrfs_free_leaf_ref(root
, ref
);
3572 ret
= cache_drop_leaf_ref(trans
, root
, ref
);
3574 btrfs_remove_leaf_ref(root
, ref
);
3575 btrfs_free_leaf_ref(root
, ref
);
3579 if (printk_ratelimit()) {
3580 printk("leaf ref miss for bytenr %llu\n",
3581 (unsigned long long)bytenr
);
/* Cache miss: read the child block and descend into it. */
3584 next
= btrfs_find_tree_block(root
, bytenr
, blocksize
);
3585 if (!next
|| !btrfs_buffer_uptodate(next
, ptr_gen
)) {
3586 free_extent_buffer(next
);
3588 next
= read_tree_block(root
, bytenr
, blocksize
,
3593 * this is a debugging check and can go away
3594 * the ref should never go all the way down to 1
3597 ret
= lookup_extent_ref(NULL
, root
, bytenr
, blocksize
,
3603 WARN_ON(*level
<= 0);
3604 if (path
->nodes
[*level
-1])
3605 free_extent_buffer(path
->nodes
[*level
-1]);
3606 path
->nodes
[*level
-1] = next
;
3607 *level
= btrfs_header_level(next
);
3608 path
->slots
[*level
] = 0;
3612 WARN_ON(*level
< 0);
3613 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
/* Free the node we just finished walking (root node parents itself). */
3615 if (path
->nodes
[*level
] == root
->node
) {
3616 parent
= path
->nodes
[*level
];
3617 bytenr
= path
->nodes
[*level
]->start
;
3619 parent
= path
->nodes
[*level
+ 1];
3620 bytenr
= btrfs_node_blockptr(parent
, path
->slots
[*level
+ 1]);
3623 blocksize
= btrfs_level_size(root
, *level
);
3624 root_owner
= btrfs_header_owner(parent
);
3625 root_gen
= btrfs_header_generation(parent
);
3627 ret
= __btrfs_free_extent(trans
, root
, bytenr
, blocksize
,
3628 parent
->start
, root_owner
, root_gen
,
3630 free_extent_buffer(path
->nodes
[*level
]);
3631 path
->nodes
[*level
] = NULL
;
/*
 * walk_down_subtree - like walk_down_tree but for drop_subtree: blocks
 * are read and locked before their reference counts are checked, and
 * shared children are dropped with btrfs_free_extent directly.  Locks
 * are tracked in path->locks[] so walk_up_tree can release them.
 *
 * NOTE(review): extraction dropped interior lines (braces, decls, the
 * refs>1 checks, out label); partial transcript only.
 */
3640 * helper function for drop_subtree, this function is similar to
3641 * walk_down_tree. The main difference is that it checks reference
3642 * counts while tree blocks are locked.
3644 static int noinline
walk_down_subtree(struct btrfs_trans_handle
*trans
,
3645 struct btrfs_root
*root
,
3646 struct btrfs_path
*path
, int *level
)
3648 struct extent_buffer
*next
;
3649 struct extent_buffer
*cur
;
3650 struct extent_buffer
*parent
;
3657 cur
= path
->nodes
[*level
];
3658 ret
= btrfs_lookup_extent_ref(trans
, root
, cur
->start
, cur
->len
,
3664 while (*level
>= 0) {
3665 cur
= path
->nodes
[*level
];
/* Leaf level: drop file-extent refs, then scrub the block. */
3667 ret
= btrfs_drop_leaf_ref(trans
, root
, cur
);
3669 clean_tree_block(trans
, root
, cur
);
3672 if (path
->slots
[*level
] >= btrfs_header_nritems(cur
)) {
3673 clean_tree_block(trans
, root
, cur
);
3677 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
3678 blocksize
= btrfs_level_size(root
, *level
- 1);
3679 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
/* Read and lock the child before inspecting its refcount. */
3681 next
= read_tree_block(root
, bytenr
, blocksize
, ptr_gen
);
3682 btrfs_tree_lock(next
);
3684 ret
= btrfs_lookup_extent_ref(trans
, root
, bytenr
, blocksize
,
/* Shared child: drop our ref and continue with the next slot. */
3688 parent
= path
->nodes
[*level
];
3689 ret
= btrfs_free_extent(trans
, root
, bytenr
,
3690 blocksize
, parent
->start
,
3691 btrfs_header_owner(parent
),
3692 btrfs_header_generation(parent
),
3695 path
->slots
[*level
]++;
3696 btrfs_tree_unlock(next
);
3697 free_extent_buffer(next
);
/* Sole owner: descend, keeping the child locked. */
3701 *level
= btrfs_header_level(next
);
3702 path
->nodes
[*level
] = next
;
3703 path
->slots
[*level
] = 0;
3704 path
->locks
[*level
] = 1;
/* Done with this node: free it against its parent and pop. */
3708 parent
= path
->nodes
[*level
+ 1];
3709 bytenr
= path
->nodes
[*level
]->start
;
3710 blocksize
= path
->nodes
[*level
]->len
;
3712 ret
= btrfs_free_extent(trans
, root
, bytenr
, blocksize
,
3713 parent
->start
, btrfs_header_owner(parent
),
3714 btrfs_header_generation(parent
), *level
, 1);
3717 if (path
->locks
[*level
]) {
3718 btrfs_tree_unlock(path
->nodes
[*level
]);
3719 path
->locks
[*level
] = 0;
3721 free_extent_buffer(path
->nodes
[*level
]);
3722 path
->nodes
[*level
] = NULL
;
3729 * helper for dropping snapshots. This walks back up the tree in the path
3730 * to find the first node higher up where we haven't yet gone through
3733 static int noinline
walk_up_tree(struct btrfs_trans_handle
*trans
,
3734 struct btrfs_root
*root
,
3735 struct btrfs_path
*path
,
3736 int *level
, int max_level
)
3740 struct btrfs_root_item
*root_item
= &root
->root_item
;
3745 for (i
= *level
; i
< max_level
&& path
->nodes
[i
]; i
++) {
3746 slot
= path
->slots
[i
];
3747 if (slot
< btrfs_header_nritems(path
->nodes
[i
]) - 1) {
3748 struct extent_buffer
*node
;
3749 struct btrfs_disk_key disk_key
;
3750 node
= path
->nodes
[i
];
3753 WARN_ON(*level
== 0);
3754 btrfs_node_key(node
, &disk_key
, path
->slots
[i
]);
3755 memcpy(&root_item
->drop_progress
,
3756 &disk_key
, sizeof(disk_key
));
3757 root_item
->drop_level
= i
;
3760 struct extent_buffer
*parent
;
3761 if (path
->nodes
[*level
] == root
->node
)
3762 parent
= path
->nodes
[*level
];
3764 parent
= path
->nodes
[*level
+ 1];
3766 root_owner
= btrfs_header_owner(parent
);
3767 root_gen
= btrfs_header_generation(parent
);
3769 clean_tree_block(trans
, root
, path
->nodes
[*level
]);
3770 ret
= btrfs_free_extent(trans
, root
,
3771 path
->nodes
[*level
]->start
,
3772 path
->nodes
[*level
]->len
,
3773 parent
->start
, root_owner
,
3774 root_gen
, *level
, 1);
3776 if (path
->locks
[*level
]) {
3777 btrfs_tree_unlock(path
->nodes
[*level
]);
3778 path
->locks
[*level
] = 0;
3780 free_extent_buffer(path
->nodes
[*level
]);
3781 path
->nodes
[*level
] = NULL
;
/*
 * btrfs_drop_snapshot - free an entire dead snapshot tree.  Starts either
 * at the root node (fresh drop) or at the saved drop_progress key
 * (resumed drop), then alternates walk_down_tree / walk_up_tree until the
 * tree is gone, yielding when the transaction commits and throttling as
 * it goes.  Caller must hold fs_info->drop_mutex.
 */
3789 * drop the reference count on the tree rooted at 'snap'. This traverses
3790 * the tree freeing any blocks that have a ref count of zero after being
3793 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
3799 struct btrfs_path
*path
;
3802 struct btrfs_root_item
*root_item
= &root
->root_item
;
3804 WARN_ON(!mutex_is_locked(&root
->fs_info
->drop_mutex
));
3805 path
= btrfs_alloc_path();
3808 level
= btrfs_header_level(root
->node
);
/* drop_progress == 0 means this drop has not been started before. */
3810 if (btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
3811 path
->nodes
[level
] = root
->node
;
3812 extent_buffer_get(root
->node
);
3813 path
->slots
[level
] = 0;
3815 struct btrfs_key key
;
3816 struct btrfs_disk_key found_key
;
3817 struct extent_buffer
*node
;
/* Resume: search back down to the saved position. */
3819 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
3820 level
= root_item
->drop_level
;
3821 path
->lowest_level
= level
;
3822 wret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
3827 node
= path
->nodes
[level
];
3828 btrfs_node_key(node
, &found_key
, path
->slots
[level
]);
3829 WARN_ON(memcmp(&found_key
, &root_item
->drop_progress
,
3830 sizeof(found_key
)));
3832 * unlock our path, this is safe because only this
3833 * function is allowed to delete this snapshot
3835 for (i
= 0; i
< BTRFS_MAX_LEVEL
; i
++) {
3836 if (path
->nodes
[i
] && path
->locks
[i
]) {
3838 btrfs_tree_unlock(path
->nodes
[i
]);
3843 wret
= walk_down_tree(trans
, root
, path
, &level
);
3849 wret
= walk_up_tree(trans
, root
, path
, &level
,
/* Yield to a transaction commit in progress. */
3855 if (trans
->transaction
->in_commit
) {
3859 atomic_inc(&root
->fs_info
->throttle_gen
);
3860 wake_up(&root
->fs_info
->transaction_throttle
);
3862 for (i
= 0; i
<= orig_level
; i
++) {
3863 if (path
->nodes
[i
]) {
3864 free_extent_buffer(path
->nodes
[i
]);
3865 path
->nodes
[i
] = NULL
;
3869 btrfs_free_path(path
);
/*
 * btrfs_drop_subtree - free the subtree rooted at @node, whose parent is
 * @parent.  Both buffers must already be locked by the caller; the path
 * is seeded with them (parent's slot set past its last item so walk_up
 * stops there) and walk_down_subtree / walk_up_tree do the work.
 */
3873 int btrfs_drop_subtree(struct btrfs_trans_handle
*trans
,
3874 struct btrfs_root
*root
,
3875 struct extent_buffer
*node
,
3876 struct extent_buffer
*parent
)
3878 struct btrfs_path
*path
;
3884 path
= btrfs_alloc_path();
3887 BUG_ON(!btrfs_tree_locked(parent
));
3888 parent_level
= btrfs_header_level(parent
);
3889 extent_buffer_get(parent
);
3890 path
->nodes
[parent_level
] = parent
;
/* slot == nritems: walk_up_tree treats the parent as exhausted. */
3891 path
->slots
[parent_level
] = btrfs_header_nritems(parent
);
3893 BUG_ON(!btrfs_tree_locked(node
));
3894 level
= btrfs_header_level(node
);
3895 extent_buffer_get(node
);
3896 path
->nodes
[level
] = node
;
3897 path
->slots
[level
] = 0;
3900 wret
= walk_down_subtree(trans
, root
, path
, &level
);
3906 wret
= walk_up_tree(trans
, root
, path
, &level
, parent_level
);
3913 btrfs_free_path(path
);
/*
 * Compute the last page index of a readahead window that begins at
 * @start and spans @nr pages, clamped so it never exceeds @last.
 */
static unsigned long calc_ra(unsigned long start, unsigned long last,
			     unsigned long nr)
{
	return min(last, start + nr - 1);
}
/*
 * relocate_inode_pages - dirty every page of [start, start+len) in
 * @inode so the relocation data gets rewritten at its new location:
 * read each page (with manual readahead), wait out ordered extents,
 * mark it delalloc and dirty.  The first page also gets EXTENT_BOUNDARY
 * so the new extent is not merged with its neighbour.
 *
 * NOTE(review): extraction dropped interior lines (braces, error
 * handling, the out label, return); partial transcript only.
 */
3923 static int noinline
relocate_inode_pages(struct inode
*inode
, u64 start
,
3928 unsigned long first_index
;
3929 unsigned long last_index
;
3932 struct extent_io_tree
*io_tree
= &BTRFS_I(inode
)->io_tree
;
3933 struct file_ra_state
*ra
;
3934 struct btrfs_ordered_extent
*ordered
;
3935 unsigned int total_read
= 0;
3936 unsigned int total_dirty
= 0;
3939 ra
= kzalloc(sizeof(*ra
), GFP_NOFS
);
3941 mutex_lock(&inode
->i_mutex
);
3942 first_index
= start
>> PAGE_CACHE_SHIFT
;
3943 last_index
= (start
+ len
- 1) >> PAGE_CACHE_SHIFT
;
3945 /* make sure the dirty trick played by the caller work */
3946 ret
= invalidate_inode_pages2_range(inode
->i_mapping
,
3947 first_index
, last_index
);
3951 file_ra_state_init(ra
, inode
->i_mapping
);
3953 for (i
= first_index
; i
<= last_index
; i
++) {
/* Kick off readahead once per ra_pages-sized window. */
3954 if (total_read
% ra
->ra_pages
== 0) {
3955 btrfs_force_ra(inode
->i_mapping
, ra
, NULL
, i
,
3956 calc_ra(i
, last_index
, ra
->ra_pages
));
3960 if (((u64
)i
<< PAGE_CACHE_SHIFT
) > i_size_read(inode
))
3962 page
= grab_cache_page(inode
->i_mapping
, i
);
3967 if (!PageUptodate(page
)) {
3968 btrfs_readpage(NULL
, page
);
3970 if (!PageUptodate(page
)) {
3972 page_cache_release(page
);
3977 wait_on_page_writeback(page
);
3979 page_start
= (u64
)page
->index
<< PAGE_CACHE_SHIFT
;
3980 page_end
= page_start
+ PAGE_CACHE_SIZE
- 1;
3981 lock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
/* An ordered extent in flight: wait for it and retry this page. */
3983 ordered
= btrfs_lookup_ordered_extent(inode
, page_start
);
3985 unlock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
3987 page_cache_release(page
);
3988 btrfs_start_ordered_extent(inode
, ordered
, 1);
3989 btrfs_put_ordered_extent(ordered
);
3992 set_page_extent_mapped(page
);
3994 btrfs_set_extent_delalloc(inode
, page_start
, page_end
);
3995 if (i
== first_index
)
3996 set_extent_bits(io_tree
, page_start
, page_end
,
3997 EXTENT_BOUNDARY
, GFP_NOFS
);
3999 set_page_dirty(page
);
4002 unlock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
4004 page_cache_release(page
);
4009 mutex_unlock(&inode
->i_mutex
);
/* Throttle writeback for the pages we just dirtied. */
4010 balance_dirty_pages_ratelimited_nr(inode
->i_mapping
, total_dirty
);
4014 static int noinline
relocate_data_extent(struct inode
*reloc_inode
,
4015 struct btrfs_key
*extent_key
,
4018 struct btrfs_root
*root
= BTRFS_I(reloc_inode
)->root
;
4019 struct extent_map_tree
*em_tree
= &BTRFS_I(reloc_inode
)->extent_tree
;
4020 struct extent_map
*em
;
4021 u64 start
= extent_key
->objectid
- offset
;
4022 u64 end
= start
+ extent_key
->offset
- 1;
4024 em
= alloc_extent_map(GFP_NOFS
);
4025 BUG_ON(!em
|| IS_ERR(em
));
4028 em
->len
= extent_key
->offset
;
4029 em
->block_len
= extent_key
->offset
;
4030 em
->block_start
= extent_key
->objectid
;
4031 em
->bdev
= root
->fs_info
->fs_devices
->latest_bdev
;
4032 set_bit(EXTENT_FLAG_PINNED
, &em
->flags
);
4034 /* setup extent map to cheat btrfs_readpage */
4035 lock_extent(&BTRFS_I(reloc_inode
)->io_tree
, start
, end
, GFP_NOFS
);
4038 spin_lock(&em_tree
->lock
);
4039 ret
= add_extent_mapping(em_tree
, em
);
4040 spin_unlock(&em_tree
->lock
);
4041 if (ret
!= -EEXIST
) {
4042 free_extent_map(em
);
4045 btrfs_drop_extent_cache(reloc_inode
, start
, end
, 0);
4047 unlock_extent(&BTRFS_I(reloc_inode
)->io_tree
, start
, end
, GFP_NOFS
);
4049 return relocate_inode_pages(reloc_inode
, start
, extent_key
->offset
);
4052 struct btrfs_ref_path
{
4054 u64 nodes
[BTRFS_MAX_LEVEL
];
4056 u64 root_generation
;
4063 struct btrfs_key node_keys
[BTRFS_MAX_LEVEL
];
4064 u64 new_nodes
[BTRFS_MAX_LEVEL
];
4067 struct disk_extent
{
4078 static int is_cowonly_root(u64 root_objectid
)
4080 if (root_objectid
== BTRFS_ROOT_TREE_OBJECTID
||
4081 root_objectid
== BTRFS_EXTENT_TREE_OBJECTID
||
4082 root_objectid
== BTRFS_CHUNK_TREE_OBJECTID
||
4083 root_objectid
== BTRFS_DEV_TREE_OBJECTID
||
4084 root_objectid
== BTRFS_TREE_LOG_OBJECTID
)
4089 static int noinline
__next_ref_path(struct btrfs_trans_handle
*trans
,
4090 struct btrfs_root
*extent_root
,
4091 struct btrfs_ref_path
*ref_path
,
4094 struct extent_buffer
*leaf
;
4095 struct btrfs_path
*path
;
4096 struct btrfs_extent_ref
*ref
;
4097 struct btrfs_key key
;
4098 struct btrfs_key found_key
;
4104 path
= btrfs_alloc_path();
4109 ref_path
->lowest_level
= -1;
4110 ref_path
->current_level
= -1;
4111 ref_path
->shared_level
= -1;
4115 level
= ref_path
->current_level
- 1;
4116 while (level
>= -1) {
4118 if (level
< ref_path
->lowest_level
)
4122 bytenr
= ref_path
->nodes
[level
];
4124 bytenr
= ref_path
->extent_start
;
4126 BUG_ON(bytenr
== 0);
4128 parent
= ref_path
->nodes
[level
+ 1];
4129 ref_path
->nodes
[level
+ 1] = 0;
4130 ref_path
->current_level
= level
;
4131 BUG_ON(parent
== 0);
4133 key
.objectid
= bytenr
;
4134 key
.offset
= parent
+ 1;
4135 key
.type
= BTRFS_EXTENT_REF_KEY
;
4137 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 0);
4142 leaf
= path
->nodes
[0];
4143 nritems
= btrfs_header_nritems(leaf
);
4144 if (path
->slots
[0] >= nritems
) {
4145 ret
= btrfs_next_leaf(extent_root
, path
);
4150 leaf
= path
->nodes
[0];
4153 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
4154 if (found_key
.objectid
== bytenr
&&
4155 found_key
.type
== BTRFS_EXTENT_REF_KEY
) {
4156 if (level
< ref_path
->shared_level
)
4157 ref_path
->shared_level
= level
;
4162 btrfs_release_path(extent_root
, path
);
4165 /* reached lowest level */
4169 level
= ref_path
->current_level
;
4170 while (level
< BTRFS_MAX_LEVEL
- 1) {
4173 bytenr
= ref_path
->nodes
[level
];
4175 bytenr
= ref_path
->extent_start
;
4177 BUG_ON(bytenr
== 0);
4179 key
.objectid
= bytenr
;
4181 key
.type
= BTRFS_EXTENT_REF_KEY
;
4183 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 0);
4187 leaf
= path
->nodes
[0];
4188 nritems
= btrfs_header_nritems(leaf
);
4189 if (path
->slots
[0] >= nritems
) {
4190 ret
= btrfs_next_leaf(extent_root
, path
);
4194 /* the extent was freed by someone */
4195 if (ref_path
->lowest_level
== level
)
4197 btrfs_release_path(extent_root
, path
);
4200 leaf
= path
->nodes
[0];
4203 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
4204 if (found_key
.objectid
!= bytenr
||
4205 found_key
.type
!= BTRFS_EXTENT_REF_KEY
) {
4206 /* the extent was freed by someone */
4207 if (ref_path
->lowest_level
== level
) {
4211 btrfs_release_path(extent_root
, path
);
4215 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
4216 struct btrfs_extent_ref
);
4217 ref_objectid
= btrfs_ref_objectid(leaf
, ref
);
4218 if (ref_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
4220 level
= (int)ref_objectid
;
4221 BUG_ON(level
>= BTRFS_MAX_LEVEL
);
4222 ref_path
->lowest_level
= level
;
4223 ref_path
->current_level
= level
;
4224 ref_path
->nodes
[level
] = bytenr
;
4226 WARN_ON(ref_objectid
!= level
);
4229 WARN_ON(level
!= -1);
4233 if (ref_path
->lowest_level
== level
) {
4234 ref_path
->owner_objectid
= ref_objectid
;
4235 ref_path
->num_refs
= btrfs_ref_num_refs(leaf
, ref
);
4239 * the block is tree root or the block isn't in reference
4242 if (found_key
.objectid
== found_key
.offset
||
4243 is_cowonly_root(btrfs_ref_root(leaf
, ref
))) {
4244 ref_path
->root_objectid
= btrfs_ref_root(leaf
, ref
);
4245 ref_path
->root_generation
=
4246 btrfs_ref_generation(leaf
, ref
);
4248 /* special reference from the tree log */
4249 ref_path
->nodes
[0] = found_key
.offset
;
4250 ref_path
->current_level
= 0;
4257 BUG_ON(ref_path
->nodes
[level
] != 0);
4258 ref_path
->nodes
[level
] = found_key
.offset
;
4259 ref_path
->current_level
= level
;
4262 * the reference was created in the running transaction,
4263 * no need to continue walking up.
4265 if (btrfs_ref_generation(leaf
, ref
) == trans
->transid
) {
4266 ref_path
->root_objectid
= btrfs_ref_root(leaf
, ref
);
4267 ref_path
->root_generation
=
4268 btrfs_ref_generation(leaf
, ref
);
4273 btrfs_release_path(extent_root
, path
);
4276 /* reached max tree level, but no tree root found. */
4279 btrfs_free_path(path
);
4283 static int btrfs_first_ref_path(struct btrfs_trans_handle
*trans
,
4284 struct btrfs_root
*extent_root
,
4285 struct btrfs_ref_path
*ref_path
,
4288 memset(ref_path
, 0, sizeof(*ref_path
));
4289 ref_path
->extent_start
= extent_start
;
4291 return __next_ref_path(trans
, extent_root
, ref_path
, 1);
4294 static int btrfs_next_ref_path(struct btrfs_trans_handle
*trans
,
4295 struct btrfs_root
*extent_root
,
4296 struct btrfs_ref_path
*ref_path
)
4298 return __next_ref_path(trans
, extent_root
, ref_path
, 0);
4301 static int noinline
get_new_locations(struct inode
*reloc_inode
,
4302 struct btrfs_key
*extent_key
,
4303 u64 offset
, int no_fragment
,
4304 struct disk_extent
**extents
,
4307 struct btrfs_root
*root
= BTRFS_I(reloc_inode
)->root
;
4308 struct btrfs_path
*path
;
4309 struct btrfs_file_extent_item
*fi
;
4310 struct extent_buffer
*leaf
;
4311 struct disk_extent
*exts
= *extents
;
4312 struct btrfs_key found_key
;
4317 int max
= *nr_extents
;
4320 WARN_ON(!no_fragment
&& *extents
);
4323 exts
= kmalloc(sizeof(*exts
) * max
, GFP_NOFS
);
4328 path
= btrfs_alloc_path();
4331 cur_pos
= extent_key
->objectid
- offset
;
4332 last_byte
= extent_key
->objectid
+ extent_key
->offset
;
4333 ret
= btrfs_lookup_file_extent(NULL
, root
, path
, reloc_inode
->i_ino
,
4343 leaf
= path
->nodes
[0];
4344 nritems
= btrfs_header_nritems(leaf
);
4345 if (path
->slots
[0] >= nritems
) {
4346 ret
= btrfs_next_leaf(root
, path
);
4351 leaf
= path
->nodes
[0];
4354 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
4355 if (found_key
.offset
!= cur_pos
||
4356 found_key
.type
!= BTRFS_EXTENT_DATA_KEY
||
4357 found_key
.objectid
!= reloc_inode
->i_ino
)
4360 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
4361 struct btrfs_file_extent_item
);
4362 if (btrfs_file_extent_type(leaf
, fi
) !=
4363 BTRFS_FILE_EXTENT_REG
||
4364 btrfs_file_extent_disk_bytenr(leaf
, fi
) == 0)
4368 struct disk_extent
*old
= exts
;
4370 exts
= kzalloc(sizeof(*exts
) * max
, GFP_NOFS
);
4371 memcpy(exts
, old
, sizeof(*exts
) * nr
);
4372 if (old
!= *extents
)
4376 exts
[nr
].disk_bytenr
=
4377 btrfs_file_extent_disk_bytenr(leaf
, fi
);
4378 exts
[nr
].disk_num_bytes
=
4379 btrfs_file_extent_disk_num_bytes(leaf
, fi
);
4380 exts
[nr
].offset
= btrfs_file_extent_offset(leaf
, fi
);
4381 exts
[nr
].num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
4382 exts
[nr
].ram_bytes
= btrfs_file_extent_ram_bytes(leaf
, fi
);
4383 exts
[nr
].compression
= btrfs_file_extent_compression(leaf
, fi
);
4384 exts
[nr
].encryption
= btrfs_file_extent_encryption(leaf
, fi
);
4385 exts
[nr
].other_encoding
= btrfs_file_extent_other_encoding(leaf
,
4387 BUG_ON(exts
[nr
].offset
> 0);
4388 BUG_ON(exts
[nr
].compression
|| exts
[nr
].encryption
);
4389 BUG_ON(exts
[nr
].num_bytes
!= exts
[nr
].disk_num_bytes
);
4391 cur_pos
+= exts
[nr
].num_bytes
;
4394 if (cur_pos
+ offset
>= last_byte
)
4404 WARN_ON(cur_pos
+ offset
> last_byte
);
4405 if (cur_pos
+ offset
< last_byte
) {
4411 btrfs_free_path(path
);
4413 if (exts
!= *extents
)
4422 static int noinline
replace_one_extent(struct btrfs_trans_handle
*trans
,
4423 struct btrfs_root
*root
,
4424 struct btrfs_path
*path
,
4425 struct btrfs_key
*extent_key
,
4426 struct btrfs_key
*leaf_key
,
4427 struct btrfs_ref_path
*ref_path
,
4428 struct disk_extent
*new_extents
,
4431 struct extent_buffer
*leaf
;
4432 struct btrfs_file_extent_item
*fi
;
4433 struct inode
*inode
= NULL
;
4434 struct btrfs_key key
;
4442 int extent_locked
= 0;
4446 memcpy(&key
, leaf_key
, sizeof(key
));
4447 first_pos
= INT_LIMIT(loff_t
) - extent_key
->offset
;
4448 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
) {
4449 if (key
.objectid
< ref_path
->owner_objectid
||
4450 (key
.objectid
== ref_path
->owner_objectid
&&
4451 key
.type
< BTRFS_EXTENT_DATA_KEY
)) {
4452 key
.objectid
= ref_path
->owner_objectid
;
4453 key
.type
= BTRFS_EXTENT_DATA_KEY
;
4459 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
4463 leaf
= path
->nodes
[0];
4464 nritems
= btrfs_header_nritems(leaf
);
4466 if (extent_locked
&& ret
> 0) {
4468 * the file extent item was modified by someone
4469 * before the extent got locked.
4471 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4472 lock_end
, GFP_NOFS
);
4476 if (path
->slots
[0] >= nritems
) {
4477 if (++nr_scaned
> 2)
4480 BUG_ON(extent_locked
);
4481 ret
= btrfs_next_leaf(root
, path
);
4486 leaf
= path
->nodes
[0];
4487 nritems
= btrfs_header_nritems(leaf
);
4490 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
4492 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
) {
4493 if ((key
.objectid
> ref_path
->owner_objectid
) ||
4494 (key
.objectid
== ref_path
->owner_objectid
&&
4495 key
.type
> BTRFS_EXTENT_DATA_KEY
) ||
4496 (key
.offset
>= first_pos
+ extent_key
->offset
))
4500 if (inode
&& key
.objectid
!= inode
->i_ino
) {
4501 BUG_ON(extent_locked
);
4502 btrfs_release_path(root
, path
);
4503 mutex_unlock(&inode
->i_mutex
);
4509 if (key
.type
!= BTRFS_EXTENT_DATA_KEY
) {
4514 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
4515 struct btrfs_file_extent_item
);
4516 extent_type
= btrfs_file_extent_type(leaf
, fi
);
4517 if ((extent_type
!= BTRFS_FILE_EXTENT_REG
&&
4518 extent_type
!= BTRFS_FILE_EXTENT_PREALLOC
) ||
4519 (btrfs_file_extent_disk_bytenr(leaf
, fi
) !=
4520 extent_key
->objectid
)) {
4526 num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
4527 ext_offset
= btrfs_file_extent_offset(leaf
, fi
);
4529 if (first_pos
> key
.offset
- ext_offset
)
4530 first_pos
= key
.offset
- ext_offset
;
4532 if (!extent_locked
) {
4533 lock_start
= key
.offset
;
4534 lock_end
= lock_start
+ num_bytes
- 1;
4536 if (lock_start
> key
.offset
||
4537 lock_end
+ 1 < key
.offset
+ num_bytes
) {
4538 unlock_extent(&BTRFS_I(inode
)->io_tree
,
4539 lock_start
, lock_end
, GFP_NOFS
);
4545 btrfs_release_path(root
, path
);
4547 inode
= btrfs_iget_locked(root
->fs_info
->sb
,
4548 key
.objectid
, root
);
4549 if (inode
->i_state
& I_NEW
) {
4550 BTRFS_I(inode
)->root
= root
;
4551 BTRFS_I(inode
)->location
.objectid
=
4553 BTRFS_I(inode
)->location
.type
=
4554 BTRFS_INODE_ITEM_KEY
;
4555 BTRFS_I(inode
)->location
.offset
= 0;
4556 btrfs_read_locked_inode(inode
);
4557 unlock_new_inode(inode
);
4560 * some code call btrfs_commit_transaction while
4561 * holding the i_mutex, so we can't use mutex_lock
4564 if (is_bad_inode(inode
) ||
4565 !mutex_trylock(&inode
->i_mutex
)) {
4568 key
.offset
= (u64
)-1;
4573 if (!extent_locked
) {
4574 struct btrfs_ordered_extent
*ordered
;
4576 btrfs_release_path(root
, path
);
4578 lock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4579 lock_end
, GFP_NOFS
);
4580 ordered
= btrfs_lookup_first_ordered_extent(inode
,
4583 ordered
->file_offset
<= lock_end
&&
4584 ordered
->file_offset
+ ordered
->len
> lock_start
) {
4585 unlock_extent(&BTRFS_I(inode
)->io_tree
,
4586 lock_start
, lock_end
, GFP_NOFS
);
4587 btrfs_start_ordered_extent(inode
, ordered
, 1);
4588 btrfs_put_ordered_extent(ordered
);
4589 key
.offset
+= num_bytes
;
4593 btrfs_put_ordered_extent(ordered
);
4599 if (nr_extents
== 1) {
4600 /* update extent pointer in place */
4601 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
4602 new_extents
[0].disk_bytenr
);
4603 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
4604 new_extents
[0].disk_num_bytes
);
4605 btrfs_mark_buffer_dirty(leaf
);
4607 btrfs_drop_extent_cache(inode
, key
.offset
,
4608 key
.offset
+ num_bytes
- 1, 0);
4610 ret
= btrfs_inc_extent_ref(trans
, root
,
4611 new_extents
[0].disk_bytenr
,
4612 new_extents
[0].disk_num_bytes
,
4614 root
->root_key
.objectid
,
4619 ret
= btrfs_free_extent(trans
, root
,
4620 extent_key
->objectid
,
4623 btrfs_header_owner(leaf
),
4624 btrfs_header_generation(leaf
),
4628 btrfs_release_path(root
, path
);
4629 key
.offset
+= num_bytes
;
4637 * drop old extent pointer at first, then insert the
4638 * new pointers one bye one
4640 btrfs_release_path(root
, path
);
4641 ret
= btrfs_drop_extents(trans
, root
, inode
, key
.offset
,
4642 key
.offset
+ num_bytes
,
4643 key
.offset
, &alloc_hint
);
4646 for (i
= 0; i
< nr_extents
; i
++) {
4647 if (ext_offset
>= new_extents
[i
].num_bytes
) {
4648 ext_offset
-= new_extents
[i
].num_bytes
;
4651 extent_len
= min(new_extents
[i
].num_bytes
-
4652 ext_offset
, num_bytes
);
4654 ret
= btrfs_insert_empty_item(trans
, root
,
4659 leaf
= path
->nodes
[0];
4660 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
4661 struct btrfs_file_extent_item
);
4662 btrfs_set_file_extent_generation(leaf
, fi
,
4664 btrfs_set_file_extent_type(leaf
, fi
,
4665 BTRFS_FILE_EXTENT_REG
);
4666 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
4667 new_extents
[i
].disk_bytenr
);
4668 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
4669 new_extents
[i
].disk_num_bytes
);
4670 btrfs_set_file_extent_ram_bytes(leaf
, fi
,
4671 new_extents
[i
].ram_bytes
);
4673 btrfs_set_file_extent_compression(leaf
, fi
,
4674 new_extents
[i
].compression
);
4675 btrfs_set_file_extent_encryption(leaf
, fi
,
4676 new_extents
[i
].encryption
);
4677 btrfs_set_file_extent_other_encoding(leaf
, fi
,
4678 new_extents
[i
].other_encoding
);
4680 btrfs_set_file_extent_num_bytes(leaf
, fi
,
4682 ext_offset
+= new_extents
[i
].offset
;
4683 btrfs_set_file_extent_offset(leaf
, fi
,
4685 btrfs_mark_buffer_dirty(leaf
);
4687 btrfs_drop_extent_cache(inode
, key
.offset
,
4688 key
.offset
+ extent_len
- 1, 0);
4690 ret
= btrfs_inc_extent_ref(trans
, root
,
4691 new_extents
[i
].disk_bytenr
,
4692 new_extents
[i
].disk_num_bytes
,
4694 root
->root_key
.objectid
,
4695 trans
->transid
, key
.objectid
);
4697 btrfs_release_path(root
, path
);
4699 inode_add_bytes(inode
, extent_len
);
4702 num_bytes
-= extent_len
;
4703 key
.offset
+= extent_len
;
4708 BUG_ON(i
>= nr_extents
);
4712 if (extent_locked
) {
4713 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4714 lock_end
, GFP_NOFS
);
4718 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
&&
4719 key
.offset
>= first_pos
+ extent_key
->offset
)
4726 btrfs_release_path(root
, path
);
4728 mutex_unlock(&inode
->i_mutex
);
4729 if (extent_locked
) {
4730 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4731 lock_end
, GFP_NOFS
);
4738 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle
*trans
,
4739 struct btrfs_root
*root
,
4740 struct extent_buffer
*buf
, u64 orig_start
)
4745 BUG_ON(btrfs_header_generation(buf
) != trans
->transid
);
4746 BUG_ON(root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
);
4748 level
= btrfs_header_level(buf
);
4750 struct btrfs_leaf_ref
*ref
;
4751 struct btrfs_leaf_ref
*orig_ref
;
4753 orig_ref
= btrfs_lookup_leaf_ref(root
, orig_start
);
4757 ref
= btrfs_alloc_leaf_ref(root
, orig_ref
->nritems
);
4759 btrfs_free_leaf_ref(root
, orig_ref
);
4763 ref
->nritems
= orig_ref
->nritems
;
4764 memcpy(ref
->extents
, orig_ref
->extents
,
4765 sizeof(ref
->extents
[0]) * ref
->nritems
);
4767 btrfs_free_leaf_ref(root
, orig_ref
);
4769 ref
->root_gen
= trans
->transid
;
4770 ref
->bytenr
= buf
->start
;
4771 ref
->owner
= btrfs_header_owner(buf
);
4772 ref
->generation
= btrfs_header_generation(buf
);
4773 ret
= btrfs_add_leaf_ref(root
, ref
, 0);
4775 btrfs_free_leaf_ref(root
, ref
);
4780 static int noinline
invalidate_extent_cache(struct btrfs_root
*root
,
4781 struct extent_buffer
*leaf
,
4782 struct btrfs_block_group_cache
*group
,
4783 struct btrfs_root
*target_root
)
4785 struct btrfs_key key
;
4786 struct inode
*inode
= NULL
;
4787 struct btrfs_file_extent_item
*fi
;
4789 u64 skip_objectid
= 0;
4793 nritems
= btrfs_header_nritems(leaf
);
4794 for (i
= 0; i
< nritems
; i
++) {
4795 btrfs_item_key_to_cpu(leaf
, &key
, i
);
4796 if (key
.objectid
== skip_objectid
||
4797 key
.type
!= BTRFS_EXTENT_DATA_KEY
)
4799 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
4800 if (btrfs_file_extent_type(leaf
, fi
) ==
4801 BTRFS_FILE_EXTENT_INLINE
)
4803 if (btrfs_file_extent_disk_bytenr(leaf
, fi
) == 0)
4805 if (!inode
|| inode
->i_ino
!= key
.objectid
) {
4807 inode
= btrfs_ilookup(target_root
->fs_info
->sb
,
4808 key
.objectid
, target_root
, 1);
4811 skip_objectid
= key
.objectid
;
4814 num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
4816 lock_extent(&BTRFS_I(inode
)->io_tree
, key
.offset
,
4817 key
.offset
+ num_bytes
- 1, GFP_NOFS
);
4818 btrfs_drop_extent_cache(inode
, key
.offset
,
4819 key
.offset
+ num_bytes
- 1, 1);
4820 unlock_extent(&BTRFS_I(inode
)->io_tree
, key
.offset
,
4821 key
.offset
+ num_bytes
- 1, GFP_NOFS
);
4828 static int noinline
replace_extents_in_leaf(struct btrfs_trans_handle
*trans
,
4829 struct btrfs_root
*root
,
4830 struct extent_buffer
*leaf
,
4831 struct btrfs_block_group_cache
*group
,
4832 struct inode
*reloc_inode
)
4834 struct btrfs_key key
;
4835 struct btrfs_key extent_key
;
4836 struct btrfs_file_extent_item
*fi
;
4837 struct btrfs_leaf_ref
*ref
;
4838 struct disk_extent
*new_extent
;
4847 new_extent
= kmalloc(sizeof(*new_extent
), GFP_NOFS
);
4848 BUG_ON(!new_extent
);
4850 ref
= btrfs_lookup_leaf_ref(root
, leaf
->start
);
4854 nritems
= btrfs_header_nritems(leaf
);
4855 for (i
= 0; i
< nritems
; i
++) {
4856 btrfs_item_key_to_cpu(leaf
, &key
, i
);
4857 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
4859 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
4860 if (btrfs_file_extent_type(leaf
, fi
) ==
4861 BTRFS_FILE_EXTENT_INLINE
)
4863 bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
4864 num_bytes
= btrfs_file_extent_disk_num_bytes(leaf
, fi
);
4869 if (bytenr
>= group
->key
.objectid
+ group
->key
.offset
||
4870 bytenr
+ num_bytes
<= group
->key
.objectid
)
4873 extent_key
.objectid
= bytenr
;
4874 extent_key
.offset
= num_bytes
;
4875 extent_key
.type
= BTRFS_EXTENT_ITEM_KEY
;
4877 ret
= get_new_locations(reloc_inode
, &extent_key
,
4878 group
->key
.objectid
, 1,
4879 &new_extent
, &nr_extent
);
4884 BUG_ON(ref
->extents
[ext_index
].bytenr
!= bytenr
);
4885 BUG_ON(ref
->extents
[ext_index
].num_bytes
!= num_bytes
);
4886 ref
->extents
[ext_index
].bytenr
= new_extent
->disk_bytenr
;
4887 ref
->extents
[ext_index
].num_bytes
= new_extent
->disk_num_bytes
;
4889 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
4890 new_extent
->disk_bytenr
);
4891 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
4892 new_extent
->disk_num_bytes
);
4893 btrfs_mark_buffer_dirty(leaf
);
4895 ret
= btrfs_inc_extent_ref(trans
, root
,
4896 new_extent
->disk_bytenr
,
4897 new_extent
->disk_num_bytes
,
4899 root
->root_key
.objectid
,
4900 trans
->transid
, key
.objectid
);
4902 ret
= btrfs_free_extent(trans
, root
,
4903 bytenr
, num_bytes
, leaf
->start
,
4904 btrfs_header_owner(leaf
),
4905 btrfs_header_generation(leaf
),
4911 BUG_ON(ext_index
+ 1 != ref
->nritems
);
4912 btrfs_free_leaf_ref(root
, ref
);
4916 int btrfs_free_reloc_root(struct btrfs_trans_handle
*trans
,
4917 struct btrfs_root
*root
)
4919 struct btrfs_root
*reloc_root
;
4922 if (root
->reloc_root
) {
4923 reloc_root
= root
->reloc_root
;
4924 root
->reloc_root
= NULL
;
4925 list_add(&reloc_root
->dead_list
,
4926 &root
->fs_info
->dead_reloc_roots
);
4928 btrfs_set_root_bytenr(&reloc_root
->root_item
,
4929 reloc_root
->node
->start
);
4930 btrfs_set_root_level(&root
->root_item
,
4931 btrfs_header_level(reloc_root
->node
));
4932 memset(&reloc_root
->root_item
.drop_progress
, 0,
4933 sizeof(struct btrfs_disk_key
));
4934 reloc_root
->root_item
.drop_level
= 0;
4936 ret
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
4937 &reloc_root
->root_key
,
4938 &reloc_root
->root_item
);
4944 int btrfs_drop_dead_reloc_roots(struct btrfs_root
*root
)
4946 struct btrfs_trans_handle
*trans
;
4947 struct btrfs_root
*reloc_root
;
4948 struct btrfs_root
*prev_root
= NULL
;
4949 struct list_head dead_roots
;
4953 INIT_LIST_HEAD(&dead_roots
);
4954 list_splice_init(&root
->fs_info
->dead_reloc_roots
, &dead_roots
);
4956 while (!list_empty(&dead_roots
)) {
4957 reloc_root
= list_entry(dead_roots
.prev
,
4958 struct btrfs_root
, dead_list
);
4959 list_del_init(&reloc_root
->dead_list
);
4961 BUG_ON(reloc_root
->commit_root
!= NULL
);
4963 trans
= btrfs_join_transaction(root
, 1);
4966 mutex_lock(&root
->fs_info
->drop_mutex
);
4967 ret
= btrfs_drop_snapshot(trans
, reloc_root
);
4970 mutex_unlock(&root
->fs_info
->drop_mutex
);
4972 nr
= trans
->blocks_used
;
4973 ret
= btrfs_end_transaction(trans
, root
);
4975 btrfs_btree_balance_dirty(root
, nr
);
4978 free_extent_buffer(reloc_root
->node
);
4980 ret
= btrfs_del_root(trans
, root
->fs_info
->tree_root
,
4981 &reloc_root
->root_key
);
4983 mutex_unlock(&root
->fs_info
->drop_mutex
);
4985 nr
= trans
->blocks_used
;
4986 ret
= btrfs_end_transaction(trans
, root
);
4988 btrfs_btree_balance_dirty(root
, nr
);
4991 prev_root
= reloc_root
;
4994 btrfs_remove_leaf_refs(prev_root
, (u64
)-1, 0);
5000 int btrfs_add_dead_reloc_root(struct btrfs_root
*root
)
5002 list_add(&root
->dead_list
, &root
->fs_info
->dead_reloc_roots
);
5006 int btrfs_cleanup_reloc_trees(struct btrfs_root
*root
)
5008 struct btrfs_root
*reloc_root
;
5009 struct btrfs_trans_handle
*trans
;
5010 struct btrfs_key location
;
5014 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
5015 ret
= btrfs_find_dead_roots(root
, BTRFS_TREE_RELOC_OBJECTID
, NULL
);
5017 found
= !list_empty(&root
->fs_info
->dead_reloc_roots
);
5018 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
5021 trans
= btrfs_start_transaction(root
, 1);
5023 ret
= btrfs_commit_transaction(trans
, root
);
5027 location
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
5028 location
.offset
= (u64
)-1;
5029 location
.type
= BTRFS_ROOT_ITEM_KEY
;
5031 reloc_root
= btrfs_read_fs_root_no_name(root
->fs_info
, &location
);
5032 BUG_ON(!reloc_root
);
5033 btrfs_orphan_cleanup(reloc_root
);
5037 static int noinline
init_reloc_tree(struct btrfs_trans_handle
*trans
,
5038 struct btrfs_root
*root
)
5040 struct btrfs_root
*reloc_root
;
5041 struct extent_buffer
*eb
;
5042 struct btrfs_root_item
*root_item
;
5043 struct btrfs_key root_key
;
5046 BUG_ON(!root
->ref_cows
);
5047 if (root
->reloc_root
)
5050 root_item
= kmalloc(sizeof(*root_item
), GFP_NOFS
);
5053 ret
= btrfs_copy_root(trans
, root
, root
->commit_root
,
5054 &eb
, BTRFS_TREE_RELOC_OBJECTID
);
5057 root_key
.objectid
= BTRFS_TREE_RELOC_OBJECTID
;
5058 root_key
.offset
= root
->root_key
.objectid
;
5059 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
5061 memcpy(root_item
, &root
->root_item
, sizeof(root_item
));
5062 btrfs_set_root_refs(root_item
, 0);
5063 btrfs_set_root_bytenr(root_item
, eb
->start
);
5064 btrfs_set_root_level(root_item
, btrfs_header_level(eb
));
5065 btrfs_set_root_generation(root_item
, trans
->transid
);
5067 btrfs_tree_unlock(eb
);
5068 free_extent_buffer(eb
);
5070 ret
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
,
5071 &root_key
, root_item
);
5075 reloc_root
= btrfs_read_fs_root_no_radix(root
->fs_info
->tree_root
,
5077 BUG_ON(!reloc_root
);
5078 reloc_root
->last_trans
= trans
->transid
;
5079 reloc_root
->commit_root
= NULL
;
5080 reloc_root
->ref_tree
= &root
->fs_info
->reloc_ref_tree
;
5082 root
->reloc_root
= reloc_root
;
5087 * Core function of space balance.
5089 * The idea is using reloc trees to relocate tree blocks in reference
5090 * counted roots. There is one reloc tree for each subvol, and all
5091 * reloc trees share same root key objectid. Reloc trees are snapshots
5092 * of the latest committed roots of subvols (root->commit_root).
5094 * To relocate a tree block referenced by a subvol, there are two steps.
5095 * COW the block through subvol's reloc tree, then update block pointer
5096 * in the subvol to point to the new block. Since all reloc trees share
5097 * same root key objectid, doing special handing for tree blocks owned
5098 * by them is easy. Once a tree block has been COWed in one reloc tree,
5099 * we can use the resulting new block directly when the same block is
5100 * required to COW again through other reloc trees. By this way, relocated
5101 * tree blocks are shared between reloc trees, so they are also shared
5104 static int noinline
relocate_one_path(struct btrfs_trans_handle
*trans
,
5105 struct btrfs_root
*root
,
5106 struct btrfs_path
*path
,
5107 struct btrfs_key
*first_key
,
5108 struct btrfs_ref_path
*ref_path
,
5109 struct btrfs_block_group_cache
*group
,
5110 struct inode
*reloc_inode
)
5112 struct btrfs_root
*reloc_root
;
5113 struct extent_buffer
*eb
= NULL
;
5114 struct btrfs_key
*keys
;
5118 int lowest_level
= 0;
5121 if (ref_path
->owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
5122 lowest_level
= ref_path
->owner_objectid
;
5124 if (!root
->ref_cows
) {
5125 path
->lowest_level
= lowest_level
;
5126 ret
= btrfs_search_slot(trans
, root
, first_key
, path
, 0, 1);
5128 path
->lowest_level
= 0;
5129 btrfs_release_path(root
, path
);
5133 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
5134 ret
= init_reloc_tree(trans
, root
);
5136 reloc_root
= root
->reloc_root
;
5138 shared_level
= ref_path
->shared_level
;
5139 ref_path
->shared_level
= BTRFS_MAX_LEVEL
- 1;
5141 keys
= ref_path
->node_keys
;
5142 nodes
= ref_path
->new_nodes
;
5143 memset(&keys
[shared_level
+ 1], 0,
5144 sizeof(*keys
) * (BTRFS_MAX_LEVEL
- shared_level
- 1));
5145 memset(&nodes
[shared_level
+ 1], 0,
5146 sizeof(*nodes
) * (BTRFS_MAX_LEVEL
- shared_level
- 1));
5148 if (nodes
[lowest_level
] == 0) {
5149 path
->lowest_level
= lowest_level
;
5150 ret
= btrfs_search_slot(trans
, reloc_root
, first_key
, path
,
5153 for (level
= lowest_level
; level
< BTRFS_MAX_LEVEL
; level
++) {
5154 eb
= path
->nodes
[level
];
5155 if (!eb
|| eb
== reloc_root
->node
)
5157 nodes
[level
] = eb
->start
;
5159 btrfs_item_key_to_cpu(eb
, &keys
[level
], 0);
5161 btrfs_node_key_to_cpu(eb
, &keys
[level
], 0);
5164 ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
5165 eb
= path
->nodes
[0];
5166 ret
= replace_extents_in_leaf(trans
, reloc_root
, eb
,
5167 group
, reloc_inode
);
5170 btrfs_release_path(reloc_root
, path
);
5172 ret
= btrfs_merge_path(trans
, reloc_root
, keys
, nodes
,
5178 * replace tree blocks in the fs tree with tree blocks in
5181 ret
= btrfs_merge_path(trans
, root
, keys
, nodes
, lowest_level
);
5184 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
5185 ret
= btrfs_search_slot(trans
, reloc_root
, first_key
, path
,
5188 extent_buffer_get(path
->nodes
[0]);
5189 eb
= path
->nodes
[0];
5190 btrfs_release_path(reloc_root
, path
);
5191 ret
= invalidate_extent_cache(reloc_root
, eb
, group
, root
);
5193 free_extent_buffer(eb
);
5196 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
5197 path
->lowest_level
= 0;
5201 static int noinline
relocate_tree_block(struct btrfs_trans_handle
*trans
,
5202 struct btrfs_root
*root
,
5203 struct btrfs_path
*path
,
5204 struct btrfs_key
*first_key
,
5205 struct btrfs_ref_path
*ref_path
)
5209 ret
= relocate_one_path(trans
, root
, path
, first_key
,
5210 ref_path
, NULL
, NULL
);
5213 if (root
== root
->fs_info
->extent_root
)
5214 btrfs_extent_post_op(trans
, root
);
5219 static int noinline
del_extent_zero(struct btrfs_trans_handle
*trans
,
5220 struct btrfs_root
*extent_root
,
5221 struct btrfs_path
*path
,
5222 struct btrfs_key
*extent_key
)
5226 ret
= btrfs_search_slot(trans
, extent_root
, extent_key
, path
, -1, 1);
5229 ret
= btrfs_del_item(trans
, extent_root
, path
);
5231 btrfs_release_path(extent_root
, path
);
5235 static struct btrfs_root noinline
*read_ref_root(struct btrfs_fs_info
*fs_info
,
5236 struct btrfs_ref_path
*ref_path
)
5238 struct btrfs_key root_key
;
5240 root_key
.objectid
= ref_path
->root_objectid
;
5241 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
5242 if (is_cowonly_root(ref_path
->root_objectid
))
5243 root_key
.offset
= 0;
5245 root_key
.offset
= (u64
)-1;
5247 return btrfs_read_fs_root_no_name(fs_info
, &root_key
);
5250 static int noinline
relocate_one_extent(struct btrfs_root
*extent_root
,
5251 struct btrfs_path
*path
,
5252 struct btrfs_key
*extent_key
,
5253 struct btrfs_block_group_cache
*group
,
5254 struct inode
*reloc_inode
, int pass
)
5256 struct btrfs_trans_handle
*trans
;
5257 struct btrfs_root
*found_root
;
5258 struct btrfs_ref_path
*ref_path
= NULL
;
5259 struct disk_extent
*new_extents
= NULL
;
5264 struct btrfs_key first_key
;
5268 trans
= btrfs_start_transaction(extent_root
, 1);
5271 if (extent_key
->objectid
== 0) {
5272 ret
= del_extent_zero(trans
, extent_root
, path
, extent_key
);
5276 ref_path
= kmalloc(sizeof(*ref_path
), GFP_NOFS
);
5282 for (loops
= 0; ; loops
++) {
5284 ret
= btrfs_first_ref_path(trans
, extent_root
, ref_path
,
5285 extent_key
->objectid
);
5287 ret
= btrfs_next_ref_path(trans
, extent_root
, ref_path
);
5294 if (ref_path
->root_objectid
== BTRFS_TREE_LOG_OBJECTID
||
5295 ref_path
->root_objectid
== BTRFS_TREE_RELOC_OBJECTID
)
5298 found_root
= read_ref_root(extent_root
->fs_info
, ref_path
);
5299 BUG_ON(!found_root
);
5301 * for reference counted tree, only process reference paths
5302 * rooted at the latest committed root.
5304 if (found_root
->ref_cows
&&
5305 ref_path
->root_generation
!= found_root
->root_key
.offset
)
5308 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
5311 * copy data extents to new locations
5313 u64 group_start
= group
->key
.objectid
;
5314 ret
= relocate_data_extent(reloc_inode
,
5323 level
= ref_path
->owner_objectid
;
5326 if (prev_block
!= ref_path
->nodes
[level
]) {
5327 struct extent_buffer
*eb
;
5328 u64 block_start
= ref_path
->nodes
[level
];
5329 u64 block_size
= btrfs_level_size(found_root
, level
);
5331 eb
= read_tree_block(found_root
, block_start
,
5333 btrfs_tree_lock(eb
);
5334 BUG_ON(level
!= btrfs_header_level(eb
));
5337 btrfs_item_key_to_cpu(eb
, &first_key
, 0);
5339 btrfs_node_key_to_cpu(eb
, &first_key
, 0);
5341 btrfs_tree_unlock(eb
);
5342 free_extent_buffer(eb
);
5343 prev_block
= block_start
;
5346 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
&&
5349 * use fallback method to process the remaining
5353 u64 group_start
= group
->key
.objectid
;
5354 new_extents
= kmalloc(sizeof(*new_extents
),
5357 ret
= get_new_locations(reloc_inode
,
5365 btrfs_record_root_in_trans(found_root
);
5366 ret
= replace_one_extent(trans
, found_root
,
5368 &first_key
, ref_path
,
5369 new_extents
, nr_extents
);
5375 btrfs_record_root_in_trans(found_root
);
5376 if (ref_path
->owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
5377 ret
= relocate_tree_block(trans
, found_root
, path
,
5378 &first_key
, ref_path
);
5381 * try to update data extent references while
5382 * keeping metadata shared between snapshots.
5384 ret
= relocate_one_path(trans
, found_root
, path
,
5385 &first_key
, ref_path
,
5386 group
, reloc_inode
);
5393 btrfs_end_transaction(trans
, extent_root
);
5399 static u64
update_block_group_flags(struct btrfs_root
*root
, u64 flags
)
5402 u64 stripped
= BTRFS_BLOCK_GROUP_RAID0
|
5403 BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_RAID10
;
5405 num_devices
= root
->fs_info
->fs_devices
->rw_devices
;
5406 if (num_devices
== 1) {
5407 stripped
|= BTRFS_BLOCK_GROUP_DUP
;
5408 stripped
= flags
& ~stripped
;
5410 /* turn raid0 into single device chunks */
5411 if (flags
& BTRFS_BLOCK_GROUP_RAID0
)
5414 /* turn mirroring into duplication */
5415 if (flags
& (BTRFS_BLOCK_GROUP_RAID1
|
5416 BTRFS_BLOCK_GROUP_RAID10
))
5417 return stripped
| BTRFS_BLOCK_GROUP_DUP
;
5420 /* they already had raid on here, just return */
5421 if (flags
& stripped
)
5424 stripped
|= BTRFS_BLOCK_GROUP_DUP
;
5425 stripped
= flags
& ~stripped
;
5427 /* switch duplicated blocks with raid1 */
5428 if (flags
& BTRFS_BLOCK_GROUP_DUP
)
5429 return stripped
| BTRFS_BLOCK_GROUP_RAID1
;
5431 /* turn single device chunks into raid0 */
5432 return stripped
| BTRFS_BLOCK_GROUP_RAID0
;
5437 static int __alloc_chunk_for_shrink(struct btrfs_root
*root
,
5438 struct btrfs_block_group_cache
*shrink_block_group
,
5441 struct btrfs_trans_handle
*trans
;
5442 u64 new_alloc_flags
;
5445 spin_lock(&shrink_block_group
->lock
);
5446 if (btrfs_block_group_used(&shrink_block_group
->item
) > 0) {
5447 spin_unlock(&shrink_block_group
->lock
);
5449 trans
= btrfs_start_transaction(root
, 1);
5450 spin_lock(&shrink_block_group
->lock
);
5452 new_alloc_flags
= update_block_group_flags(root
,
5453 shrink_block_group
->flags
);
5454 if (new_alloc_flags
!= shrink_block_group
->flags
) {
5456 btrfs_block_group_used(&shrink_block_group
->item
);
5458 calc
= shrink_block_group
->key
.offset
;
5460 spin_unlock(&shrink_block_group
->lock
);
5462 do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
5463 calc
+ 2 * 1024 * 1024, new_alloc_flags
, force
);
5465 btrfs_end_transaction(trans
, root
);
5467 spin_unlock(&shrink_block_group
->lock
);
5471 static int __insert_orphan_inode(struct btrfs_trans_handle
*trans
,
5472 struct btrfs_root
*root
,
5473 u64 objectid
, u64 size
)
5475 struct btrfs_path
*path
;
5476 struct btrfs_inode_item
*item
;
5477 struct extent_buffer
*leaf
;
5480 path
= btrfs_alloc_path();
5484 ret
= btrfs_insert_empty_inode(trans
, root
, path
, objectid
);
5488 leaf
= path
->nodes
[0];
5489 item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_inode_item
);
5490 memset_extent_buffer(leaf
, 0, (unsigned long)item
, sizeof(*item
));
5491 btrfs_set_inode_generation(leaf
, item
, 1);
5492 btrfs_set_inode_size(leaf
, item
, size
);
5493 btrfs_set_inode_mode(leaf
, item
, S_IFREG
| 0600);
5494 btrfs_set_inode_flags(leaf
, item
, BTRFS_INODE_NODATASUM
|
5495 BTRFS_INODE_NOCOMPRESS
);
5496 btrfs_mark_buffer_dirty(leaf
);
5497 btrfs_release_path(root
, path
);
5499 btrfs_free_path(path
);
5503 static struct inode noinline
*create_reloc_inode(struct btrfs_fs_info
*fs_info
,
5504 struct btrfs_block_group_cache
*group
)
5506 struct inode
*inode
= NULL
;
5507 struct btrfs_trans_handle
*trans
;
5508 struct btrfs_root
*root
;
5509 struct btrfs_key root_key
;
5510 u64 objectid
= BTRFS_FIRST_FREE_OBJECTID
;
5513 root_key
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
5514 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
5515 root_key
.offset
= (u64
)-1;
5516 root
= btrfs_read_fs_root_no_name(fs_info
, &root_key
);
5518 return ERR_CAST(root
);
5520 trans
= btrfs_start_transaction(root
, 1);
5523 err
= btrfs_find_free_objectid(trans
, root
, objectid
, &objectid
);
5527 err
= __insert_orphan_inode(trans
, root
, objectid
, group
->key
.offset
);
5530 err
= btrfs_insert_file_extent(trans
, root
, objectid
, 0, 0, 0,
5531 group
->key
.offset
, 0, group
->key
.offset
,
5535 inode
= btrfs_iget_locked(root
->fs_info
->sb
, objectid
, root
);
5536 if (inode
->i_state
& I_NEW
) {
5537 BTRFS_I(inode
)->root
= root
;
5538 BTRFS_I(inode
)->location
.objectid
= objectid
;
5539 BTRFS_I(inode
)->location
.type
= BTRFS_INODE_ITEM_KEY
;
5540 BTRFS_I(inode
)->location
.offset
= 0;
5541 btrfs_read_locked_inode(inode
);
5542 unlock_new_inode(inode
);
5543 BUG_ON(is_bad_inode(inode
));
5548 err
= btrfs_orphan_add(trans
, inode
);
5550 btrfs_end_transaction(trans
, root
);
5554 inode
= ERR_PTR(err
);
5559 int btrfs_relocate_block_group(struct btrfs_root
*root
, u64 group_start
)
5561 struct btrfs_trans_handle
*trans
;
5562 struct btrfs_path
*path
;
5563 struct btrfs_fs_info
*info
= root
->fs_info
;
5564 struct extent_buffer
*leaf
;
5565 struct inode
*reloc_inode
;
5566 struct btrfs_block_group_cache
*block_group
;
5567 struct btrfs_key key
;
5576 root
= root
->fs_info
->extent_root
;
5578 block_group
= btrfs_lookup_block_group(info
, group_start
);
5579 BUG_ON(!block_group
);
5581 printk("btrfs relocating block group %llu flags %llu\n",
5582 (unsigned long long)block_group
->key
.objectid
,
5583 (unsigned long long)block_group
->flags
);
5585 path
= btrfs_alloc_path();
5588 reloc_inode
= create_reloc_inode(info
, block_group
);
5589 BUG_ON(IS_ERR(reloc_inode
));
5591 __alloc_chunk_for_shrink(root
, block_group
, 1);
5592 set_block_group_readonly(block_group
);
5594 btrfs_start_delalloc_inodes(info
->tree_root
);
5595 btrfs_wait_ordered_extents(info
->tree_root
, 0);
5600 key
.objectid
= block_group
->key
.objectid
;
5603 cur_byte
= key
.objectid
;
5605 trans
= btrfs_start_transaction(info
->tree_root
, 1);
5606 btrfs_commit_transaction(trans
, info
->tree_root
);
5608 mutex_lock(&root
->fs_info
->cleaner_mutex
);
5609 btrfs_clean_old_snapshots(info
->tree_root
);
5610 btrfs_remove_leaf_refs(info
->tree_root
, (u64
)-1, 1);
5611 mutex_unlock(&root
->fs_info
->cleaner_mutex
);
5614 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
5618 leaf
= path
->nodes
[0];
5619 nritems
= btrfs_header_nritems(leaf
);
5620 if (path
->slots
[0] >= nritems
) {
5621 ret
= btrfs_next_leaf(root
, path
);
5628 leaf
= path
->nodes
[0];
5629 nritems
= btrfs_header_nritems(leaf
);
5632 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
5634 if (key
.objectid
>= block_group
->key
.objectid
+
5635 block_group
->key
.offset
)
5638 if (progress
&& need_resched()) {
5639 btrfs_release_path(root
, path
);
5646 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
||
5647 key
.objectid
+ key
.offset
<= cur_byte
) {
5653 cur_byte
= key
.objectid
+ key
.offset
;
5654 btrfs_release_path(root
, path
);
5656 __alloc_chunk_for_shrink(root
, block_group
, 0);
5657 ret
= relocate_one_extent(root
, path
, &key
, block_group
,
5663 key
.objectid
= cur_byte
;
5668 btrfs_release_path(root
, path
);
5671 btrfs_wait_ordered_range(reloc_inode
, 0, (u64
)-1);
5672 invalidate_mapping_pages(reloc_inode
->i_mapping
, 0, -1);
5673 WARN_ON(reloc_inode
->i_mapping
->nrpages
);
5676 if (total_found
> 0) {
5677 printk("btrfs found %llu extents in pass %d\n",
5678 (unsigned long long)total_found
, pass
);
5680 if (total_found
== skipped
&& pass
> 2) {
5682 reloc_inode
= create_reloc_inode(info
, block_group
);
5688 /* delete reloc_inode */
5691 /* unpin extents in this range */
5692 trans
= btrfs_start_transaction(info
->tree_root
, 1);
5693 btrfs_commit_transaction(trans
, info
->tree_root
);
5695 spin_lock(&block_group
->lock
);
5696 WARN_ON(block_group
->pinned
> 0);
5697 WARN_ON(block_group
->reserved
> 0);
5698 WARN_ON(btrfs_block_group_used(&block_group
->item
) > 0);
5699 spin_unlock(&block_group
->lock
);
5702 btrfs_free_path(path
);
5706 static int find_first_block_group(struct btrfs_root
*root
,
5707 struct btrfs_path
*path
, struct btrfs_key
*key
)
5710 struct btrfs_key found_key
;
5711 struct extent_buffer
*leaf
;
5714 ret
= btrfs_search_slot(NULL
, root
, key
, path
, 0, 0);
5719 slot
= path
->slots
[0];
5720 leaf
= path
->nodes
[0];
5721 if (slot
>= btrfs_header_nritems(leaf
)) {
5722 ret
= btrfs_next_leaf(root
, path
);
5729 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
5731 if (found_key
.objectid
>= key
->objectid
&&
5732 found_key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
5743 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
5745 struct btrfs_block_group_cache
*block_group
;
5748 spin_lock(&info
->block_group_cache_lock
);
5749 while ((n
= rb_last(&info
->block_group_cache_tree
)) != NULL
) {
5750 block_group
= rb_entry(n
, struct btrfs_block_group_cache
,
5752 rb_erase(&block_group
->cache_node
,
5753 &info
->block_group_cache_tree
);
5754 spin_unlock(&info
->block_group_cache_lock
);
5756 btrfs_remove_free_space_cache(block_group
);
5757 down_write(&block_group
->space_info
->groups_sem
);
5758 list_del(&block_group
->list
);
5759 up_write(&block_group
->space_info
->groups_sem
);
5762 spin_lock(&info
->block_group_cache_lock
);
5764 spin_unlock(&info
->block_group_cache_lock
);
5768 int btrfs_read_block_groups(struct btrfs_root
*root
)
5770 struct btrfs_path
*path
;
5772 struct btrfs_block_group_cache
*cache
;
5773 struct btrfs_fs_info
*info
= root
->fs_info
;
5774 struct btrfs_space_info
*space_info
;
5775 struct btrfs_key key
;
5776 struct btrfs_key found_key
;
5777 struct extent_buffer
*leaf
;
5779 root
= info
->extent_root
;
5782 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
5783 path
= btrfs_alloc_path();
5788 ret
= find_first_block_group(root
, path
, &key
);
5796 leaf
= path
->nodes
[0];
5797 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
5798 cache
= kzalloc(sizeof(*cache
), GFP_NOFS
);
5804 spin_lock_init(&cache
->lock
);
5805 mutex_init(&cache
->alloc_mutex
);
5806 mutex_init(&cache
->cache_mutex
);
5807 INIT_LIST_HEAD(&cache
->list
);
5808 read_extent_buffer(leaf
, &cache
->item
,
5809 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
5810 sizeof(cache
->item
));
5811 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
5813 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
5814 btrfs_release_path(root
, path
);
5815 cache
->flags
= btrfs_block_group_flags(&cache
->item
);
5817 ret
= update_space_info(info
, cache
->flags
, found_key
.offset
,
5818 btrfs_block_group_used(&cache
->item
),
5821 cache
->space_info
= space_info
;
5822 down_write(&space_info
->groups_sem
);
5823 list_add_tail(&cache
->list
, &space_info
->block_groups
);
5824 up_write(&space_info
->groups_sem
);
5826 ret
= btrfs_add_block_group_cache(root
->fs_info
, cache
);
5829 set_avail_alloc_bits(root
->fs_info
, cache
->flags
);
5830 if (btrfs_chunk_readonly(root
, cache
->key
.objectid
))
5831 set_block_group_readonly(cache
);
5835 btrfs_free_path(path
);
5839 int btrfs_make_block_group(struct btrfs_trans_handle
*trans
,
5840 struct btrfs_root
*root
, u64 bytes_used
,
5841 u64 type
, u64 chunk_objectid
, u64 chunk_offset
,
5845 struct btrfs_root
*extent_root
;
5846 struct btrfs_block_group_cache
*cache
;
5848 extent_root
= root
->fs_info
->extent_root
;
5850 root
->fs_info
->last_trans_new_blockgroup
= trans
->transid
;
5852 cache
= kzalloc(sizeof(*cache
), GFP_NOFS
);
5856 cache
->key
.objectid
= chunk_offset
;
5857 cache
->key
.offset
= size
;
5858 spin_lock_init(&cache
->lock
);
5859 mutex_init(&cache
->alloc_mutex
);
5860 mutex_init(&cache
->cache_mutex
);
5861 INIT_LIST_HEAD(&cache
->list
);
5862 btrfs_set_key_type(&cache
->key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
5864 btrfs_set_block_group_used(&cache
->item
, bytes_used
);
5865 btrfs_set_block_group_chunk_objectid(&cache
->item
, chunk_objectid
);
5866 cache
->flags
= type
;
5867 btrfs_set_block_group_flags(&cache
->item
, type
);
5869 ret
= update_space_info(root
->fs_info
, cache
->flags
, size
, bytes_used
,
5870 &cache
->space_info
);
5872 down_write(&cache
->space_info
->groups_sem
);
5873 list_add_tail(&cache
->list
, &cache
->space_info
->block_groups
);
5874 up_write(&cache
->space_info
->groups_sem
);
5876 ret
= btrfs_add_block_group_cache(root
->fs_info
, cache
);
5879 ret
= btrfs_insert_item(trans
, extent_root
, &cache
->key
, &cache
->item
,
5880 sizeof(cache
->item
));
5883 finish_current_insert(trans
, extent_root
, 0);
5884 ret
= del_pending_extents(trans
, extent_root
, 0);
5886 set_avail_alloc_bits(extent_root
->fs_info
, type
);
5891 int btrfs_remove_block_group(struct btrfs_trans_handle
*trans
,
5892 struct btrfs_root
*root
, u64 group_start
)
5894 struct btrfs_path
*path
;
5895 struct btrfs_block_group_cache
*block_group
;
5896 struct btrfs_key key
;
5899 root
= root
->fs_info
->extent_root
;
5901 block_group
= btrfs_lookup_block_group(root
->fs_info
, group_start
);
5902 BUG_ON(!block_group
);
5903 BUG_ON(!block_group
->ro
);
5905 memcpy(&key
, &block_group
->key
, sizeof(key
));
5907 path
= btrfs_alloc_path();
5910 btrfs_remove_free_space_cache(block_group
);
5911 rb_erase(&block_group
->cache_node
,
5912 &root
->fs_info
->block_group_cache_tree
);
5913 down_write(&block_group
->space_info
->groups_sem
);
5914 list_del(&block_group
->list
);
5915 up_write(&block_group
->space_info
->groups_sem
);
5917 spin_lock(&block_group
->space_info
->lock
);
5918 block_group
->space_info
->total_bytes
-= block_group
->key
.offset
;
5919 block_group
->space_info
->bytes_readonly
-= block_group
->key
.offset
;
5920 spin_unlock(&block_group
->space_info
->lock
);
5921 block_group
->space_info
->full
= 0;
5924 memset(shrink_block_group, 0, sizeof(*shrink_block_group));
5925 kfree(shrink_block_group);
5928 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
5934 ret
= btrfs_del_item(trans
, root
, path
);
5936 btrfs_free_path(path
);