2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
26 #include "print-tree.h"
27 #include "transaction.h"
30 #include "ref-cache.h"
32 #define PENDING_EXTENT_INSERT 0
33 #define PENDING_EXTENT_DELETE 1
34 #define PENDING_BACKREF_UPDATE 2
36 struct pending_extent_op
{
45 struct list_head list
;
49 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
50 btrfs_root
*extent_root
, int all
);
51 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
52 btrfs_root
*extent_root
, int all
);
53 static struct btrfs_block_group_cache
*
54 __btrfs_find_block_group(struct btrfs_root
*root
,
55 struct btrfs_block_group_cache
*hint
,
56 u64 search_start
, int data
, int owner
);
57 static int pin_down_bytes(struct btrfs_trans_handle
*trans
,
58 struct btrfs_root
*root
,
59 u64 bytenr
, u64 num_bytes
, int is_data
);
60 static int update_block_group(struct btrfs_trans_handle
*trans
,
61 struct btrfs_root
*root
,
62 u64 bytenr
, u64 num_bytes
, int alloc
,
65 static int block_group_bits(struct btrfs_block_group_cache
*cache
, u64 bits
)
67 return (cache
->flags
& bits
) == bits
;
71 * this adds the block group to the fs_info rb tree for the block group
74 int btrfs_add_block_group_cache(struct btrfs_fs_info
*info
,
75 struct btrfs_block_group_cache
*block_group
)
78 struct rb_node
*parent
= NULL
;
79 struct btrfs_block_group_cache
*cache
;
81 spin_lock(&info
->block_group_cache_lock
);
82 p
= &info
->block_group_cache_tree
.rb_node
;
86 cache
= rb_entry(parent
, struct btrfs_block_group_cache
,
88 if (block_group
->key
.objectid
< cache
->key
.objectid
) {
90 } else if (block_group
->key
.objectid
> cache
->key
.objectid
) {
93 spin_unlock(&info
->block_group_cache_lock
);
98 rb_link_node(&block_group
->cache_node
, parent
, p
);
99 rb_insert_color(&block_group
->cache_node
,
100 &info
->block_group_cache_tree
);
101 spin_unlock(&info
->block_group_cache_lock
);
107 * This will return the block group at or after bytenr if contains is 0, else
108 * it will return the block group that contains the bytenr
110 static struct btrfs_block_group_cache
*
111 block_group_cache_tree_search(struct btrfs_fs_info
*info
, u64 bytenr
,
114 struct btrfs_block_group_cache
*cache
, *ret
= NULL
;
118 spin_lock(&info
->block_group_cache_lock
);
119 n
= info
->block_group_cache_tree
.rb_node
;
122 cache
= rb_entry(n
, struct btrfs_block_group_cache
,
124 end
= cache
->key
.objectid
+ cache
->key
.offset
- 1;
125 start
= cache
->key
.objectid
;
127 if (bytenr
< start
) {
128 if (!contains
&& (!ret
|| start
< ret
->key
.objectid
))
131 } else if (bytenr
> start
) {
132 if (contains
&& bytenr
<= end
) {
142 spin_unlock(&info
->block_group_cache_lock
);
148 * this is only called by cache_block_group, since we could have freed extents
149 * we need to check the pinned_extents for any extents that can't be used yet
150 * since their free space will be released as soon as the transaction commits.
152 static int add_new_free_space(struct btrfs_block_group_cache
*block_group
,
153 struct btrfs_fs_info
*info
, u64 start
, u64 end
)
155 u64 extent_start
, extent_end
, size
;
158 mutex_lock(&info
->pinned_mutex
);
159 while (start
< end
) {
160 ret
= find_first_extent_bit(&info
->pinned_extents
, start
,
161 &extent_start
, &extent_end
,
166 if (extent_start
== start
) {
167 start
= extent_end
+ 1;
168 } else if (extent_start
> start
&& extent_start
< end
) {
169 size
= extent_start
- start
;
170 ret
= btrfs_add_free_space_lock(block_group
, start
,
173 start
= extent_end
+ 1;
181 ret
= btrfs_add_free_space_lock(block_group
, start
, size
);
184 mutex_unlock(&info
->pinned_mutex
);
189 static int cache_block_group(struct btrfs_root
*root
,
190 struct btrfs_block_group_cache
*block_group
)
192 struct btrfs_path
*path
;
194 struct btrfs_key key
;
195 struct extent_buffer
*leaf
;
204 root
= root
->fs_info
->extent_root
;
206 if (block_group
->cached
)
209 path
= btrfs_alloc_path();
215 * we get into deadlocks with paths held by callers of this function.
216 * since the alloc_mutex is protecting things right now, just
217 * skip the locking here
219 path
->skip_locking
= 1;
220 first_free
= max_t(u64
, block_group
->key
.objectid
,
221 BTRFS_SUPER_INFO_OFFSET
+ BTRFS_SUPER_INFO_SIZE
);
222 key
.objectid
= block_group
->key
.objectid
;
224 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
225 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
228 ret
= btrfs_previous_item(root
, path
, 0, BTRFS_EXTENT_ITEM_KEY
);
232 leaf
= path
->nodes
[0];
233 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
234 if (key
.objectid
+ key
.offset
> first_free
)
235 first_free
= key
.objectid
+ key
.offset
;
238 leaf
= path
->nodes
[0];
239 slot
= path
->slots
[0];
240 if (slot
>= btrfs_header_nritems(leaf
)) {
241 ret
= btrfs_next_leaf(root
, path
);
249 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
250 if (key
.objectid
< block_group
->key
.objectid
)
253 if (key
.objectid
>= block_group
->key
.objectid
+
254 block_group
->key
.offset
)
257 if (btrfs_key_type(&key
) == BTRFS_EXTENT_ITEM_KEY
) {
263 add_new_free_space(block_group
, root
->fs_info
, last
,
266 last
= key
.objectid
+ key
.offset
;
275 add_new_free_space(block_group
, root
->fs_info
, last
,
276 block_group
->key
.objectid
+
277 block_group
->key
.offset
);
279 block_group
->cached
= 1;
282 btrfs_free_path(path
);
287 * return the block group that starts at or after bytenr
289 struct btrfs_block_group_cache
*btrfs_lookup_first_block_group(struct
293 struct btrfs_block_group_cache
*cache
;
295 cache
= block_group_cache_tree_search(info
, bytenr
, 0);
301 * return the block group that contains teh given bytenr
303 struct btrfs_block_group_cache
*btrfs_lookup_block_group(struct
307 struct btrfs_block_group_cache
*cache
;
309 cache
= block_group_cache_tree_search(info
, bytenr
, 1);
314 static struct btrfs_space_info
*__find_space_info(struct btrfs_fs_info
*info
,
317 struct list_head
*head
= &info
->space_info
;
318 struct list_head
*cur
;
319 struct btrfs_space_info
*found
;
320 list_for_each(cur
, head
) {
321 found
= list_entry(cur
, struct btrfs_space_info
, list
);
322 if (found
->flags
== flags
)
328 static u64
div_factor(u64 num
, int factor
)
337 static struct btrfs_block_group_cache
*
338 __btrfs_find_block_group(struct btrfs_root
*root
,
339 struct btrfs_block_group_cache
*hint
,
340 u64 search_start
, int data
, int owner
)
342 struct btrfs_block_group_cache
*cache
;
343 struct btrfs_block_group_cache
*found_group
= NULL
;
344 struct btrfs_fs_info
*info
= root
->fs_info
;
352 if (data
& BTRFS_BLOCK_GROUP_METADATA
)
356 struct btrfs_block_group_cache
*shint
;
357 shint
= btrfs_lookup_first_block_group(info
, search_start
);
358 if (shint
&& block_group_bits(shint
, data
)) {
359 spin_lock(&shint
->lock
);
360 used
= btrfs_block_group_used(&shint
->item
);
361 if (used
+ shint
->pinned
+ shint
->reserved
<
362 div_factor(shint
->key
.offset
, factor
)) {
363 spin_unlock(&shint
->lock
);
366 spin_unlock(&shint
->lock
);
369 if (hint
&& block_group_bits(hint
, data
)) {
370 spin_lock(&hint
->lock
);
371 used
= btrfs_block_group_used(&hint
->item
);
372 if (used
+ hint
->pinned
+ hint
->reserved
<
373 div_factor(hint
->key
.offset
, factor
)) {
374 spin_unlock(&hint
->lock
);
377 spin_unlock(&hint
->lock
);
378 last
= hint
->key
.objectid
+ hint
->key
.offset
;
381 last
= max(hint
->key
.objectid
, search_start
);
387 cache
= btrfs_lookup_first_block_group(root
->fs_info
, last
);
391 spin_lock(&cache
->lock
);
392 last
= cache
->key
.objectid
+ cache
->key
.offset
;
393 used
= btrfs_block_group_used(&cache
->item
);
395 if (block_group_bits(cache
, data
)) {
396 free_check
= div_factor(cache
->key
.offset
, factor
);
397 if (used
+ cache
->pinned
+ cache
->reserved
<
400 spin_unlock(&cache
->lock
);
404 spin_unlock(&cache
->lock
);
412 if (!full_search
&& factor
< 10) {
422 struct btrfs_block_group_cache
*btrfs_find_block_group(struct btrfs_root
*root
,
423 struct btrfs_block_group_cache
424 *hint
, u64 search_start
,
428 struct btrfs_block_group_cache
*ret
;
429 ret
= __btrfs_find_block_group(root
, hint
, search_start
, data
, owner
);
433 /* simple helper to search for an existing extent at a given offset */
434 int btrfs_lookup_extent(struct btrfs_root
*root
, u64 start
, u64 len
)
437 struct btrfs_key key
;
438 struct btrfs_path
*path
;
440 path
= btrfs_alloc_path();
442 key
.objectid
= start
;
444 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
445 ret
= btrfs_search_slot(NULL
, root
->fs_info
->extent_root
, &key
, path
,
447 btrfs_free_path(path
);
452 * Back reference rules. Back refs have three main goals:
454 * 1) differentiate between all holders of references to an extent so that
455 * when a reference is dropped we can make sure it was a valid reference
456 * before freeing the extent.
458 * 2) Provide enough information to quickly find the holders of an extent
459 * if we notice a given block is corrupted or bad.
461 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
462 * maintenance. This is actually the same as #2, but with a slightly
463 * different use case.
465 * File extents can be referenced by:
467 * - multiple snapshots, subvolumes, or different generations in one subvol
468 * - different files inside a single subvolume
469 * - different offsets inside a file (bookend extents in file.c)
471 * The extent ref structure has fields for:
473 * - Objectid of the subvolume root
474 * - Generation number of the tree holding the reference
475 * - objectid of the file holding the reference
476 * - number of references holding by parent node (alway 1 for tree blocks)
478 * Btree leaf may hold multiple references to a file extent. In most cases,
479 * these references are from same file and the corresponding offsets inside
480 * the file are close together.
482 * When a file extent is allocated the fields are filled in:
483 * (root_key.objectid, trans->transid, inode objectid, 1)
485 * When a leaf is cow'd new references are added for every file extent found
486 * in the leaf. It looks similar to the create case, but trans->transid will
487 * be different when the block is cow'd.
489 * (root_key.objectid, trans->transid, inode objectid,
490 * number of references in the leaf)
492 * When a file extent is removed either during snapshot deletion or
493 * file truncation, we find the corresponding back reference and check
494 * the following fields:
496 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
499 * Btree extents can be referenced by:
501 * - Different subvolumes
502 * - Different generations of the same subvolume
504 * When a tree block is created, back references are inserted:
506 * (root->root_key.objectid, trans->transid, level, 1)
508 * When a tree block is cow'd, new back references are added for all the
509 * blocks it points to. If the tree block isn't in reference counted root,
510 * the old back references are removed. These new back references are of
511 * the form (trans->transid will have increased since creation):
513 * (root->root_key.objectid, trans->transid, level, 1)
515 * When a backref is in deleting, the following fields are checked:
517 * if backref was for a tree root:
518 * (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
520 * (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
522 * Back Reference Key composing:
524 * The key objectid corresponds to the first byte in the extent, the key
525 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
526 * byte of parent extent. If a extent is tree root, the key offset is set
527 * to the key objectid.
530 static int noinline
lookup_extent_backref(struct btrfs_trans_handle
*trans
,
531 struct btrfs_root
*root
,
532 struct btrfs_path
*path
,
533 u64 bytenr
, u64 parent
,
534 u64 ref_root
, u64 ref_generation
,
535 u64 owner_objectid
, int del
)
537 struct btrfs_key key
;
538 struct btrfs_extent_ref
*ref
;
539 struct extent_buffer
*leaf
;
543 key
.objectid
= bytenr
;
544 key
.type
= BTRFS_EXTENT_REF_KEY
;
547 ret
= btrfs_search_slot(trans
, root
, &key
, path
, del
? -1 : 0, 1);
555 leaf
= path
->nodes
[0];
556 ref
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_ref
);
557 ref_objectid
= btrfs_ref_objectid(leaf
, ref
);
558 if (btrfs_ref_root(leaf
, ref
) != ref_root
||
559 btrfs_ref_generation(leaf
, ref
) != ref_generation
||
560 (ref_objectid
!= owner_objectid
&&
561 ref_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
)) {
572 * updates all the backrefs that are pending on update_list for the
575 static int noinline
update_backrefs(struct btrfs_trans_handle
*trans
,
576 struct btrfs_root
*extent_root
,
577 struct btrfs_path
*path
,
578 struct list_head
*update_list
)
580 struct btrfs_key key
;
581 struct btrfs_extent_ref
*ref
;
582 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
583 struct pending_extent_op
*op
;
584 struct extent_buffer
*leaf
;
586 struct list_head
*cur
= update_list
->next
;
588 u64 ref_root
= extent_root
->root_key
.objectid
;
590 op
= list_entry(cur
, struct pending_extent_op
, list
);
593 key
.objectid
= op
->bytenr
;
594 key
.type
= BTRFS_EXTENT_REF_KEY
;
595 key
.offset
= op
->orig_parent
;
597 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 1);
600 leaf
= path
->nodes
[0];
603 ref
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_ref
);
605 ref_objectid
= btrfs_ref_objectid(leaf
, ref
);
607 if (btrfs_ref_root(leaf
, ref
) != ref_root
||
608 btrfs_ref_generation(leaf
, ref
) != op
->orig_generation
||
609 (ref_objectid
!= op
->level
&&
610 ref_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
)) {
611 printk(KERN_ERR
"couldn't find %Lu, parent %Lu, root %Lu, "
612 "owner %u\n", op
->bytenr
, op
->orig_parent
,
613 ref_root
, op
->level
);
614 btrfs_print_leaf(extent_root
, leaf
);
618 key
.objectid
= op
->bytenr
;
619 key
.offset
= op
->parent
;
620 key
.type
= BTRFS_EXTENT_REF_KEY
;
621 ret
= btrfs_set_item_key_safe(trans
, extent_root
, path
, &key
);
623 ref
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_ref
);
624 btrfs_set_ref_generation(leaf
, ref
, op
->generation
);
628 list_del_init(&op
->list
);
629 unlock_extent(&info
->extent_ins
, op
->bytenr
,
630 op
->bytenr
+ op
->num_bytes
- 1, GFP_NOFS
);
633 if (cur
== update_list
) {
634 btrfs_mark_buffer_dirty(path
->nodes
[0]);
635 btrfs_release_path(extent_root
, path
);
639 op
= list_entry(cur
, struct pending_extent_op
, list
);
642 while (path
->slots
[0] < btrfs_header_nritems(leaf
)) {
643 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
644 if (key
.objectid
== op
->bytenr
&&
645 key
.type
== BTRFS_EXTENT_REF_KEY
)
650 btrfs_mark_buffer_dirty(path
->nodes
[0]);
651 btrfs_release_path(extent_root
, path
);
658 static int noinline
insert_extents(struct btrfs_trans_handle
*trans
,
659 struct btrfs_root
*extent_root
,
660 struct btrfs_path
*path
,
661 struct list_head
*insert_list
, int nr
)
663 struct btrfs_key
*keys
;
665 struct pending_extent_op
*op
;
666 struct extent_buffer
*leaf
;
667 struct list_head
*cur
= insert_list
->next
;
668 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
669 u64 ref_root
= extent_root
->root_key
.objectid
;
670 int i
= 0, last
= 0, ret
;
676 keys
= kzalloc(total
* sizeof(struct btrfs_key
), GFP_NOFS
);
680 data_size
= kzalloc(total
* sizeof(u32
), GFP_NOFS
);
686 list_for_each_entry(op
, insert_list
, list
) {
687 keys
[i
].objectid
= op
->bytenr
;
688 keys
[i
].offset
= op
->num_bytes
;
689 keys
[i
].type
= BTRFS_EXTENT_ITEM_KEY
;
690 data_size
[i
] = sizeof(struct btrfs_extent_item
);
693 keys
[i
].objectid
= op
->bytenr
;
694 keys
[i
].offset
= op
->parent
;
695 keys
[i
].type
= BTRFS_EXTENT_REF_KEY
;
696 data_size
[i
] = sizeof(struct btrfs_extent_ref
);
700 op
= list_entry(cur
, struct pending_extent_op
, list
);
704 ret
= btrfs_insert_some_items(trans
, extent_root
, path
,
705 keys
+i
, data_size
+i
, total
-i
);
711 leaf
= path
->nodes
[0];
712 for (c
= 0; c
< ret
; c
++) {
713 int ref_first
= keys
[i
].type
== BTRFS_EXTENT_REF_KEY
;
716 * if the first item we inserted was a backref, then
717 * the EXTENT_ITEM will be the odd c's, else it will
720 if ((ref_first
&& (c
% 2)) ||
721 (!ref_first
&& !(c
% 2))) {
722 struct btrfs_extent_item
*itm
;
724 itm
= btrfs_item_ptr(leaf
, path
->slots
[0] + c
,
725 struct btrfs_extent_item
);
726 btrfs_set_extent_refs(path
->nodes
[0], itm
, 1);
729 struct btrfs_extent_ref
*ref
;
731 ref
= btrfs_item_ptr(leaf
, path
->slots
[0] + c
,
732 struct btrfs_extent_ref
);
733 btrfs_set_ref_root(leaf
, ref
, ref_root
);
734 btrfs_set_ref_generation(leaf
, ref
,
736 btrfs_set_ref_objectid(leaf
, ref
, op
->level
);
737 btrfs_set_ref_num_refs(leaf
, ref
, 1);
742 * using del to see when its ok to free up the
743 * pending_extent_op. In the case where we insert the
744 * last item on the list in order to help do batching
745 * we need to not free the extent op until we actually
746 * insert the extent_item
749 unlock_extent(&info
->extent_ins
, op
->bytenr
,
750 op
->bytenr
+ op
->num_bytes
- 1,
753 list_del_init(&op
->list
);
755 if (cur
!= insert_list
)
757 struct pending_extent_op
,
761 btrfs_mark_buffer_dirty(leaf
);
762 btrfs_release_path(extent_root
, path
);
765 * Ok backref's and items usually go right next to eachother,
766 * but if we could only insert 1 item that means that we
767 * inserted on the end of a leaf, and we have no idea what may
768 * be on the next leaf so we just play it safe. In order to
769 * try and help this case we insert the last thing on our
770 * insert list so hopefully it will end up being the last
771 * thing on the leaf and everything else will be before it,
772 * which will let us insert a whole bunch of items at the same
775 if (ret
== 1 && !last
&& (i
+ ret
< total
)) {
777 * last: where we will pick up the next time around
778 * i: our current key to insert, will be total - 1
779 * cur: the current op we are screwing with
784 cur
= insert_list
->prev
;
785 op
= list_entry(cur
, struct pending_extent_op
, list
);
788 * ok we successfully inserted the last item on the
789 * list, lets reset everything
791 * i: our current key to insert, so where we left off
793 * last: done with this
794 * cur: the op we are messing with
796 * total: since we inserted the last key, we need to
797 * decrement total so we dont overflow
803 cur
= insert_list
->next
;
804 op
= list_entry(cur
, struct pending_extent_op
,
819 static int noinline
insert_extent_backref(struct btrfs_trans_handle
*trans
,
820 struct btrfs_root
*root
,
821 struct btrfs_path
*path
,
822 u64 bytenr
, u64 parent
,
823 u64 ref_root
, u64 ref_generation
,
826 struct btrfs_key key
;
827 struct extent_buffer
*leaf
;
828 struct btrfs_extent_ref
*ref
;
832 key
.objectid
= bytenr
;
833 key
.type
= BTRFS_EXTENT_REF_KEY
;
836 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, sizeof(*ref
));
838 leaf
= path
->nodes
[0];
839 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
840 struct btrfs_extent_ref
);
841 btrfs_set_ref_root(leaf
, ref
, ref_root
);
842 btrfs_set_ref_generation(leaf
, ref
, ref_generation
);
843 btrfs_set_ref_objectid(leaf
, ref
, owner_objectid
);
844 btrfs_set_ref_num_refs(leaf
, ref
, 1);
845 } else if (ret
== -EEXIST
) {
847 BUG_ON(owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
);
848 leaf
= path
->nodes
[0];
849 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
850 struct btrfs_extent_ref
);
851 if (btrfs_ref_root(leaf
, ref
) != ref_root
||
852 btrfs_ref_generation(leaf
, ref
) != ref_generation
) {
858 num_refs
= btrfs_ref_num_refs(leaf
, ref
);
859 BUG_ON(num_refs
== 0);
860 btrfs_set_ref_num_refs(leaf
, ref
, num_refs
+ 1);
862 existing_owner
= btrfs_ref_objectid(leaf
, ref
);
863 if (existing_owner
!= owner_objectid
&&
864 existing_owner
!= BTRFS_MULTIPLE_OBJECTIDS
) {
865 btrfs_set_ref_objectid(leaf
, ref
,
866 BTRFS_MULTIPLE_OBJECTIDS
);
872 btrfs_mark_buffer_dirty(path
->nodes
[0]);
874 btrfs_release_path(root
, path
);
878 static int noinline
remove_extent_backref(struct btrfs_trans_handle
*trans
,
879 struct btrfs_root
*root
,
880 struct btrfs_path
*path
)
882 struct extent_buffer
*leaf
;
883 struct btrfs_extent_ref
*ref
;
887 leaf
= path
->nodes
[0];
888 ref
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_extent_ref
);
889 num_refs
= btrfs_ref_num_refs(leaf
, ref
);
890 BUG_ON(num_refs
== 0);
893 ret
= btrfs_del_item(trans
, root
, path
);
895 btrfs_set_ref_num_refs(leaf
, ref
, num_refs
);
896 btrfs_mark_buffer_dirty(leaf
);
898 btrfs_release_path(root
, path
);
902 static int noinline
free_extents(struct btrfs_trans_handle
*trans
,
903 struct btrfs_root
*extent_root
,
904 struct list_head
*del_list
)
906 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
907 struct btrfs_path
*path
;
908 struct btrfs_key key
, found_key
;
909 struct extent_buffer
*leaf
;
910 struct list_head
*cur
;
911 struct pending_extent_op
*op
;
912 struct btrfs_extent_item
*ei
;
913 int ret
, num_to_del
, extent_slot
= 0, found_extent
= 0;
917 path
= btrfs_alloc_path();
923 /* search for the backref for the current ref we want to delete */
924 cur
= del_list
->next
;
925 op
= list_entry(cur
, struct pending_extent_op
, list
);
926 ret
= lookup_extent_backref(trans
, extent_root
, path
, op
->bytenr
,
928 extent_root
->root_key
.objectid
,
929 op
->orig_generation
, op
->level
, 1);
931 printk("Unable to find backref byte nr %Lu root %Lu gen %Lu "
932 "owner %u\n", op
->bytenr
,
933 extent_root
->root_key
.objectid
, op
->orig_generation
,
935 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
940 extent_slot
= path
->slots
[0];
945 * if we aren't the first item on the leaf we can move back one and see
946 * if our ref is right next to our extent item
948 if (likely(extent_slot
)) {
950 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
952 if (found_key
.objectid
== op
->bytenr
&&
953 found_key
.type
== BTRFS_EXTENT_ITEM_KEY
&&
954 found_key
.offset
== op
->num_bytes
) {
961 * if we didn't find the extent we need to delete the backref and then
962 * search for the extent item key so we can update its ref count
965 key
.objectid
= op
->bytenr
;
966 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
967 key
.offset
= op
->num_bytes
;
969 ret
= remove_extent_backref(trans
, extent_root
, path
);
971 btrfs_release_path(extent_root
, path
);
972 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, -1, 1);
974 extent_slot
= path
->slots
[0];
977 /* this is where we update the ref count for the extent */
978 leaf
= path
->nodes
[0];
979 ei
= btrfs_item_ptr(leaf
, extent_slot
, struct btrfs_extent_item
);
980 refs
= btrfs_extent_refs(leaf
, ei
);
983 btrfs_set_extent_refs(leaf
, ei
, refs
);
985 btrfs_mark_buffer_dirty(leaf
);
988 * This extent needs deleting. The reason cur_slot is extent_slot +
989 * num_to_del is because extent_slot points to the slot where the extent
990 * is, and if the backref was not right next to the extent we will be
991 * deleting at least 1 item, and will want to start searching at the
992 * slot directly next to extent_slot. However if we did find the
993 * backref next to the extent item them we will be deleting at least 2
994 * items and will want to start searching directly after the ref slot
997 struct list_head
*pos
, *n
, *end
;
998 int cur_slot
= extent_slot
+num_to_del
;
1002 path
->slots
[0] = extent_slot
;
1003 bytes_freed
= op
->num_bytes
;
1005 mutex_lock(&info
->pinned_mutex
);
1006 ret
= pin_down_bytes(trans
, extent_root
, op
->bytenr
,
1007 op
->num_bytes
, op
->level
>=
1008 BTRFS_FIRST_FREE_OBJECTID
);
1009 mutex_unlock(&info
->pinned_mutex
);
1014 * we need to see if we can delete multiple things at once, so
1015 * start looping through the list of extents we are wanting to
1016 * delete and see if their extent/backref's are right next to
1017 * eachother and the extents only have 1 ref
1019 for (pos
= cur
->next
; pos
!= del_list
; pos
= pos
->next
) {
1020 struct pending_extent_op
*tmp
;
1022 tmp
= list_entry(pos
, struct pending_extent_op
, list
);
1024 /* we only want to delete extent+ref at this stage */
1025 if (cur_slot
>= btrfs_header_nritems(leaf
) - 1)
1028 btrfs_item_key_to_cpu(leaf
, &found_key
, cur_slot
);
1029 if (found_key
.objectid
!= tmp
->bytenr
||
1030 found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
||
1031 found_key
.offset
!= tmp
->num_bytes
)
1034 /* check to make sure this extent only has one ref */
1035 ei
= btrfs_item_ptr(leaf
, cur_slot
,
1036 struct btrfs_extent_item
);
1037 if (btrfs_extent_refs(leaf
, ei
) != 1)
1040 btrfs_item_key_to_cpu(leaf
, &found_key
, cur_slot
+1);
1041 if (found_key
.objectid
!= tmp
->bytenr
||
1042 found_key
.type
!= BTRFS_EXTENT_REF_KEY
||
1043 found_key
.offset
!= tmp
->orig_parent
)
1047 * the ref is right next to the extent, we can set the
1048 * ref count to 0 since we will delete them both now
1050 btrfs_set_extent_refs(leaf
, ei
, 0);
1052 /* pin down the bytes for this extent */
1053 mutex_lock(&info
->pinned_mutex
);
1054 ret
= pin_down_bytes(trans
, extent_root
, tmp
->bytenr
,
1055 tmp
->num_bytes
, tmp
->level
>=
1056 BTRFS_FIRST_FREE_OBJECTID
);
1057 mutex_unlock(&info
->pinned_mutex
);
1061 * use the del field to tell if we need to go ahead and
1062 * free up the extent when we delete the item or not.
1065 bytes_freed
+= tmp
->num_bytes
;
1072 /* update the free space counters */
1073 spin_lock_irq(&info
->delalloc_lock
);
1074 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
1075 btrfs_set_super_bytes_used(&info
->super_copy
,
1076 super_used
- bytes_freed
);
1077 spin_unlock_irq(&info
->delalloc_lock
);
1079 root_used
= btrfs_root_used(&extent_root
->root_item
);
1080 btrfs_set_root_used(&extent_root
->root_item
,
1081 root_used
- bytes_freed
);
1083 /* delete the items */
1084 ret
= btrfs_del_items(trans
, extent_root
, path
,
1085 path
->slots
[0], num_to_del
);
1089 * loop through the extents we deleted and do the cleanup work
1092 for (pos
= cur
, n
= pos
->next
; pos
!= end
;
1093 pos
= n
, n
= pos
->next
) {
1094 struct pending_extent_op
*tmp
;
1095 #ifdef BIO_RW_DISCARD
1097 struct btrfs_multi_bio
*multi
= NULL
;
1099 tmp
= list_entry(pos
, struct pending_extent_op
, list
);
1102 * remember tmp->del tells us wether or not we pinned
1105 ret
= update_block_group(trans
, extent_root
,
1106 tmp
->bytenr
, tmp
->num_bytes
, 0,
1110 #ifdef BIO_RW_DISCARD
1111 ret
= btrfs_map_block(&info
->mapping_tree
, READ
,
1112 tmp
->bytenr
, &map_length
, &multi
,
1115 struct btrfs_bio_stripe
*stripe
;
1118 stripe
= multi
->stripe
;
1120 if (map_length
> tmp
->num_bytes
)
1121 map_length
= tmp
->num_bytes
;
1123 for (i
= 0; i
< multi
->num_stripes
;
1125 blkdev_issue_discard(stripe
->dev
->bdev
,
1126 stripe
->physical
>> 9,
1131 list_del_init(&tmp
->list
);
1132 unlock_extent(&info
->extent_ins
, tmp
->bytenr
,
1133 tmp
->bytenr
+ tmp
->num_bytes
- 1,
1137 } else if (refs
&& found_extent
) {
1139 * the ref and extent were right next to eachother, but the
1140 * extent still has a ref, so just free the backref and keep
1143 ret
= remove_extent_backref(trans
, extent_root
, path
);
1146 list_del_init(&op
->list
);
1147 unlock_extent(&info
->extent_ins
, op
->bytenr
,
1148 op
->bytenr
+ op
->num_bytes
- 1, GFP_NOFS
);
1152 * the extent has multiple refs and the backref we were looking
1153 * for was not right next to it, so just unlock and go next,
1156 list_del_init(&op
->list
);
1157 unlock_extent(&info
->extent_ins
, op
->bytenr
,
1158 op
->bytenr
+ op
->num_bytes
- 1, GFP_NOFS
);
1162 btrfs_release_path(extent_root
, path
);
1163 if (!list_empty(del_list
))
1167 btrfs_free_path(path
);
1171 static int __btrfs_update_extent_ref(struct btrfs_trans_handle
*trans
,
1172 struct btrfs_root
*root
, u64 bytenr
,
1173 u64 orig_parent
, u64 parent
,
1174 u64 orig_root
, u64 ref_root
,
1175 u64 orig_generation
, u64 ref_generation
,
1179 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
1180 struct btrfs_path
*path
;
1182 if (root
== root
->fs_info
->extent_root
) {
1183 struct pending_extent_op
*extent_op
;
1186 BUG_ON(owner_objectid
>= BTRFS_MAX_LEVEL
);
1187 num_bytes
= btrfs_level_size(root
, (int)owner_objectid
);
1188 mutex_lock(&root
->fs_info
->extent_ins_mutex
);
1189 if (test_range_bit(&root
->fs_info
->extent_ins
, bytenr
,
1190 bytenr
+ num_bytes
- 1, EXTENT_WRITEBACK
, 0)) {
1192 ret
= get_state_private(&root
->fs_info
->extent_ins
,
1195 extent_op
= (struct pending_extent_op
*)
1196 (unsigned long)priv
;
1197 BUG_ON(extent_op
->parent
!= orig_parent
);
1198 BUG_ON(extent_op
->generation
!= orig_generation
);
1200 extent_op
->parent
= parent
;
1201 extent_op
->generation
= ref_generation
;
1203 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
1206 extent_op
->type
= PENDING_BACKREF_UPDATE
;
1207 extent_op
->bytenr
= bytenr
;
1208 extent_op
->num_bytes
= num_bytes
;
1209 extent_op
->parent
= parent
;
1210 extent_op
->orig_parent
= orig_parent
;
1211 extent_op
->generation
= ref_generation
;
1212 extent_op
->orig_generation
= orig_generation
;
1213 extent_op
->level
= (int)owner_objectid
;
1214 INIT_LIST_HEAD(&extent_op
->list
);
1217 set_extent_bits(&root
->fs_info
->extent_ins
,
1218 bytenr
, bytenr
+ num_bytes
- 1,
1219 EXTENT_WRITEBACK
, GFP_NOFS
);
1220 set_state_private(&root
->fs_info
->extent_ins
,
1221 bytenr
, (unsigned long)extent_op
);
1223 mutex_unlock(&root
->fs_info
->extent_ins_mutex
);
1227 path
= btrfs_alloc_path();
1230 ret
= lookup_extent_backref(trans
, extent_root
, path
,
1231 bytenr
, orig_parent
, orig_root
,
1232 orig_generation
, owner_objectid
, 1);
1235 ret
= remove_extent_backref(trans
, extent_root
, path
);
1238 ret
= insert_extent_backref(trans
, extent_root
, path
, bytenr
,
1239 parent
, ref_root
, ref_generation
,
1242 finish_current_insert(trans
, extent_root
, 0);
1243 del_pending_extents(trans
, extent_root
, 0);
1245 btrfs_free_path(path
);
1249 int btrfs_update_extent_ref(struct btrfs_trans_handle
*trans
,
1250 struct btrfs_root
*root
, u64 bytenr
,
1251 u64 orig_parent
, u64 parent
,
1252 u64 ref_root
, u64 ref_generation
,
1256 if (ref_root
== BTRFS_TREE_LOG_OBJECTID
&&
1257 owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
1259 ret
= __btrfs_update_extent_ref(trans
, root
, bytenr
, orig_parent
,
1260 parent
, ref_root
, ref_root
,
1261 ref_generation
, ref_generation
,
1266 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
1267 struct btrfs_root
*root
, u64 bytenr
,
1268 u64 orig_parent
, u64 parent
,
1269 u64 orig_root
, u64 ref_root
,
1270 u64 orig_generation
, u64 ref_generation
,
1273 struct btrfs_path
*path
;
1275 struct btrfs_key key
;
1276 struct extent_buffer
*l
;
1277 struct btrfs_extent_item
*item
;
1280 path
= btrfs_alloc_path();
1285 key
.objectid
= bytenr
;
1286 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1287 key
.offset
= (u64
)-1;
1289 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
1293 BUG_ON(ret
== 0 || path
->slots
[0] == 0);
1298 btrfs_item_key_to_cpu(l
, &key
, path
->slots
[0]);
1299 if (key
.objectid
!= bytenr
) {
1300 btrfs_print_leaf(root
->fs_info
->extent_root
, path
->nodes
[0]);
1301 printk("wanted %Lu found %Lu\n", bytenr
, key
.objectid
);
1304 BUG_ON(key
.type
!= BTRFS_EXTENT_ITEM_KEY
);
1306 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
1307 refs
= btrfs_extent_refs(l
, item
);
1308 btrfs_set_extent_refs(l
, item
, refs
+ 1);
1309 btrfs_mark_buffer_dirty(path
->nodes
[0]);
1311 btrfs_release_path(root
->fs_info
->extent_root
, path
);
1314 ret
= insert_extent_backref(trans
, root
->fs_info
->extent_root
,
1315 path
, bytenr
, parent
,
1316 ref_root
, ref_generation
,
1319 finish_current_insert(trans
, root
->fs_info
->extent_root
, 0);
1320 del_pending_extents(trans
, root
->fs_info
->extent_root
, 0);
1322 btrfs_free_path(path
);
1326 int btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
1327 struct btrfs_root
*root
,
1328 u64 bytenr
, u64 num_bytes
, u64 parent
,
1329 u64 ref_root
, u64 ref_generation
,
1333 if (ref_root
== BTRFS_TREE_LOG_OBJECTID
&&
1334 owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
1336 ret
= __btrfs_inc_extent_ref(trans
, root
, bytenr
, 0, parent
,
1337 0, ref_root
, 0, ref_generation
,
1342 int btrfs_extent_post_op(struct btrfs_trans_handle
*trans
,
1343 struct btrfs_root
*root
)
1345 finish_current_insert(trans
, root
->fs_info
->extent_root
, 1);
1346 del_pending_extents(trans
, root
->fs_info
->extent_root
, 1);
1350 int btrfs_lookup_extent_ref(struct btrfs_trans_handle
*trans
,
1351 struct btrfs_root
*root
, u64 bytenr
,
1352 u64 num_bytes
, u32
*refs
)
1354 struct btrfs_path
*path
;
1356 struct btrfs_key key
;
1357 struct extent_buffer
*l
;
1358 struct btrfs_extent_item
*item
;
1360 WARN_ON(num_bytes
< root
->sectorsize
);
1361 path
= btrfs_alloc_path();
1363 key
.objectid
= bytenr
;
1364 key
.offset
= num_bytes
;
1365 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
1366 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
1371 btrfs_print_leaf(root
, path
->nodes
[0]);
1372 printk("failed to find block number %Lu\n", bytenr
);
1376 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
1377 *refs
= btrfs_extent_refs(l
, item
);
1379 btrfs_free_path(path
);
1383 int btrfs_cross_ref_exist(struct btrfs_trans_handle
*trans
,
1384 struct btrfs_root
*root
, u64 bytenr
)
1386 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
1387 struct btrfs_path
*path
;
1388 struct extent_buffer
*leaf
;
1389 struct btrfs_extent_ref
*ref_item
;
1390 struct btrfs_key key
;
1391 struct btrfs_key found_key
;
1397 key
.objectid
= bytenr
;
1398 key
.offset
= (u64
)-1;
1399 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1401 path
= btrfs_alloc_path();
1402 ret
= btrfs_search_slot(NULL
, extent_root
, &key
, path
, 0, 0);
1408 if (path
->slots
[0] == 0)
1412 leaf
= path
->nodes
[0];
1413 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
1415 if (found_key
.objectid
!= bytenr
||
1416 found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
)
1419 last_snapshot
= btrfs_root_last_snapshot(&root
->root_item
);
1421 leaf
= path
->nodes
[0];
1422 nritems
= btrfs_header_nritems(leaf
);
1423 if (path
->slots
[0] >= nritems
) {
1424 ret
= btrfs_next_leaf(extent_root
, path
);
1431 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
1432 if (found_key
.objectid
!= bytenr
)
1435 if (found_key
.type
!= BTRFS_EXTENT_REF_KEY
) {
1440 ref_item
= btrfs_item_ptr(leaf
, path
->slots
[0],
1441 struct btrfs_extent_ref
);
1442 ref_root
= btrfs_ref_root(leaf
, ref_item
);
1443 if (ref_root
!= root
->root_key
.objectid
&&
1444 ref_root
!= BTRFS_TREE_LOG_OBJECTID
) {
1448 if (btrfs_ref_generation(leaf
, ref_item
) <= last_snapshot
) {
1457 btrfs_free_path(path
);
1461 int btrfs_cache_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
1462 struct extent_buffer
*buf
, u32 nr_extents
)
1464 struct btrfs_key key
;
1465 struct btrfs_file_extent_item
*fi
;
1473 if (!root
->ref_cows
)
1476 if (root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
1478 root_gen
= root
->root_key
.offset
;
1481 root_gen
= trans
->transid
- 1;
1484 level
= btrfs_header_level(buf
);
1485 nritems
= btrfs_header_nritems(buf
);
1488 struct btrfs_leaf_ref
*ref
;
1489 struct btrfs_extent_info
*info
;
1491 ref
= btrfs_alloc_leaf_ref(root
, nr_extents
);
1497 ref
->root_gen
= root_gen
;
1498 ref
->bytenr
= buf
->start
;
1499 ref
->owner
= btrfs_header_owner(buf
);
1500 ref
->generation
= btrfs_header_generation(buf
);
1501 ref
->nritems
= nr_extents
;
1502 info
= ref
->extents
;
1504 for (i
= 0; nr_extents
> 0 && i
< nritems
; i
++) {
1506 btrfs_item_key_to_cpu(buf
, &key
, i
);
1507 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1509 fi
= btrfs_item_ptr(buf
, i
,
1510 struct btrfs_file_extent_item
);
1511 if (btrfs_file_extent_type(buf
, fi
) ==
1512 BTRFS_FILE_EXTENT_INLINE
)
1514 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1515 if (disk_bytenr
== 0)
1518 info
->bytenr
= disk_bytenr
;
1520 btrfs_file_extent_disk_num_bytes(buf
, fi
);
1521 info
->objectid
= key
.objectid
;
1522 info
->offset
= key
.offset
;
1526 ret
= btrfs_add_leaf_ref(root
, ref
, shared
);
1527 if (ret
== -EEXIST
&& shared
) {
1528 struct btrfs_leaf_ref
*old
;
1529 old
= btrfs_lookup_leaf_ref(root
, ref
->bytenr
);
1531 btrfs_remove_leaf_ref(root
, old
);
1532 btrfs_free_leaf_ref(root
, old
);
1533 ret
= btrfs_add_leaf_ref(root
, ref
, shared
);
1536 btrfs_free_leaf_ref(root
, ref
);
1542 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
1543 struct extent_buffer
*orig_buf
, struct extent_buffer
*buf
,
1550 u64 orig_generation
;
1552 u32 nr_file_extents
= 0;
1553 struct btrfs_key key
;
1554 struct btrfs_file_extent_item
*fi
;
1559 int (*process_func
)(struct btrfs_trans_handle
*, struct btrfs_root
*,
1560 u64
, u64
, u64
, u64
, u64
, u64
, u64
, u64
);
1562 ref_root
= btrfs_header_owner(buf
);
1563 ref_generation
= btrfs_header_generation(buf
);
1564 orig_root
= btrfs_header_owner(orig_buf
);
1565 orig_generation
= btrfs_header_generation(orig_buf
);
1567 nritems
= btrfs_header_nritems(buf
);
1568 level
= btrfs_header_level(buf
);
1570 if (root
->ref_cows
) {
1571 process_func
= __btrfs_inc_extent_ref
;
1574 root
->root_key
.objectid
!= BTRFS_TREE_LOG_OBJECTID
)
1577 root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
)
1579 process_func
= __btrfs_update_extent_ref
;
1582 for (i
= 0; i
< nritems
; i
++) {
1585 btrfs_item_key_to_cpu(buf
, &key
, i
);
1586 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1588 fi
= btrfs_item_ptr(buf
, i
,
1589 struct btrfs_file_extent_item
);
1590 if (btrfs_file_extent_type(buf
, fi
) ==
1591 BTRFS_FILE_EXTENT_INLINE
)
1593 bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1599 ret
= process_func(trans
, root
, bytenr
,
1600 orig_buf
->start
, buf
->start
,
1601 orig_root
, ref_root
,
1602 orig_generation
, ref_generation
,
1611 bytenr
= btrfs_node_blockptr(buf
, i
);
1612 ret
= process_func(trans
, root
, bytenr
,
1613 orig_buf
->start
, buf
->start
,
1614 orig_root
, ref_root
,
1615 orig_generation
, ref_generation
,
1627 *nr_extents
= nr_file_extents
;
1629 *nr_extents
= nritems
;
1637 int btrfs_update_ref(struct btrfs_trans_handle
*trans
,
1638 struct btrfs_root
*root
, struct extent_buffer
*orig_buf
,
1639 struct extent_buffer
*buf
, int start_slot
, int nr
)
1646 u64 orig_generation
;
1647 struct btrfs_key key
;
1648 struct btrfs_file_extent_item
*fi
;
1654 BUG_ON(start_slot
< 0);
1655 BUG_ON(start_slot
+ nr
> btrfs_header_nritems(buf
));
1657 ref_root
= btrfs_header_owner(buf
);
1658 ref_generation
= btrfs_header_generation(buf
);
1659 orig_root
= btrfs_header_owner(orig_buf
);
1660 orig_generation
= btrfs_header_generation(orig_buf
);
1661 level
= btrfs_header_level(buf
);
1663 if (!root
->ref_cows
) {
1665 root
->root_key
.objectid
!= BTRFS_TREE_LOG_OBJECTID
)
1668 root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
)
1672 for (i
= 0, slot
= start_slot
; i
< nr
; i
++, slot
++) {
1675 btrfs_item_key_to_cpu(buf
, &key
, slot
);
1676 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1678 fi
= btrfs_item_ptr(buf
, slot
,
1679 struct btrfs_file_extent_item
);
1680 if (btrfs_file_extent_type(buf
, fi
) ==
1681 BTRFS_FILE_EXTENT_INLINE
)
1683 bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
1686 ret
= __btrfs_update_extent_ref(trans
, root
, bytenr
,
1687 orig_buf
->start
, buf
->start
,
1688 orig_root
, ref_root
,
1689 orig_generation
, ref_generation
,
1694 bytenr
= btrfs_node_blockptr(buf
, slot
);
1695 ret
= __btrfs_update_extent_ref(trans
, root
, bytenr
,
1696 orig_buf
->start
, buf
->start
,
1697 orig_root
, ref_root
,
1698 orig_generation
, ref_generation
,
1710 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
1711 struct btrfs_root
*root
,
1712 struct btrfs_path
*path
,
1713 struct btrfs_block_group_cache
*cache
)
1717 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
1719 struct extent_buffer
*leaf
;
1721 ret
= btrfs_search_slot(trans
, extent_root
, &cache
->key
, path
, 0, 1);
1726 leaf
= path
->nodes
[0];
1727 bi
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
1728 write_extent_buffer(leaf
, &cache
->item
, bi
, sizeof(cache
->item
));
1729 btrfs_mark_buffer_dirty(leaf
);
1730 btrfs_release_path(extent_root
, path
);
1732 finish_current_insert(trans
, extent_root
, 0);
1733 pending_ret
= del_pending_extents(trans
, extent_root
, 0);
1742 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
1743 struct btrfs_root
*root
)
1745 struct btrfs_block_group_cache
*cache
, *entry
;
1749 struct btrfs_path
*path
;
1752 path
= btrfs_alloc_path();
1758 spin_lock(&root
->fs_info
->block_group_cache_lock
);
1759 for (n
= rb_first(&root
->fs_info
->block_group_cache_tree
);
1760 n
; n
= rb_next(n
)) {
1761 entry
= rb_entry(n
, struct btrfs_block_group_cache
,
1768 spin_unlock(&root
->fs_info
->block_group_cache_lock
);
1774 last
+= cache
->key
.offset
;
1776 err
= write_one_cache_group(trans
, root
,
1779 * if we fail to write the cache group, we want
1780 * to keep it marked dirty in hopes that a later
1788 btrfs_free_path(path
);
1792 static int update_space_info(struct btrfs_fs_info
*info
, u64 flags
,
1793 u64 total_bytes
, u64 bytes_used
,
1794 struct btrfs_space_info
**space_info
)
1796 struct btrfs_space_info
*found
;
1798 found
= __find_space_info(info
, flags
);
1800 spin_lock(&found
->lock
);
1801 found
->total_bytes
+= total_bytes
;
1802 found
->bytes_used
+= bytes_used
;
1804 spin_unlock(&found
->lock
);
1805 *space_info
= found
;
1808 found
= kzalloc(sizeof(*found
), GFP_NOFS
);
1812 list_add(&found
->list
, &info
->space_info
);
1813 INIT_LIST_HEAD(&found
->block_groups
);
1814 init_rwsem(&found
->groups_sem
);
1815 spin_lock_init(&found
->lock
);
1816 found
->flags
= flags
;
1817 found
->total_bytes
= total_bytes
;
1818 found
->bytes_used
= bytes_used
;
1819 found
->bytes_pinned
= 0;
1820 found
->bytes_reserved
= 0;
1821 found
->bytes_readonly
= 0;
1823 found
->force_alloc
= 0;
1824 *space_info
= found
;
1828 static void set_avail_alloc_bits(struct btrfs_fs_info
*fs_info
, u64 flags
)
1830 u64 extra_flags
= flags
& (BTRFS_BLOCK_GROUP_RAID0
|
1831 BTRFS_BLOCK_GROUP_RAID1
|
1832 BTRFS_BLOCK_GROUP_RAID10
|
1833 BTRFS_BLOCK_GROUP_DUP
);
1835 if (flags
& BTRFS_BLOCK_GROUP_DATA
)
1836 fs_info
->avail_data_alloc_bits
|= extra_flags
;
1837 if (flags
& BTRFS_BLOCK_GROUP_METADATA
)
1838 fs_info
->avail_metadata_alloc_bits
|= extra_flags
;
1839 if (flags
& BTRFS_BLOCK_GROUP_SYSTEM
)
1840 fs_info
->avail_system_alloc_bits
|= extra_flags
;
1844 static void set_block_group_readonly(struct btrfs_block_group_cache
*cache
)
1846 spin_lock(&cache
->space_info
->lock
);
1847 spin_lock(&cache
->lock
);
1849 cache
->space_info
->bytes_readonly
+= cache
->key
.offset
-
1850 btrfs_block_group_used(&cache
->item
);
1853 spin_unlock(&cache
->lock
);
1854 spin_unlock(&cache
->space_info
->lock
);
1857 u64
btrfs_reduce_alloc_profile(struct btrfs_root
*root
, u64 flags
)
1859 u64 num_devices
= root
->fs_info
->fs_devices
->rw_devices
;
1861 if (num_devices
== 1)
1862 flags
&= ~(BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_RAID0
);
1863 if (num_devices
< 4)
1864 flags
&= ~BTRFS_BLOCK_GROUP_RAID10
;
1866 if ((flags
& BTRFS_BLOCK_GROUP_DUP
) &&
1867 (flags
& (BTRFS_BLOCK_GROUP_RAID1
|
1868 BTRFS_BLOCK_GROUP_RAID10
))) {
1869 flags
&= ~BTRFS_BLOCK_GROUP_DUP
;
1872 if ((flags
& BTRFS_BLOCK_GROUP_RAID1
) &&
1873 (flags
& BTRFS_BLOCK_GROUP_RAID10
)) {
1874 flags
&= ~BTRFS_BLOCK_GROUP_RAID1
;
1877 if ((flags
& BTRFS_BLOCK_GROUP_RAID0
) &&
1878 ((flags
& BTRFS_BLOCK_GROUP_RAID1
) |
1879 (flags
& BTRFS_BLOCK_GROUP_RAID10
) |
1880 (flags
& BTRFS_BLOCK_GROUP_DUP
)))
1881 flags
&= ~BTRFS_BLOCK_GROUP_RAID0
;
1885 static int do_chunk_alloc(struct btrfs_trans_handle
*trans
,
1886 struct btrfs_root
*extent_root
, u64 alloc_bytes
,
1887 u64 flags
, int force
)
1889 struct btrfs_space_info
*space_info
;
1893 mutex_lock(&extent_root
->fs_info
->chunk_mutex
);
1895 flags
= btrfs_reduce_alloc_profile(extent_root
, flags
);
1897 space_info
= __find_space_info(extent_root
->fs_info
, flags
);
1899 ret
= update_space_info(extent_root
->fs_info
, flags
,
1903 BUG_ON(!space_info
);
1905 spin_lock(&space_info
->lock
);
1906 if (space_info
->force_alloc
) {
1908 space_info
->force_alloc
= 0;
1910 if (space_info
->full
) {
1911 spin_unlock(&space_info
->lock
);
1915 thresh
= space_info
->total_bytes
- space_info
->bytes_readonly
;
1916 thresh
= div_factor(thresh
, 6);
1918 (space_info
->bytes_used
+ space_info
->bytes_pinned
+
1919 space_info
->bytes_reserved
+ alloc_bytes
) < thresh
) {
1920 spin_unlock(&space_info
->lock
);
1923 spin_unlock(&space_info
->lock
);
1925 ret
= btrfs_alloc_chunk(trans
, extent_root
, flags
);
1927 printk("space info full %Lu\n", flags
);
1928 space_info
->full
= 1;
1931 mutex_unlock(&extent_root
->fs_info
->chunk_mutex
);
1935 static int update_block_group(struct btrfs_trans_handle
*trans
,
1936 struct btrfs_root
*root
,
1937 u64 bytenr
, u64 num_bytes
, int alloc
,
1940 struct btrfs_block_group_cache
*cache
;
1941 struct btrfs_fs_info
*info
= root
->fs_info
;
1942 u64 total
= num_bytes
;
1947 cache
= btrfs_lookup_block_group(info
, bytenr
);
1950 byte_in_group
= bytenr
- cache
->key
.objectid
;
1951 WARN_ON(byte_in_group
> cache
->key
.offset
);
1953 spin_lock(&cache
->space_info
->lock
);
1954 spin_lock(&cache
->lock
);
1956 old_val
= btrfs_block_group_used(&cache
->item
);
1957 num_bytes
= min(total
, cache
->key
.offset
- byte_in_group
);
1959 old_val
+= num_bytes
;
1960 cache
->space_info
->bytes_used
+= num_bytes
;
1962 cache
->space_info
->bytes_readonly
-= num_bytes
;
1965 btrfs_set_block_group_used(&cache
->item
, old_val
);
1966 spin_unlock(&cache
->lock
);
1967 spin_unlock(&cache
->space_info
->lock
);
1969 old_val
-= num_bytes
;
1970 cache
->space_info
->bytes_used
-= num_bytes
;
1972 cache
->space_info
->bytes_readonly
+= num_bytes
;
1973 btrfs_set_block_group_used(&cache
->item
, old_val
);
1974 spin_unlock(&cache
->lock
);
1975 spin_unlock(&cache
->space_info
->lock
);
1978 ret
= btrfs_add_free_space(cache
, bytenr
,
1985 bytenr
+= num_bytes
;
1990 static u64
first_logical_byte(struct btrfs_root
*root
, u64 search_start
)
1992 struct btrfs_block_group_cache
*cache
;
1994 cache
= btrfs_lookup_first_block_group(root
->fs_info
, search_start
);
1998 return cache
->key
.objectid
;
2001 int btrfs_update_pinned_extents(struct btrfs_root
*root
,
2002 u64 bytenr
, u64 num
, int pin
)
2005 struct btrfs_block_group_cache
*cache
;
2006 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
2008 WARN_ON(!mutex_is_locked(&root
->fs_info
->pinned_mutex
));
2010 set_extent_dirty(&fs_info
->pinned_extents
,
2011 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
2013 clear_extent_dirty(&fs_info
->pinned_extents
,
2014 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
2017 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
2019 len
= min(num
, cache
->key
.offset
-
2020 (bytenr
- cache
->key
.objectid
));
2022 spin_lock(&cache
->space_info
->lock
);
2023 spin_lock(&cache
->lock
);
2024 cache
->pinned
+= len
;
2025 cache
->space_info
->bytes_pinned
+= len
;
2026 spin_unlock(&cache
->lock
);
2027 spin_unlock(&cache
->space_info
->lock
);
2028 fs_info
->total_pinned
+= len
;
2030 spin_lock(&cache
->space_info
->lock
);
2031 spin_lock(&cache
->lock
);
2032 cache
->pinned
-= len
;
2033 cache
->space_info
->bytes_pinned
-= len
;
2034 spin_unlock(&cache
->lock
);
2035 spin_unlock(&cache
->space_info
->lock
);
2036 fs_info
->total_pinned
-= len
;
2038 btrfs_add_free_space(cache
, bytenr
, len
);
2046 static int update_reserved_extents(struct btrfs_root
*root
,
2047 u64 bytenr
, u64 num
, int reserve
)
2050 struct btrfs_block_group_cache
*cache
;
2051 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
2054 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
2056 len
= min(num
, cache
->key
.offset
-
2057 (bytenr
- cache
->key
.objectid
));
2059 spin_lock(&cache
->space_info
->lock
);
2060 spin_lock(&cache
->lock
);
2062 cache
->reserved
+= len
;
2063 cache
->space_info
->bytes_reserved
+= len
;
2065 cache
->reserved
-= len
;
2066 cache
->space_info
->bytes_reserved
-= len
;
2068 spin_unlock(&cache
->lock
);
2069 spin_unlock(&cache
->space_info
->lock
);
2076 int btrfs_copy_pinned(struct btrfs_root
*root
, struct extent_io_tree
*copy
)
2081 struct extent_io_tree
*pinned_extents
= &root
->fs_info
->pinned_extents
;
2084 mutex_lock(&root
->fs_info
->pinned_mutex
);
2086 ret
= find_first_extent_bit(pinned_extents
, last
,
2087 &start
, &end
, EXTENT_DIRTY
);
2090 set_extent_dirty(copy
, start
, end
, GFP_NOFS
);
2093 mutex_unlock(&root
->fs_info
->pinned_mutex
);
2097 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
,
2098 struct btrfs_root
*root
,
2099 struct extent_io_tree
*unpin
)
2105 mutex_lock(&root
->fs_info
->pinned_mutex
);
2107 ret
= find_first_extent_bit(unpin
, 0, &start
, &end
,
2111 btrfs_update_pinned_extents(root
, start
, end
+ 1 - start
, 0);
2112 clear_extent_dirty(unpin
, start
, end
, GFP_NOFS
);
2113 if (need_resched()) {
2114 mutex_unlock(&root
->fs_info
->pinned_mutex
);
2116 mutex_lock(&root
->fs_info
->pinned_mutex
);
2119 mutex_unlock(&root
->fs_info
->pinned_mutex
);
2123 static int finish_current_insert(struct btrfs_trans_handle
*trans
,
2124 struct btrfs_root
*extent_root
, int all
)
2131 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
2132 struct btrfs_path
*path
;
2133 struct pending_extent_op
*extent_op
, *tmp
;
2134 struct list_head insert_list
, update_list
;
2136 int num_inserts
= 0, max_inserts
;
2138 path
= btrfs_alloc_path();
2139 INIT_LIST_HEAD(&insert_list
);
2140 INIT_LIST_HEAD(&update_list
);
2142 max_inserts
= extent_root
->leafsize
/
2143 (2 * sizeof(struct btrfs_key
) + 2 * sizeof(struct btrfs_item
) +
2144 sizeof(struct btrfs_extent_ref
) +
2145 sizeof(struct btrfs_extent_item
));
2147 mutex_lock(&info
->extent_ins_mutex
);
2149 ret
= find_first_extent_bit(&info
->extent_ins
, search
, &start
,
2150 &end
, EXTENT_WRITEBACK
);
2152 if (skipped
&& all
&& !num_inserts
) {
2157 mutex_unlock(&info
->extent_ins_mutex
);
2161 ret
= try_lock_extent(&info
->extent_ins
, start
, end
, GFP_NOFS
);
2165 if (need_resched()) {
2166 mutex_unlock(&info
->extent_ins_mutex
);
2168 mutex_lock(&info
->extent_ins_mutex
);
2173 ret
= get_state_private(&info
->extent_ins
, start
, &priv
);
2175 extent_op
= (struct pending_extent_op
*)(unsigned long) priv
;
2177 if (extent_op
->type
== PENDING_EXTENT_INSERT
) {
2179 list_add_tail(&extent_op
->list
, &insert_list
);
2181 if (num_inserts
== max_inserts
) {
2182 mutex_unlock(&info
->extent_ins_mutex
);
2185 } else if (extent_op
->type
== PENDING_BACKREF_UPDATE
) {
2186 list_add_tail(&extent_op
->list
, &update_list
);
2194 * process the update list, clear the writeback bit for it, and if
2195 * somebody marked this thing for deletion then just unlock it and be
2196 * done, the free_extents will handle it
2198 mutex_lock(&info
->extent_ins_mutex
);
2199 list_for_each_entry_safe(extent_op
, tmp
, &update_list
, list
) {
2200 clear_extent_bits(&info
->extent_ins
, extent_op
->bytenr
,
2201 extent_op
->bytenr
+ extent_op
->num_bytes
- 1,
2202 EXTENT_WRITEBACK
, GFP_NOFS
);
2203 if (extent_op
->del
) {
2204 list_del_init(&extent_op
->list
);
2205 unlock_extent(&info
->extent_ins
, extent_op
->bytenr
,
2206 extent_op
->bytenr
+ extent_op
->num_bytes
2211 mutex_unlock(&info
->extent_ins_mutex
);
2214 * still have things left on the update list, go ahead an update
2217 if (!list_empty(&update_list
)) {
2218 ret
= update_backrefs(trans
, extent_root
, path
, &update_list
);
2223 * if no inserts need to be done, but we skipped some extents and we
2224 * need to make sure everything is cleaned then reset everything and
2225 * go back to the beginning
2227 if (!num_inserts
&& all
&& skipped
) {
2230 INIT_LIST_HEAD(&update_list
);
2231 INIT_LIST_HEAD(&insert_list
);
2233 } else if (!num_inserts
) {
2238 * process the insert extents list. Again if we are deleting this
2239 * extent, then just unlock it, pin down the bytes if need be, and be
2240 * done with it. Saves us from having to actually insert the extent
2241 * into the tree and then subsequently come along and delete it
2243 mutex_lock(&info
->extent_ins_mutex
);
2244 list_for_each_entry_safe(extent_op
, tmp
, &insert_list
, list
) {
2245 clear_extent_bits(&info
->extent_ins
, extent_op
->bytenr
,
2246 extent_op
->bytenr
+ extent_op
->num_bytes
- 1,
2247 EXTENT_WRITEBACK
, GFP_NOFS
);
2248 if (extent_op
->del
) {
2249 list_del_init(&extent_op
->list
);
2250 unlock_extent(&info
->extent_ins
, extent_op
->bytenr
,
2251 extent_op
->bytenr
+ extent_op
->num_bytes
2254 mutex_lock(&extent_root
->fs_info
->pinned_mutex
);
2255 ret
= pin_down_bytes(trans
, extent_root
,
2257 extent_op
->num_bytes
, 0);
2258 mutex_unlock(&extent_root
->fs_info
->pinned_mutex
);
2260 ret
= update_block_group(trans
, extent_root
,
2262 extent_op
->num_bytes
,
2269 mutex_unlock(&info
->extent_ins_mutex
);
2271 ret
= insert_extents(trans
, extent_root
, path
, &insert_list
,
2276 * if we broke out of the loop in order to insert stuff because we hit
2277 * the maximum number of inserts at a time we can handle, then loop
2278 * back and pick up where we left off
2280 if (num_inserts
== max_inserts
) {
2281 INIT_LIST_HEAD(&insert_list
);
2282 INIT_LIST_HEAD(&update_list
);
2288 * again, if we need to make absolutely sure there are no more pending
2289 * extent operations left and we know that we skipped some, go back to
2290 * the beginning and do it all again
2292 if (all
&& skipped
) {
2293 INIT_LIST_HEAD(&insert_list
);
2294 INIT_LIST_HEAD(&update_list
);
2301 btrfs_free_path(path
);
2305 static int pin_down_bytes(struct btrfs_trans_handle
*trans
,
2306 struct btrfs_root
*root
,
2307 u64 bytenr
, u64 num_bytes
, int is_data
)
2310 struct extent_buffer
*buf
;
2315 buf
= btrfs_find_tree_block(root
, bytenr
, num_bytes
);
2319 /* we can reuse a block if it hasn't been written
2320 * and it is from this transaction. We can't
2321 * reuse anything from the tree log root because
2322 * it has tiny sub-transactions.
2324 if (btrfs_buffer_uptodate(buf
, 0) &&
2325 btrfs_try_tree_lock(buf
)) {
2326 u64 header_owner
= btrfs_header_owner(buf
);
2327 u64 header_transid
= btrfs_header_generation(buf
);
2328 if (header_owner
!= BTRFS_TREE_LOG_OBJECTID
&&
2329 header_owner
!= BTRFS_TREE_RELOC_OBJECTID
&&
2330 header_transid
== trans
->transid
&&
2331 !btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_WRITTEN
)) {
2332 clean_tree_block(NULL
, root
, buf
);
2333 btrfs_tree_unlock(buf
);
2334 free_extent_buffer(buf
);
2337 btrfs_tree_unlock(buf
);
2339 free_extent_buffer(buf
);
2341 btrfs_update_pinned_extents(root
, bytenr
, num_bytes
, 1);
2348 * remove an extent from the root, returns 0 on success
2350 static int __free_extent(struct btrfs_trans_handle
*trans
,
2351 struct btrfs_root
*root
,
2352 u64 bytenr
, u64 num_bytes
, u64 parent
,
2353 u64 root_objectid
, u64 ref_generation
,
2354 u64 owner_objectid
, int pin
, int mark_free
)
2356 struct btrfs_path
*path
;
2357 struct btrfs_key key
;
2358 struct btrfs_fs_info
*info
= root
->fs_info
;
2359 struct btrfs_root
*extent_root
= info
->extent_root
;
2360 struct extent_buffer
*leaf
;
2362 int extent_slot
= 0;
2363 int found_extent
= 0;
2365 struct btrfs_extent_item
*ei
;
2368 key
.objectid
= bytenr
;
2369 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
2370 key
.offset
= num_bytes
;
2371 path
= btrfs_alloc_path();
2376 ret
= lookup_extent_backref(trans
, extent_root
, path
,
2377 bytenr
, parent
, root_objectid
,
2378 ref_generation
, owner_objectid
, 1);
2380 struct btrfs_key found_key
;
2381 extent_slot
= path
->slots
[0];
2382 while(extent_slot
> 0) {
2384 btrfs_item_key_to_cpu(path
->nodes
[0], &found_key
,
2386 if (found_key
.objectid
!= bytenr
)
2388 if (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
&&
2389 found_key
.offset
== num_bytes
) {
2393 if (path
->slots
[0] - extent_slot
> 5)
2396 if (!found_extent
) {
2397 ret
= remove_extent_backref(trans
, extent_root
, path
);
2399 btrfs_release_path(extent_root
, path
);
2400 ret
= btrfs_search_slot(trans
, extent_root
,
2403 printk(KERN_ERR
"umm, got %d back from search"
2404 ", was looking for %Lu\n", ret
,
2406 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
2409 extent_slot
= path
->slots
[0];
2412 btrfs_print_leaf(extent_root
, path
->nodes
[0]);
2414 printk("Unable to find ref byte nr %Lu root %Lu "
2415 "gen %Lu owner %Lu\n", bytenr
,
2416 root_objectid
, ref_generation
, owner_objectid
);
2419 leaf
= path
->nodes
[0];
2420 ei
= btrfs_item_ptr(leaf
, extent_slot
,
2421 struct btrfs_extent_item
);
2422 refs
= btrfs_extent_refs(leaf
, ei
);
2425 btrfs_set_extent_refs(leaf
, ei
, refs
);
2427 btrfs_mark_buffer_dirty(leaf
);
2429 if (refs
== 0 && found_extent
&& path
->slots
[0] == extent_slot
+ 1) {
2430 struct btrfs_extent_ref
*ref
;
2431 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
2432 struct btrfs_extent_ref
);
2433 BUG_ON(btrfs_ref_num_refs(leaf
, ref
) != 1);
2434 /* if the back ref and the extent are next to each other
2435 * they get deleted below in one shot
2437 path
->slots
[0] = extent_slot
;
2439 } else if (found_extent
) {
2440 /* otherwise delete the extent back ref */
2441 ret
= remove_extent_backref(trans
, extent_root
, path
);
2443 /* if refs are 0, we need to setup the path for deletion */
2445 btrfs_release_path(extent_root
, path
);
2446 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
,
2455 #ifdef BIO_RW_DISCARD
2456 u64 map_length
= num_bytes
;
2457 struct btrfs_multi_bio
*multi
= NULL
;
2461 mutex_lock(&root
->fs_info
->pinned_mutex
);
2462 ret
= pin_down_bytes(trans
, root
, bytenr
, num_bytes
,
2463 owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
);
2464 mutex_unlock(&root
->fs_info
->pinned_mutex
);
2470 /* block accounting for super block */
2471 spin_lock_irq(&info
->delalloc_lock
);
2472 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
2473 btrfs_set_super_bytes_used(&info
->super_copy
,
2474 super_used
- num_bytes
);
2475 spin_unlock_irq(&info
->delalloc_lock
);
2477 /* block accounting for root item */
2478 root_used
= btrfs_root_used(&root
->root_item
);
2479 btrfs_set_root_used(&root
->root_item
,
2480 root_used
- num_bytes
);
2481 ret
= btrfs_del_items(trans
, extent_root
, path
, path
->slots
[0],
2484 btrfs_release_path(extent_root
, path
);
2485 ret
= update_block_group(trans
, root
, bytenr
, num_bytes
, 0,
2489 #ifdef BIO_RW_DISCARD
2490 /* Tell the block device(s) that the sectors can be discarded */
2491 ret
= btrfs_map_block(&root
->fs_info
->mapping_tree
, READ
,
2492 bytenr
, &map_length
, &multi
, 0);
2494 struct btrfs_bio_stripe
*stripe
= multi
->stripes
;
2497 if (map_length
> num_bytes
)
2498 map_length
= num_bytes
;
2500 for (i
= 0; i
< multi
->num_stripes
; i
++, stripe
++) {
2501 blkdev_issue_discard(stripe
->dev
->bdev
,
2502 stripe
->physical
>> 9,
2509 btrfs_free_path(path
);
2510 finish_current_insert(trans
, extent_root
, 0);
2515 * find all the blocks marked as pending in the radix tree and remove
2516 * them from the extent map
2518 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
2519 btrfs_root
*extent_root
, int all
)
2527 int nr
= 0, skipped
= 0;
2528 struct extent_io_tree
*pending_del
;
2529 struct extent_io_tree
*extent_ins
;
2530 struct pending_extent_op
*extent_op
;
2531 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
2532 struct list_head delete_list
;
2534 INIT_LIST_HEAD(&delete_list
);
2535 extent_ins
= &extent_root
->fs_info
->extent_ins
;
2536 pending_del
= &extent_root
->fs_info
->pending_del
;
2539 mutex_lock(&info
->extent_ins_mutex
);
2541 ret
= find_first_extent_bit(pending_del
, search
, &start
, &end
,
2544 if (all
&& skipped
&& !nr
) {
2548 mutex_unlock(&info
->extent_ins_mutex
);
2552 ret
= try_lock_extent(extent_ins
, start
, end
, GFP_NOFS
);
2557 if (need_resched()) {
2558 mutex_unlock(&info
->extent_ins_mutex
);
2560 mutex_lock(&info
->extent_ins_mutex
);
2567 ret
= get_state_private(pending_del
, start
, &priv
);
2569 extent_op
= (struct pending_extent_op
*)(unsigned long)priv
;
2571 clear_extent_bits(pending_del
, start
, end
, EXTENT_WRITEBACK
,
2573 if (!test_range_bit(extent_ins
, start
, end
,
2574 EXTENT_WRITEBACK
, 0)) {
2575 list_add_tail(&extent_op
->list
, &delete_list
);
2580 ret
= get_state_private(&info
->extent_ins
, start
,
2583 extent_op
= (struct pending_extent_op
*)
2584 (unsigned long)priv
;
2586 clear_extent_bits(&info
->extent_ins
, start
, end
,
2587 EXTENT_WRITEBACK
, GFP_NOFS
);
2589 if (extent_op
->type
== PENDING_BACKREF_UPDATE
) {
2590 list_add_tail(&extent_op
->list
, &delete_list
);
2596 mutex_lock(&extent_root
->fs_info
->pinned_mutex
);
2597 ret
= pin_down_bytes(trans
, extent_root
, start
,
2598 end
+ 1 - start
, 0);
2599 mutex_unlock(&extent_root
->fs_info
->pinned_mutex
);
2601 ret
= update_block_group(trans
, extent_root
, start
,
2602 end
+ 1 - start
, 0, ret
> 0);
2604 unlock_extent(extent_ins
, start
, end
, GFP_NOFS
);
2613 if (need_resched()) {
2614 mutex_unlock(&info
->extent_ins_mutex
);
2616 mutex_lock(&info
->extent_ins_mutex
);
2621 ret
= free_extents(trans
, extent_root
, &delete_list
);
2625 if (all
&& skipped
) {
2626 INIT_LIST_HEAD(&delete_list
);
2636 * remove an extent from the root, returns 0 on success
2638 static int __btrfs_free_extent(struct btrfs_trans_handle
*trans
,
2639 struct btrfs_root
*root
,
2640 u64 bytenr
, u64 num_bytes
, u64 parent
,
2641 u64 root_objectid
, u64 ref_generation
,
2642 u64 owner_objectid
, int pin
)
2644 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
2648 WARN_ON(num_bytes
< root
->sectorsize
);
2649 if (root
== extent_root
) {
2650 struct pending_extent_op
*extent_op
= NULL
;
2652 mutex_lock(&root
->fs_info
->extent_ins_mutex
);
2653 if (test_range_bit(&root
->fs_info
->extent_ins
, bytenr
,
2654 bytenr
+ num_bytes
- 1, EXTENT_WRITEBACK
, 0)) {
2656 ret
= get_state_private(&root
->fs_info
->extent_ins
,
2659 extent_op
= (struct pending_extent_op
*)
2660 (unsigned long)priv
;
2663 if (extent_op
->type
== PENDING_EXTENT_INSERT
) {
2664 mutex_unlock(&root
->fs_info
->extent_ins_mutex
);
2670 ref_generation
= extent_op
->orig_generation
;
2671 parent
= extent_op
->orig_parent
;
2674 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
2677 extent_op
->type
= PENDING_EXTENT_DELETE
;
2678 extent_op
->bytenr
= bytenr
;
2679 extent_op
->num_bytes
= num_bytes
;
2680 extent_op
->parent
= parent
;
2681 extent_op
->orig_parent
= parent
;
2682 extent_op
->generation
= ref_generation
;
2683 extent_op
->orig_generation
= ref_generation
;
2684 extent_op
->level
= (int)owner_objectid
;
2685 INIT_LIST_HEAD(&extent_op
->list
);
2688 set_extent_bits(&root
->fs_info
->pending_del
,
2689 bytenr
, bytenr
+ num_bytes
- 1,
2690 EXTENT_WRITEBACK
, GFP_NOFS
);
2691 set_state_private(&root
->fs_info
->pending_del
,
2692 bytenr
, (unsigned long)extent_op
);
2693 mutex_unlock(&root
->fs_info
->extent_ins_mutex
);
2696 /* if metadata always pin */
2697 if (owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
2698 if (root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
) {
2699 struct btrfs_block_group_cache
*cache
;
2701 /* btrfs_free_reserved_extent */
2702 cache
= btrfs_lookup_block_group(root
->fs_info
, bytenr
);
2704 btrfs_add_free_space(cache
, bytenr
, num_bytes
);
2705 update_reserved_extents(root
, bytenr
, num_bytes
, 0);
2711 /* if data pin when any transaction has committed this */
2712 if (ref_generation
!= trans
->transid
)
2715 ret
= __free_extent(trans
, root
, bytenr
, num_bytes
, parent
,
2716 root_objectid
, ref_generation
,
2717 owner_objectid
, pin
, pin
== 0);
2719 finish_current_insert(trans
, root
->fs_info
->extent_root
, 0);
2720 pending_ret
= del_pending_extents(trans
, root
->fs_info
->extent_root
, 0);
2721 return ret
? ret
: pending_ret
;
2724 int btrfs_free_extent(struct btrfs_trans_handle
*trans
,
2725 struct btrfs_root
*root
,
2726 u64 bytenr
, u64 num_bytes
, u64 parent
,
2727 u64 root_objectid
, u64 ref_generation
,
2728 u64 owner_objectid
, int pin
)
2732 ret
= __btrfs_free_extent(trans
, root
, bytenr
, num_bytes
, parent
,
2733 root_objectid
, ref_generation
,
2734 owner_objectid
, pin
);
2738 static u64
stripe_align(struct btrfs_root
*root
, u64 val
)
2740 u64 mask
= ((u64
)root
->stripesize
- 1);
2741 u64 ret
= (val
+ mask
) & ~mask
;
2746 * walks the btree of allocated extents and find a hole of a given size.
2747 * The key ins is changed to record the hole:
2748 * ins->objectid == block start
2749 * ins->flags = BTRFS_EXTENT_ITEM_KEY
2750 * ins->offset == number of blocks
2751 * Any available blocks before search_start are skipped.
2753 static int noinline
find_free_extent(struct btrfs_trans_handle
*trans
,
2754 struct btrfs_root
*orig_root
,
2755 u64 num_bytes
, u64 empty_size
,
2756 u64 search_start
, u64 search_end
,
2757 u64 hint_byte
, struct btrfs_key
*ins
,
2758 u64 exclude_start
, u64 exclude_nr
,
2762 struct btrfs_root
* root
= orig_root
->fs_info
->extent_root
;
2763 u64 total_needed
= num_bytes
;
2764 u64
*last_ptr
= NULL
;
2765 u64 last_wanted
= 0;
2766 struct btrfs_block_group_cache
*block_group
= NULL
;
2767 int chunk_alloc_done
= 0;
2768 int empty_cluster
= 2 * 1024 * 1024;
2769 int allowed_chunk_alloc
= 0;
2770 struct list_head
*head
= NULL
, *cur
= NULL
;
2773 struct btrfs_space_info
*space_info
;
2775 WARN_ON(num_bytes
< root
->sectorsize
);
2776 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
2780 if (orig_root
->ref_cows
|| empty_size
)
2781 allowed_chunk_alloc
= 1;
2783 if (data
& BTRFS_BLOCK_GROUP_METADATA
) {
2784 last_ptr
= &root
->fs_info
->last_alloc
;
2785 empty_cluster
= 64 * 1024;
2788 if ((data
& BTRFS_BLOCK_GROUP_DATA
) && btrfs_test_opt(root
, SSD
))
2789 last_ptr
= &root
->fs_info
->last_data_alloc
;
2793 hint_byte
= *last_ptr
;
2794 last_wanted
= *last_ptr
;
2796 empty_size
+= empty_cluster
;
2800 search_start
= max(search_start
, first_logical_byte(root
, 0));
2801 search_start
= max(search_start
, hint_byte
);
2803 if (last_wanted
&& search_start
!= last_wanted
) {
2805 empty_size
+= empty_cluster
;
2808 total_needed
+= empty_size
;
2809 block_group
= btrfs_lookup_block_group(root
->fs_info
, search_start
);
2811 block_group
= btrfs_lookup_first_block_group(root
->fs_info
,
2813 space_info
= __find_space_info(root
->fs_info
, data
);
2815 down_read(&space_info
->groups_sem
);
2817 struct btrfs_free_space
*free_space
;
2819 * the only way this happens if our hint points to a block
2820 * group thats not of the proper type, while looping this
2821 * should never happen
2827 goto new_group_no_lock
;
2829 mutex_lock(&block_group
->alloc_mutex
);
2830 if (unlikely(!block_group_bits(block_group
, data
)))
2833 ret
= cache_block_group(root
, block_group
);
2835 mutex_unlock(&block_group
->alloc_mutex
);
2839 if (block_group
->ro
)
2842 free_space
= btrfs_find_free_space(block_group
, search_start
,
2845 u64 start
= block_group
->key
.objectid
;
2846 u64 end
= block_group
->key
.objectid
+
2847 block_group
->key
.offset
;
2849 search_start
= stripe_align(root
, free_space
->offset
);
2851 /* move on to the next group */
2852 if (search_start
+ num_bytes
>= search_end
)
2855 /* move on to the next group */
2856 if (search_start
+ num_bytes
> end
)
2859 if (last_wanted
&& search_start
!= last_wanted
) {
2860 total_needed
+= empty_cluster
;
2861 empty_size
+= empty_cluster
;
2864 * if search_start is still in this block group
2865 * then we just re-search this block group
2867 if (search_start
>= start
&&
2868 search_start
< end
) {
2869 mutex_unlock(&block_group
->alloc_mutex
);
2873 /* else we go to the next block group */
2877 if (exclude_nr
> 0 &&
2878 (search_start
+ num_bytes
> exclude_start
&&
2879 search_start
< exclude_start
+ exclude_nr
)) {
2880 search_start
= exclude_start
+ exclude_nr
;
2882 * if search_start is still in this block group
2883 * then we just re-search this block group
2885 if (search_start
>= start
&&
2886 search_start
< end
) {
2887 mutex_unlock(&block_group
->alloc_mutex
);
2892 /* else we go to the next block group */
2896 ins
->objectid
= search_start
;
2897 ins
->offset
= num_bytes
;
2899 btrfs_remove_free_space_lock(block_group
, search_start
,
2901 /* we are all good, lets return */
2902 mutex_unlock(&block_group
->alloc_mutex
);
2906 mutex_unlock(&block_group
->alloc_mutex
);
2908 /* don't try to compare new allocations against the
2909 * last allocation any more
2914 * Here's how this works.
2915 * loop == 0: we were searching a block group via a hint
2916 * and didn't find anything, so we start at
2917 * the head of the block groups and keep searching
2918 * loop == 1: we're searching through all of the block groups
2919 * if we hit the head again we have searched
2920 * all of the block groups for this space and we
2921 * need to try and allocate, if we cant error out.
2922 * loop == 2: we allocated more space and are looping through
2923 * all of the block groups again.
2926 head
= &space_info
->block_groups
;
2929 } else if (loop
== 1 && cur
== head
) {
2932 /* at this point we give up on the empty_size
2933 * allocations and just try to allocate the min
2936 * The extra_loop field was set if an empty_size
2937 * allocation was attempted above, and if this
2938 * is try we need to try the loop again without
2939 * the additional empty_size.
2941 total_needed
-= empty_size
;
2943 keep_going
= extra_loop
;
2946 if (allowed_chunk_alloc
&& !chunk_alloc_done
) {
2947 up_read(&space_info
->groups_sem
);
2948 ret
= do_chunk_alloc(trans
, root
, num_bytes
+
2949 2 * 1024 * 1024, data
, 1);
2950 down_read(&space_info
->groups_sem
);
2953 head
= &space_info
->block_groups
;
2955 * we've allocated a new chunk, keep
2959 chunk_alloc_done
= 1;
2960 } else if (!allowed_chunk_alloc
) {
2961 space_info
->force_alloc
= 1;
2970 } else if (cur
== head
) {
2974 block_group
= list_entry(cur
, struct btrfs_block_group_cache
,
2976 search_start
= block_group
->key
.objectid
;
2980 /* we found what we needed */
2981 if (ins
->objectid
) {
2982 if (!(data
& BTRFS_BLOCK_GROUP_DATA
))
2983 trans
->block_group
= block_group
;
2986 *last_ptr
= ins
->objectid
+ ins
->offset
;
2989 printk(KERN_ERR
"we were searching for %Lu bytes, num_bytes %Lu,"
2990 " loop %d, allowed_alloc %d\n", total_needed
, num_bytes
,
2991 loop
, allowed_chunk_alloc
);
2995 up_read(&space_info
->groups_sem
);
2999 static void dump_space_info(struct btrfs_space_info
*info
, u64 bytes
)
3001 struct btrfs_block_group_cache
*cache
;
3002 struct list_head
*l
;
3004 printk(KERN_INFO
"space_info has %Lu free, is %sfull\n",
3005 info
->total_bytes
- info
->bytes_used
- info
->bytes_pinned
-
3006 info
->bytes_reserved
, (info
->full
) ? "" : "not ");
3008 down_read(&info
->groups_sem
);
3009 list_for_each(l
, &info
->block_groups
) {
3010 cache
= list_entry(l
, struct btrfs_block_group_cache
, list
);
3011 spin_lock(&cache
->lock
);
3012 printk(KERN_INFO
"block group %Lu has %Lu bytes, %Lu used "
3013 "%Lu pinned %Lu reserved\n",
3014 cache
->key
.objectid
, cache
->key
.offset
,
3015 btrfs_block_group_used(&cache
->item
),
3016 cache
->pinned
, cache
->reserved
);
3017 btrfs_dump_free_space(cache
, bytes
);
3018 spin_unlock(&cache
->lock
);
3020 up_read(&info
->groups_sem
);
3023 static int __btrfs_reserve_extent(struct btrfs_trans_handle
*trans
,
3024 struct btrfs_root
*root
,
3025 u64 num_bytes
, u64 min_alloc_size
,
3026 u64 empty_size
, u64 hint_byte
,
3027 u64 search_end
, struct btrfs_key
*ins
,
3031 u64 search_start
= 0;
3033 struct btrfs_fs_info
*info
= root
->fs_info
;
3036 alloc_profile
= info
->avail_data_alloc_bits
&
3037 info
->data_alloc_profile
;
3038 data
= BTRFS_BLOCK_GROUP_DATA
| alloc_profile
;
3039 } else if (root
== root
->fs_info
->chunk_root
) {
3040 alloc_profile
= info
->avail_system_alloc_bits
&
3041 info
->system_alloc_profile
;
3042 data
= BTRFS_BLOCK_GROUP_SYSTEM
| alloc_profile
;
3044 alloc_profile
= info
->avail_metadata_alloc_bits
&
3045 info
->metadata_alloc_profile
;
3046 data
= BTRFS_BLOCK_GROUP_METADATA
| alloc_profile
;
3049 data
= btrfs_reduce_alloc_profile(root
, data
);
3051 * the only place that sets empty_size is btrfs_realloc_node, which
3052 * is not called recursively on allocations
3054 if (empty_size
|| root
->ref_cows
) {
3055 if (!(data
& BTRFS_BLOCK_GROUP_METADATA
)) {
3056 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
3058 BTRFS_BLOCK_GROUP_METADATA
|
3059 (info
->metadata_alloc_profile
&
3060 info
->avail_metadata_alloc_bits
), 0);
3062 ret
= do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
3063 num_bytes
+ 2 * 1024 * 1024, data
, 0);
3066 WARN_ON(num_bytes
< root
->sectorsize
);
3067 ret
= find_free_extent(trans
, root
, num_bytes
, empty_size
,
3068 search_start
, search_end
, hint_byte
, ins
,
3069 trans
->alloc_exclude_start
,
3070 trans
->alloc_exclude_nr
, data
);
3072 if (ret
== -ENOSPC
&& num_bytes
> min_alloc_size
) {
3073 num_bytes
= num_bytes
>> 1;
3074 num_bytes
= num_bytes
& ~(root
->sectorsize
- 1);
3075 num_bytes
= max(num_bytes
, min_alloc_size
);
3076 do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
3077 num_bytes
, data
, 1);
3081 struct btrfs_space_info
*sinfo
;
3083 sinfo
= __find_space_info(root
->fs_info
, data
);
3084 printk("allocation failed flags %Lu, wanted %Lu\n",
3086 dump_space_info(sinfo
, num_bytes
);
3093 int btrfs_free_reserved_extent(struct btrfs_root
*root
, u64 start
, u64 len
)
3095 struct btrfs_block_group_cache
*cache
;
3097 cache
= btrfs_lookup_block_group(root
->fs_info
, start
);
3099 printk(KERN_ERR
"Unable to find block group for %Lu\n", start
);
3102 btrfs_add_free_space(cache
, start
, len
);
3103 update_reserved_extents(root
, start
, len
, 0);
3107 int btrfs_reserve_extent(struct btrfs_trans_handle
*trans
,
3108 struct btrfs_root
*root
,
3109 u64 num_bytes
, u64 min_alloc_size
,
3110 u64 empty_size
, u64 hint_byte
,
3111 u64 search_end
, struct btrfs_key
*ins
,
3115 ret
= __btrfs_reserve_extent(trans
, root
, num_bytes
, min_alloc_size
,
3116 empty_size
, hint_byte
, search_end
, ins
,
3118 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 1);
3122 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle
*trans
,
3123 struct btrfs_root
*root
, u64 parent
,
3124 u64 root_objectid
, u64 ref_generation
,
3125 u64 owner
, struct btrfs_key
*ins
)
3131 u64 num_bytes
= ins
->offset
;
3133 struct btrfs_fs_info
*info
= root
->fs_info
;
3134 struct btrfs_root
*extent_root
= info
->extent_root
;
3135 struct btrfs_extent_item
*extent_item
;
3136 struct btrfs_extent_ref
*ref
;
3137 struct btrfs_path
*path
;
3138 struct btrfs_key keys
[2];
3141 parent
= ins
->objectid
;
3143 /* block accounting for super block */
3144 spin_lock_irq(&info
->delalloc_lock
);
3145 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
3146 btrfs_set_super_bytes_used(&info
->super_copy
, super_used
+ num_bytes
);
3147 spin_unlock_irq(&info
->delalloc_lock
);
3149 /* block accounting for root item */
3150 root_used
= btrfs_root_used(&root
->root_item
);
3151 btrfs_set_root_used(&root
->root_item
, root_used
+ num_bytes
);
3153 if (root
== extent_root
) {
3154 struct pending_extent_op
*extent_op
;
3156 extent_op
= kmalloc(sizeof(*extent_op
), GFP_NOFS
);
3159 extent_op
->type
= PENDING_EXTENT_INSERT
;
3160 extent_op
->bytenr
= ins
->objectid
;
3161 extent_op
->num_bytes
= ins
->offset
;
3162 extent_op
->parent
= parent
;
3163 extent_op
->orig_parent
= 0;
3164 extent_op
->generation
= ref_generation
;
3165 extent_op
->orig_generation
= 0;
3166 extent_op
->level
= (int)owner
;
3167 INIT_LIST_HEAD(&extent_op
->list
);
3170 mutex_lock(&root
->fs_info
->extent_ins_mutex
);
3171 set_extent_bits(&root
->fs_info
->extent_ins
, ins
->objectid
,
3172 ins
->objectid
+ ins
->offset
- 1,
3173 EXTENT_WRITEBACK
, GFP_NOFS
);
3174 set_state_private(&root
->fs_info
->extent_ins
,
3175 ins
->objectid
, (unsigned long)extent_op
);
3176 mutex_unlock(&root
->fs_info
->extent_ins_mutex
);
3180 memcpy(&keys
[0], ins
, sizeof(*ins
));
3181 keys
[1].objectid
= ins
->objectid
;
3182 keys
[1].type
= BTRFS_EXTENT_REF_KEY
;
3183 keys
[1].offset
= parent
;
3184 sizes
[0] = sizeof(*extent_item
);
3185 sizes
[1] = sizeof(*ref
);
3187 path
= btrfs_alloc_path();
3190 ret
= btrfs_insert_empty_items(trans
, extent_root
, path
, keys
,
3194 extent_item
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
3195 struct btrfs_extent_item
);
3196 btrfs_set_extent_refs(path
->nodes
[0], extent_item
, 1);
3197 ref
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0] + 1,
3198 struct btrfs_extent_ref
);
3200 btrfs_set_ref_root(path
->nodes
[0], ref
, root_objectid
);
3201 btrfs_set_ref_generation(path
->nodes
[0], ref
, ref_generation
);
3202 btrfs_set_ref_objectid(path
->nodes
[0], ref
, owner
);
3203 btrfs_set_ref_num_refs(path
->nodes
[0], ref
, 1);
3205 btrfs_mark_buffer_dirty(path
->nodes
[0]);
3207 trans
->alloc_exclude_start
= 0;
3208 trans
->alloc_exclude_nr
= 0;
3209 btrfs_free_path(path
);
3210 finish_current_insert(trans
, extent_root
, 0);
3211 pending_ret
= del_pending_extents(trans
, extent_root
, 0);
3221 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
, 1, 0);
3223 printk("update block group failed for %Lu %Lu\n",
3224 ins
->objectid
, ins
->offset
);
3231 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle
*trans
,
3232 struct btrfs_root
*root
, u64 parent
,
3233 u64 root_objectid
, u64 ref_generation
,
3234 u64 owner
, struct btrfs_key
*ins
)
3238 if (root_objectid
== BTRFS_TREE_LOG_OBJECTID
)
3240 ret
= __btrfs_alloc_reserved_extent(trans
, root
, parent
, root_objectid
,
3241 ref_generation
, owner
, ins
);
3242 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 0);
3247 * this is used by the tree logging recovery code. It records that
3248 * an extent has been allocated and makes sure to clear the free
3249 * space cache bits as well
3251 int btrfs_alloc_logged_extent(struct btrfs_trans_handle
*trans
,
3252 struct btrfs_root
*root
, u64 parent
,
3253 u64 root_objectid
, u64 ref_generation
,
3254 u64 owner
, struct btrfs_key
*ins
)
3257 struct btrfs_block_group_cache
*block_group
;
3259 block_group
= btrfs_lookup_block_group(root
->fs_info
, ins
->objectid
);
3260 mutex_lock(&block_group
->alloc_mutex
);
3261 cache_block_group(root
, block_group
);
3263 ret
= btrfs_remove_free_space_lock(block_group
, ins
->objectid
,
3265 mutex_unlock(&block_group
->alloc_mutex
);
3267 ret
= __btrfs_alloc_reserved_extent(trans
, root
, parent
, root_objectid
,
3268 ref_generation
, owner
, ins
);
3273 * finds a free extent and does all the dirty work required for allocation
3274 * returns the key for the extent through ins, and a tree buffer for
3275 * the first block of the extent through buf.
3277 * returns 0 if everything worked, non-zero otherwise.
3279 int btrfs_alloc_extent(struct btrfs_trans_handle
*trans
,
3280 struct btrfs_root
*root
,
3281 u64 num_bytes
, u64 parent
, u64 min_alloc_size
,
3282 u64 root_objectid
, u64 ref_generation
,
3283 u64 owner_objectid
, u64 empty_size
, u64 hint_byte
,
3284 u64 search_end
, struct btrfs_key
*ins
, u64 data
)
3288 ret
= __btrfs_reserve_extent(trans
, root
, num_bytes
,
3289 min_alloc_size
, empty_size
, hint_byte
,
3290 search_end
, ins
, data
);
3292 if (root_objectid
!= BTRFS_TREE_LOG_OBJECTID
) {
3293 ret
= __btrfs_alloc_reserved_extent(trans
, root
, parent
,
3294 root_objectid
, ref_generation
,
3295 owner_objectid
, ins
);
3299 update_reserved_extents(root
, ins
->objectid
, ins
->offset
, 1);
3304 struct extent_buffer
*btrfs_init_new_buffer(struct btrfs_trans_handle
*trans
,
3305 struct btrfs_root
*root
,
3306 u64 bytenr
, u32 blocksize
)
3308 struct extent_buffer
*buf
;
3310 buf
= btrfs_find_create_tree_block(root
, bytenr
, blocksize
);
3312 return ERR_PTR(-ENOMEM
);
3313 btrfs_set_header_generation(buf
, trans
->transid
);
3314 btrfs_tree_lock(buf
);
3315 clean_tree_block(trans
, root
, buf
);
3316 btrfs_set_buffer_uptodate(buf
);
3317 if (root
->root_key
.objectid
== BTRFS_TREE_LOG_OBJECTID
) {
3318 set_extent_dirty(&root
->dirty_log_pages
, buf
->start
,
3319 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
3321 set_extent_dirty(&trans
->transaction
->dirty_pages
, buf
->start
,
3322 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
3324 trans
->blocks_used
++;
3329 * helper function to allocate a block for a given tree
3330 * returns the tree buffer or NULL.
3332 struct extent_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
3333 struct btrfs_root
*root
,
3334 u32 blocksize
, u64 parent
,
3341 struct btrfs_key ins
;
3343 struct extent_buffer
*buf
;
3345 ret
= btrfs_alloc_extent(trans
, root
, blocksize
, parent
, blocksize
,
3346 root_objectid
, ref_generation
, level
,
3347 empty_size
, hint
, (u64
)-1, &ins
, 0);
3350 return ERR_PTR(ret
);
3353 buf
= btrfs_init_new_buffer(trans
, root
, ins
.objectid
, blocksize
);
3357 int btrfs_drop_leaf_ref(struct btrfs_trans_handle
*trans
,
3358 struct btrfs_root
*root
, struct extent_buffer
*leaf
)
3361 u64 leaf_generation
;
3362 struct btrfs_key key
;
3363 struct btrfs_file_extent_item
*fi
;
3368 BUG_ON(!btrfs_is_leaf(leaf
));
3369 nritems
= btrfs_header_nritems(leaf
);
3370 leaf_owner
= btrfs_header_owner(leaf
);
3371 leaf_generation
= btrfs_header_generation(leaf
);
3373 for (i
= 0; i
< nritems
; i
++) {
3377 btrfs_item_key_to_cpu(leaf
, &key
, i
);
3378 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
3380 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
3381 if (btrfs_file_extent_type(leaf
, fi
) ==
3382 BTRFS_FILE_EXTENT_INLINE
)
3385 * FIXME make sure to insert a trans record that
3386 * repeats the snapshot del on crash
3388 disk_bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
3389 if (disk_bytenr
== 0)
3392 ret
= __btrfs_free_extent(trans
, root
, disk_bytenr
,
3393 btrfs_file_extent_disk_num_bytes(leaf
, fi
),
3394 leaf
->start
, leaf_owner
, leaf_generation
,
3398 atomic_inc(&root
->fs_info
->throttle_gen
);
3399 wake_up(&root
->fs_info
->transaction_throttle
);
3405 static int noinline
cache_drop_leaf_ref(struct btrfs_trans_handle
*trans
,
3406 struct btrfs_root
*root
,
3407 struct btrfs_leaf_ref
*ref
)
3411 struct btrfs_extent_info
*info
= ref
->extents
;
3413 for (i
= 0; i
< ref
->nritems
; i
++) {
3414 ret
= __btrfs_free_extent(trans
, root
, info
->bytenr
,
3415 info
->num_bytes
, ref
->bytenr
,
3416 ref
->owner
, ref
->generation
,
3419 atomic_inc(&root
->fs_info
->throttle_gen
);
3420 wake_up(&root
->fs_info
->transaction_throttle
);
3430 int drop_snap_lookup_refcount(struct btrfs_root
*root
, u64 start
, u64 len
,
3435 ret
= btrfs_lookup_extent_ref(NULL
, root
, start
, len
, refs
);
3438 #if 0 // some debugging code in case we see problems here
3439 /* if the refs count is one, it won't get increased again. But
3440 * if the ref count is > 1, someone may be decreasing it at
3441 * the same time we are.
3444 struct extent_buffer
*eb
= NULL
;
3445 eb
= btrfs_find_create_tree_block(root
, start
, len
);
3447 btrfs_tree_lock(eb
);
3449 mutex_lock(&root
->fs_info
->alloc_mutex
);
3450 ret
= lookup_extent_ref(NULL
, root
, start
, len
, refs
);
3452 mutex_unlock(&root
->fs_info
->alloc_mutex
);
3455 btrfs_tree_unlock(eb
);
3456 free_extent_buffer(eb
);
3459 printk("block %llu went down to one during drop_snap\n",
3460 (unsigned long long)start
);
3471 * helper function for drop_snapshot, this walks down the tree dropping ref
3472 * counts as it goes.
3474 static int noinline
walk_down_tree(struct btrfs_trans_handle
*trans
,
3475 struct btrfs_root
*root
,
3476 struct btrfs_path
*path
, int *level
)
3482 struct extent_buffer
*next
;
3483 struct extent_buffer
*cur
;
3484 struct extent_buffer
*parent
;
3485 struct btrfs_leaf_ref
*ref
;
3490 WARN_ON(*level
< 0);
3491 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
3492 ret
= drop_snap_lookup_refcount(root
, path
->nodes
[*level
]->start
,
3493 path
->nodes
[*level
]->len
, &refs
);
3499 * walk down to the last node level and free all the leaves
3501 while(*level
>= 0) {
3502 WARN_ON(*level
< 0);
3503 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
3504 cur
= path
->nodes
[*level
];
3506 if (btrfs_header_level(cur
) != *level
)
3509 if (path
->slots
[*level
] >=
3510 btrfs_header_nritems(cur
))
3513 ret
= btrfs_drop_leaf_ref(trans
, root
, cur
);
3517 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
3518 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
3519 blocksize
= btrfs_level_size(root
, *level
- 1);
3521 ret
= drop_snap_lookup_refcount(root
, bytenr
, blocksize
, &refs
);
3524 parent
= path
->nodes
[*level
];
3525 root_owner
= btrfs_header_owner(parent
);
3526 root_gen
= btrfs_header_generation(parent
);
3527 path
->slots
[*level
]++;
3529 ret
= __btrfs_free_extent(trans
, root
, bytenr
,
3530 blocksize
, parent
->start
,
3531 root_owner
, root_gen
,
3535 atomic_inc(&root
->fs_info
->throttle_gen
);
3536 wake_up(&root
->fs_info
->transaction_throttle
);
3542 * at this point, we have a single ref, and since the
3543 * only place referencing this extent is a dead root
3544 * the reference count should never go higher.
3545 * So, we don't need to check it again
3548 ref
= btrfs_lookup_leaf_ref(root
, bytenr
);
3549 if (ref
&& ref
->generation
!= ptr_gen
) {
3550 btrfs_free_leaf_ref(root
, ref
);
3554 ret
= cache_drop_leaf_ref(trans
, root
, ref
);
3556 btrfs_remove_leaf_ref(root
, ref
);
3557 btrfs_free_leaf_ref(root
, ref
);
3561 if (printk_ratelimit()) {
3562 printk("leaf ref miss for bytenr %llu\n",
3563 (unsigned long long)bytenr
);
3566 next
= btrfs_find_tree_block(root
, bytenr
, blocksize
);
3567 if (!next
|| !btrfs_buffer_uptodate(next
, ptr_gen
)) {
3568 free_extent_buffer(next
);
3570 next
= read_tree_block(root
, bytenr
, blocksize
,
3575 * this is a debugging check and can go away
3576 * the ref should never go all the way down to 1
3579 ret
= lookup_extent_ref(NULL
, root
, bytenr
, blocksize
,
3585 WARN_ON(*level
<= 0);
3586 if (path
->nodes
[*level
-1])
3587 free_extent_buffer(path
->nodes
[*level
-1]);
3588 path
->nodes
[*level
-1] = next
;
3589 *level
= btrfs_header_level(next
);
3590 path
->slots
[*level
] = 0;
3594 WARN_ON(*level
< 0);
3595 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
3597 if (path
->nodes
[*level
] == root
->node
) {
3598 parent
= path
->nodes
[*level
];
3599 bytenr
= path
->nodes
[*level
]->start
;
3601 parent
= path
->nodes
[*level
+ 1];
3602 bytenr
= btrfs_node_blockptr(parent
, path
->slots
[*level
+ 1]);
3605 blocksize
= btrfs_level_size(root
, *level
);
3606 root_owner
= btrfs_header_owner(parent
);
3607 root_gen
= btrfs_header_generation(parent
);
3609 ret
= __btrfs_free_extent(trans
, root
, bytenr
, blocksize
,
3610 parent
->start
, root_owner
, root_gen
,
3612 free_extent_buffer(path
->nodes
[*level
]);
3613 path
->nodes
[*level
] = NULL
;
3622 * helper function for drop_subtree, this function is similar to
3623 * walk_down_tree. The main difference is that it checks reference
3624 * counts while tree blocks are locked.
3626 static int noinline
walk_down_subtree(struct btrfs_trans_handle
*trans
,
3627 struct btrfs_root
*root
,
3628 struct btrfs_path
*path
, int *level
)
3630 struct extent_buffer
*next
;
3631 struct extent_buffer
*cur
;
3632 struct extent_buffer
*parent
;
3639 cur
= path
->nodes
[*level
];
3640 ret
= btrfs_lookup_extent_ref(trans
, root
, cur
->start
, cur
->len
,
3646 while (*level
>= 0) {
3647 cur
= path
->nodes
[*level
];
3649 ret
= btrfs_drop_leaf_ref(trans
, root
, cur
);
3651 clean_tree_block(trans
, root
, cur
);
3654 if (path
->slots
[*level
] >= btrfs_header_nritems(cur
)) {
3655 clean_tree_block(trans
, root
, cur
);
3659 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
3660 blocksize
= btrfs_level_size(root
, *level
- 1);
3661 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
3663 next
= read_tree_block(root
, bytenr
, blocksize
, ptr_gen
);
3664 btrfs_tree_lock(next
);
3666 ret
= btrfs_lookup_extent_ref(trans
, root
, bytenr
, blocksize
,
3670 parent
= path
->nodes
[*level
];
3671 ret
= btrfs_free_extent(trans
, root
, bytenr
,
3672 blocksize
, parent
->start
,
3673 btrfs_header_owner(parent
),
3674 btrfs_header_generation(parent
),
3677 path
->slots
[*level
]++;
3678 btrfs_tree_unlock(next
);
3679 free_extent_buffer(next
);
3683 *level
= btrfs_header_level(next
);
3684 path
->nodes
[*level
] = next
;
3685 path
->slots
[*level
] = 0;
3686 path
->locks
[*level
] = 1;
3690 parent
= path
->nodes
[*level
+ 1];
3691 bytenr
= path
->nodes
[*level
]->start
;
3692 blocksize
= path
->nodes
[*level
]->len
;
3694 ret
= btrfs_free_extent(trans
, root
, bytenr
, blocksize
,
3695 parent
->start
, btrfs_header_owner(parent
),
3696 btrfs_header_generation(parent
), *level
, 1);
3699 if (path
->locks
[*level
]) {
3700 btrfs_tree_unlock(path
->nodes
[*level
]);
3701 path
->locks
[*level
] = 0;
3703 free_extent_buffer(path
->nodes
[*level
]);
3704 path
->nodes
[*level
] = NULL
;
3711 * helper for dropping snapshots. This walks back up the tree in the path
3712 * to find the first node higher up where we haven't yet gone through
3715 static int noinline
walk_up_tree(struct btrfs_trans_handle
*trans
,
3716 struct btrfs_root
*root
,
3717 struct btrfs_path
*path
,
3718 int *level
, int max_level
)
3722 struct btrfs_root_item
*root_item
= &root
->root_item
;
3727 for (i
= *level
; i
< max_level
&& path
->nodes
[i
]; i
++) {
3728 slot
= path
->slots
[i
];
3729 if (slot
< btrfs_header_nritems(path
->nodes
[i
]) - 1) {
3730 struct extent_buffer
*node
;
3731 struct btrfs_disk_key disk_key
;
3732 node
= path
->nodes
[i
];
3735 WARN_ON(*level
== 0);
3736 btrfs_node_key(node
, &disk_key
, path
->slots
[i
]);
3737 memcpy(&root_item
->drop_progress
,
3738 &disk_key
, sizeof(disk_key
));
3739 root_item
->drop_level
= i
;
3742 struct extent_buffer
*parent
;
3743 if (path
->nodes
[*level
] == root
->node
)
3744 parent
= path
->nodes
[*level
];
3746 parent
= path
->nodes
[*level
+ 1];
3748 root_owner
= btrfs_header_owner(parent
);
3749 root_gen
= btrfs_header_generation(parent
);
3751 clean_tree_block(trans
, root
, path
->nodes
[*level
]);
3752 ret
= btrfs_free_extent(trans
, root
,
3753 path
->nodes
[*level
]->start
,
3754 path
->nodes
[*level
]->len
,
3755 parent
->start
, root_owner
,
3756 root_gen
, *level
, 1);
3758 if (path
->locks
[*level
]) {
3759 btrfs_tree_unlock(path
->nodes
[*level
]);
3760 path
->locks
[*level
] = 0;
3762 free_extent_buffer(path
->nodes
[*level
]);
3763 path
->nodes
[*level
] = NULL
;
3771 * drop the reference count on the tree rooted at 'snap'. This traverses
3772 * the tree freeing any blocks that have a ref count of zero after being
3775 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
3781 struct btrfs_path
*path
;
3784 struct btrfs_root_item
*root_item
= &root
->root_item
;
3786 WARN_ON(!mutex_is_locked(&root
->fs_info
->drop_mutex
));
3787 path
= btrfs_alloc_path();
3790 level
= btrfs_header_level(root
->node
);
3792 if (btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
3793 path
->nodes
[level
] = root
->node
;
3794 extent_buffer_get(root
->node
);
3795 path
->slots
[level
] = 0;
3797 struct btrfs_key key
;
3798 struct btrfs_disk_key found_key
;
3799 struct extent_buffer
*node
;
3801 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
3802 level
= root_item
->drop_level
;
3803 path
->lowest_level
= level
;
3804 wret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
3809 node
= path
->nodes
[level
];
3810 btrfs_node_key(node
, &found_key
, path
->slots
[level
]);
3811 WARN_ON(memcmp(&found_key
, &root_item
->drop_progress
,
3812 sizeof(found_key
)));
3814 * unlock our path, this is safe because only this
3815 * function is allowed to delete this snapshot
3817 for (i
= 0; i
< BTRFS_MAX_LEVEL
; i
++) {
3818 if (path
->nodes
[i
] && path
->locks
[i
]) {
3820 btrfs_tree_unlock(path
->nodes
[i
]);
3825 wret
= walk_down_tree(trans
, root
, path
, &level
);
3831 wret
= walk_up_tree(trans
, root
, path
, &level
,
3837 if (trans
->transaction
->in_commit
) {
3841 atomic_inc(&root
->fs_info
->throttle_gen
);
3842 wake_up(&root
->fs_info
->transaction_throttle
);
3844 for (i
= 0; i
<= orig_level
; i
++) {
3845 if (path
->nodes
[i
]) {
3846 free_extent_buffer(path
->nodes
[i
]);
3847 path
->nodes
[i
] = NULL
;
3851 btrfs_free_path(path
);
3855 int btrfs_drop_subtree(struct btrfs_trans_handle
*trans
,
3856 struct btrfs_root
*root
,
3857 struct extent_buffer
*node
,
3858 struct extent_buffer
*parent
)
3860 struct btrfs_path
*path
;
3866 path
= btrfs_alloc_path();
3869 BUG_ON(!btrfs_tree_locked(parent
));
3870 parent_level
= btrfs_header_level(parent
);
3871 extent_buffer_get(parent
);
3872 path
->nodes
[parent_level
] = parent
;
3873 path
->slots
[parent_level
] = btrfs_header_nritems(parent
);
3875 BUG_ON(!btrfs_tree_locked(node
));
3876 level
= btrfs_header_level(node
);
3877 extent_buffer_get(node
);
3878 path
->nodes
[level
] = node
;
3879 path
->slots
[level
] = 0;
3882 wret
= walk_down_subtree(trans
, root
, path
, &level
);
3888 wret
= walk_up_tree(trans
, root
, path
, &level
, parent_level
);
3895 btrfs_free_path(path
);
3899 static unsigned long calc_ra(unsigned long start
, unsigned long last
,
3902 return min(last
, start
+ nr
- 1);
3905 static int noinline
relocate_inode_pages(struct inode
*inode
, u64 start
,
3910 unsigned long first_index
;
3911 unsigned long last_index
;
3914 struct extent_io_tree
*io_tree
= &BTRFS_I(inode
)->io_tree
;
3915 struct file_ra_state
*ra
;
3916 struct btrfs_ordered_extent
*ordered
;
3917 unsigned int total_read
= 0;
3918 unsigned int total_dirty
= 0;
3921 ra
= kzalloc(sizeof(*ra
), GFP_NOFS
);
3923 mutex_lock(&inode
->i_mutex
);
3924 first_index
= start
>> PAGE_CACHE_SHIFT
;
3925 last_index
= (start
+ len
- 1) >> PAGE_CACHE_SHIFT
;
3927 /* make sure the dirty trick played by the caller work */
3928 ret
= invalidate_inode_pages2_range(inode
->i_mapping
,
3929 first_index
, last_index
);
3933 file_ra_state_init(ra
, inode
->i_mapping
);
3935 for (i
= first_index
; i
<= last_index
; i
++) {
3936 if (total_read
% ra
->ra_pages
== 0) {
3937 btrfs_force_ra(inode
->i_mapping
, ra
, NULL
, i
,
3938 calc_ra(i
, last_index
, ra
->ra_pages
));
3942 if (((u64
)i
<< PAGE_CACHE_SHIFT
) > i_size_read(inode
))
3944 page
= grab_cache_page(inode
->i_mapping
, i
);
3949 if (!PageUptodate(page
)) {
3950 btrfs_readpage(NULL
, page
);
3952 if (!PageUptodate(page
)) {
3954 page_cache_release(page
);
3959 wait_on_page_writeback(page
);
3961 page_start
= (u64
)page
->index
<< PAGE_CACHE_SHIFT
;
3962 page_end
= page_start
+ PAGE_CACHE_SIZE
- 1;
3963 lock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
3965 ordered
= btrfs_lookup_ordered_extent(inode
, page_start
);
3967 unlock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
3969 page_cache_release(page
);
3970 btrfs_start_ordered_extent(inode
, ordered
, 1);
3971 btrfs_put_ordered_extent(ordered
);
3974 set_page_extent_mapped(page
);
3976 btrfs_set_extent_delalloc(inode
, page_start
, page_end
);
3977 if (i
== first_index
)
3978 set_extent_bits(io_tree
, page_start
, page_end
,
3979 EXTENT_BOUNDARY
, GFP_NOFS
);
3981 set_page_dirty(page
);
3984 unlock_extent(io_tree
, page_start
, page_end
, GFP_NOFS
);
3986 page_cache_release(page
);
3991 mutex_unlock(&inode
->i_mutex
);
3992 balance_dirty_pages_ratelimited_nr(inode
->i_mapping
, total_dirty
);
3996 static int noinline
relocate_data_extent(struct inode
*reloc_inode
,
3997 struct btrfs_key
*extent_key
,
4000 struct btrfs_root
*root
= BTRFS_I(reloc_inode
)->root
;
4001 struct extent_map_tree
*em_tree
= &BTRFS_I(reloc_inode
)->extent_tree
;
4002 struct extent_map
*em
;
4003 u64 start
= extent_key
->objectid
- offset
;
4004 u64 end
= start
+ extent_key
->offset
- 1;
4006 em
= alloc_extent_map(GFP_NOFS
);
4007 BUG_ON(!em
|| IS_ERR(em
));
4010 em
->len
= extent_key
->offset
;
4011 em
->block_len
= extent_key
->offset
;
4012 em
->block_start
= extent_key
->objectid
;
4013 em
->bdev
= root
->fs_info
->fs_devices
->latest_bdev
;
4014 set_bit(EXTENT_FLAG_PINNED
, &em
->flags
);
4016 /* setup extent map to cheat btrfs_readpage */
4017 lock_extent(&BTRFS_I(reloc_inode
)->io_tree
, start
, end
, GFP_NOFS
);
4020 spin_lock(&em_tree
->lock
);
4021 ret
= add_extent_mapping(em_tree
, em
);
4022 spin_unlock(&em_tree
->lock
);
4023 if (ret
!= -EEXIST
) {
4024 free_extent_map(em
);
4027 btrfs_drop_extent_cache(reloc_inode
, start
, end
, 0);
4029 unlock_extent(&BTRFS_I(reloc_inode
)->io_tree
, start
, end
, GFP_NOFS
);
4031 return relocate_inode_pages(reloc_inode
, start
, extent_key
->offset
);
4034 struct btrfs_ref_path
{
4036 u64 nodes
[BTRFS_MAX_LEVEL
];
4038 u64 root_generation
;
4045 struct btrfs_key node_keys
[BTRFS_MAX_LEVEL
];
4046 u64 new_nodes
[BTRFS_MAX_LEVEL
];
4049 struct disk_extent
{
4060 static int is_cowonly_root(u64 root_objectid
)
4062 if (root_objectid
== BTRFS_ROOT_TREE_OBJECTID
||
4063 root_objectid
== BTRFS_EXTENT_TREE_OBJECTID
||
4064 root_objectid
== BTRFS_CHUNK_TREE_OBJECTID
||
4065 root_objectid
== BTRFS_DEV_TREE_OBJECTID
||
4066 root_objectid
== BTRFS_TREE_LOG_OBJECTID
)
4071 static int noinline
__next_ref_path(struct btrfs_trans_handle
*trans
,
4072 struct btrfs_root
*extent_root
,
4073 struct btrfs_ref_path
*ref_path
,
4076 struct extent_buffer
*leaf
;
4077 struct btrfs_path
*path
;
4078 struct btrfs_extent_ref
*ref
;
4079 struct btrfs_key key
;
4080 struct btrfs_key found_key
;
4086 path
= btrfs_alloc_path();
4091 ref_path
->lowest_level
= -1;
4092 ref_path
->current_level
= -1;
4093 ref_path
->shared_level
= -1;
4097 level
= ref_path
->current_level
- 1;
4098 while (level
>= -1) {
4100 if (level
< ref_path
->lowest_level
)
4104 bytenr
= ref_path
->nodes
[level
];
4106 bytenr
= ref_path
->extent_start
;
4108 BUG_ON(bytenr
== 0);
4110 parent
= ref_path
->nodes
[level
+ 1];
4111 ref_path
->nodes
[level
+ 1] = 0;
4112 ref_path
->current_level
= level
;
4113 BUG_ON(parent
== 0);
4115 key
.objectid
= bytenr
;
4116 key
.offset
= parent
+ 1;
4117 key
.type
= BTRFS_EXTENT_REF_KEY
;
4119 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 0);
4124 leaf
= path
->nodes
[0];
4125 nritems
= btrfs_header_nritems(leaf
);
4126 if (path
->slots
[0] >= nritems
) {
4127 ret
= btrfs_next_leaf(extent_root
, path
);
4132 leaf
= path
->nodes
[0];
4135 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
4136 if (found_key
.objectid
== bytenr
&&
4137 found_key
.type
== BTRFS_EXTENT_REF_KEY
) {
4138 if (level
< ref_path
->shared_level
)
4139 ref_path
->shared_level
= level
;
4144 btrfs_release_path(extent_root
, path
);
4147 /* reached lowest level */
4151 level
= ref_path
->current_level
;
4152 while (level
< BTRFS_MAX_LEVEL
- 1) {
4155 bytenr
= ref_path
->nodes
[level
];
4157 bytenr
= ref_path
->extent_start
;
4159 BUG_ON(bytenr
== 0);
4161 key
.objectid
= bytenr
;
4163 key
.type
= BTRFS_EXTENT_REF_KEY
;
4165 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, 0, 0);
4169 leaf
= path
->nodes
[0];
4170 nritems
= btrfs_header_nritems(leaf
);
4171 if (path
->slots
[0] >= nritems
) {
4172 ret
= btrfs_next_leaf(extent_root
, path
);
4176 /* the extent was freed by someone */
4177 if (ref_path
->lowest_level
== level
)
4179 btrfs_release_path(extent_root
, path
);
4182 leaf
= path
->nodes
[0];
4185 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
4186 if (found_key
.objectid
!= bytenr
||
4187 found_key
.type
!= BTRFS_EXTENT_REF_KEY
) {
4188 /* the extent was freed by someone */
4189 if (ref_path
->lowest_level
== level
) {
4193 btrfs_release_path(extent_root
, path
);
4197 ref
= btrfs_item_ptr(leaf
, path
->slots
[0],
4198 struct btrfs_extent_ref
);
4199 ref_objectid
= btrfs_ref_objectid(leaf
, ref
);
4200 if (ref_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
4202 level
= (int)ref_objectid
;
4203 BUG_ON(level
>= BTRFS_MAX_LEVEL
);
4204 ref_path
->lowest_level
= level
;
4205 ref_path
->current_level
= level
;
4206 ref_path
->nodes
[level
] = bytenr
;
4208 WARN_ON(ref_objectid
!= level
);
4211 WARN_ON(level
!= -1);
4215 if (ref_path
->lowest_level
== level
) {
4216 ref_path
->owner_objectid
= ref_objectid
;
4217 ref_path
->num_refs
= btrfs_ref_num_refs(leaf
, ref
);
4221 * the block is tree root or the block isn't in reference
4224 if (found_key
.objectid
== found_key
.offset
||
4225 is_cowonly_root(btrfs_ref_root(leaf
, ref
))) {
4226 ref_path
->root_objectid
= btrfs_ref_root(leaf
, ref
);
4227 ref_path
->root_generation
=
4228 btrfs_ref_generation(leaf
, ref
);
4230 /* special reference from the tree log */
4231 ref_path
->nodes
[0] = found_key
.offset
;
4232 ref_path
->current_level
= 0;
4239 BUG_ON(ref_path
->nodes
[level
] != 0);
4240 ref_path
->nodes
[level
] = found_key
.offset
;
4241 ref_path
->current_level
= level
;
4244 * the reference was created in the running transaction,
4245 * no need to continue walking up.
4247 if (btrfs_ref_generation(leaf
, ref
) == trans
->transid
) {
4248 ref_path
->root_objectid
= btrfs_ref_root(leaf
, ref
);
4249 ref_path
->root_generation
=
4250 btrfs_ref_generation(leaf
, ref
);
4255 btrfs_release_path(extent_root
, path
);
4258 /* reached max tree level, but no tree root found. */
4261 btrfs_free_path(path
);
4265 static int btrfs_first_ref_path(struct btrfs_trans_handle
*trans
,
4266 struct btrfs_root
*extent_root
,
4267 struct btrfs_ref_path
*ref_path
,
4270 memset(ref_path
, 0, sizeof(*ref_path
));
4271 ref_path
->extent_start
= extent_start
;
4273 return __next_ref_path(trans
, extent_root
, ref_path
, 1);
4276 static int btrfs_next_ref_path(struct btrfs_trans_handle
*trans
,
4277 struct btrfs_root
*extent_root
,
4278 struct btrfs_ref_path
*ref_path
)
4280 return __next_ref_path(trans
, extent_root
, ref_path
, 0);
4283 static int noinline
get_new_locations(struct inode
*reloc_inode
,
4284 struct btrfs_key
*extent_key
,
4285 u64 offset
, int no_fragment
,
4286 struct disk_extent
**extents
,
4289 struct btrfs_root
*root
= BTRFS_I(reloc_inode
)->root
;
4290 struct btrfs_path
*path
;
4291 struct btrfs_file_extent_item
*fi
;
4292 struct extent_buffer
*leaf
;
4293 struct disk_extent
*exts
= *extents
;
4294 struct btrfs_key found_key
;
4299 int max
= *nr_extents
;
4302 WARN_ON(!no_fragment
&& *extents
);
4305 exts
= kmalloc(sizeof(*exts
) * max
, GFP_NOFS
);
4310 path
= btrfs_alloc_path();
4313 cur_pos
= extent_key
->objectid
- offset
;
4314 last_byte
= extent_key
->objectid
+ extent_key
->offset
;
4315 ret
= btrfs_lookup_file_extent(NULL
, root
, path
, reloc_inode
->i_ino
,
4325 leaf
= path
->nodes
[0];
4326 nritems
= btrfs_header_nritems(leaf
);
4327 if (path
->slots
[0] >= nritems
) {
4328 ret
= btrfs_next_leaf(root
, path
);
4333 leaf
= path
->nodes
[0];
4336 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
4337 if (found_key
.offset
!= cur_pos
||
4338 found_key
.type
!= BTRFS_EXTENT_DATA_KEY
||
4339 found_key
.objectid
!= reloc_inode
->i_ino
)
4342 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
4343 struct btrfs_file_extent_item
);
4344 if (btrfs_file_extent_type(leaf
, fi
) !=
4345 BTRFS_FILE_EXTENT_REG
||
4346 btrfs_file_extent_disk_bytenr(leaf
, fi
) == 0)
4350 struct disk_extent
*old
= exts
;
4352 exts
= kzalloc(sizeof(*exts
) * max
, GFP_NOFS
);
4353 memcpy(exts
, old
, sizeof(*exts
) * nr
);
4354 if (old
!= *extents
)
4358 exts
[nr
].disk_bytenr
=
4359 btrfs_file_extent_disk_bytenr(leaf
, fi
);
4360 exts
[nr
].disk_num_bytes
=
4361 btrfs_file_extent_disk_num_bytes(leaf
, fi
);
4362 exts
[nr
].offset
= btrfs_file_extent_offset(leaf
, fi
);
4363 exts
[nr
].num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
4364 exts
[nr
].ram_bytes
= btrfs_file_extent_ram_bytes(leaf
, fi
);
4365 exts
[nr
].compression
= btrfs_file_extent_compression(leaf
, fi
);
4366 exts
[nr
].encryption
= btrfs_file_extent_encryption(leaf
, fi
);
4367 exts
[nr
].other_encoding
= btrfs_file_extent_other_encoding(leaf
,
4369 BUG_ON(exts
[nr
].offset
> 0);
4370 BUG_ON(exts
[nr
].compression
|| exts
[nr
].encryption
);
4371 BUG_ON(exts
[nr
].num_bytes
!= exts
[nr
].disk_num_bytes
);
4373 cur_pos
+= exts
[nr
].num_bytes
;
4376 if (cur_pos
+ offset
>= last_byte
)
4386 WARN_ON(cur_pos
+ offset
> last_byte
);
4387 if (cur_pos
+ offset
< last_byte
) {
4393 btrfs_free_path(path
);
4395 if (exts
!= *extents
)
4404 static int noinline
replace_one_extent(struct btrfs_trans_handle
*trans
,
4405 struct btrfs_root
*root
,
4406 struct btrfs_path
*path
,
4407 struct btrfs_key
*extent_key
,
4408 struct btrfs_key
*leaf_key
,
4409 struct btrfs_ref_path
*ref_path
,
4410 struct disk_extent
*new_extents
,
4413 struct extent_buffer
*leaf
;
4414 struct btrfs_file_extent_item
*fi
;
4415 struct inode
*inode
= NULL
;
4416 struct btrfs_key key
;
4424 int extent_locked
= 0;
4428 memcpy(&key
, leaf_key
, sizeof(key
));
4429 first_pos
= INT_LIMIT(loff_t
) - extent_key
->offset
;
4430 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
) {
4431 if (key
.objectid
< ref_path
->owner_objectid
||
4432 (key
.objectid
== ref_path
->owner_objectid
&&
4433 key
.type
< BTRFS_EXTENT_DATA_KEY
)) {
4434 key
.objectid
= ref_path
->owner_objectid
;
4435 key
.type
= BTRFS_EXTENT_DATA_KEY
;
4441 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
4445 leaf
= path
->nodes
[0];
4446 nritems
= btrfs_header_nritems(leaf
);
4448 if (extent_locked
&& ret
> 0) {
4450 * the file extent item was modified by someone
4451 * before the extent got locked.
4453 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4454 lock_end
, GFP_NOFS
);
4458 if (path
->slots
[0] >= nritems
) {
4459 if (++nr_scaned
> 2)
4462 BUG_ON(extent_locked
);
4463 ret
= btrfs_next_leaf(root
, path
);
4468 leaf
= path
->nodes
[0];
4469 nritems
= btrfs_header_nritems(leaf
);
4472 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
4474 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
) {
4475 if ((key
.objectid
> ref_path
->owner_objectid
) ||
4476 (key
.objectid
== ref_path
->owner_objectid
&&
4477 key
.type
> BTRFS_EXTENT_DATA_KEY
) ||
4478 (key
.offset
>= first_pos
+ extent_key
->offset
))
4482 if (inode
&& key
.objectid
!= inode
->i_ino
) {
4483 BUG_ON(extent_locked
);
4484 btrfs_release_path(root
, path
);
4485 mutex_unlock(&inode
->i_mutex
);
4491 if (key
.type
!= BTRFS_EXTENT_DATA_KEY
) {
4496 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
4497 struct btrfs_file_extent_item
);
4498 extent_type
= btrfs_file_extent_type(leaf
, fi
);
4499 if ((extent_type
!= BTRFS_FILE_EXTENT_REG
&&
4500 extent_type
!= BTRFS_FILE_EXTENT_PREALLOC
) ||
4501 (btrfs_file_extent_disk_bytenr(leaf
, fi
) !=
4502 extent_key
->objectid
)) {
4508 num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
4509 ext_offset
= btrfs_file_extent_offset(leaf
, fi
);
4511 if (first_pos
> key
.offset
- ext_offset
)
4512 first_pos
= key
.offset
- ext_offset
;
4514 if (!extent_locked
) {
4515 lock_start
= key
.offset
;
4516 lock_end
= lock_start
+ num_bytes
- 1;
4518 if (lock_start
> key
.offset
||
4519 lock_end
+ 1 < key
.offset
+ num_bytes
) {
4520 unlock_extent(&BTRFS_I(inode
)->io_tree
,
4521 lock_start
, lock_end
, GFP_NOFS
);
4527 btrfs_release_path(root
, path
);
4529 inode
= btrfs_iget_locked(root
->fs_info
->sb
,
4530 key
.objectid
, root
);
4531 if (inode
->i_state
& I_NEW
) {
4532 BTRFS_I(inode
)->root
= root
;
4533 BTRFS_I(inode
)->location
.objectid
=
4535 BTRFS_I(inode
)->location
.type
=
4536 BTRFS_INODE_ITEM_KEY
;
4537 BTRFS_I(inode
)->location
.offset
= 0;
4538 btrfs_read_locked_inode(inode
);
4539 unlock_new_inode(inode
);
4542 * some code call btrfs_commit_transaction while
4543 * holding the i_mutex, so we can't use mutex_lock
4546 if (is_bad_inode(inode
) ||
4547 !mutex_trylock(&inode
->i_mutex
)) {
4550 key
.offset
= (u64
)-1;
4555 if (!extent_locked
) {
4556 struct btrfs_ordered_extent
*ordered
;
4558 btrfs_release_path(root
, path
);
4560 lock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4561 lock_end
, GFP_NOFS
);
4562 ordered
= btrfs_lookup_first_ordered_extent(inode
,
4565 ordered
->file_offset
<= lock_end
&&
4566 ordered
->file_offset
+ ordered
->len
> lock_start
) {
4567 unlock_extent(&BTRFS_I(inode
)->io_tree
,
4568 lock_start
, lock_end
, GFP_NOFS
);
4569 btrfs_start_ordered_extent(inode
, ordered
, 1);
4570 btrfs_put_ordered_extent(ordered
);
4571 key
.offset
+= num_bytes
;
4575 btrfs_put_ordered_extent(ordered
);
4581 if (nr_extents
== 1) {
4582 /* update extent pointer in place */
4583 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
4584 new_extents
[0].disk_bytenr
);
4585 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
4586 new_extents
[0].disk_num_bytes
);
4587 btrfs_mark_buffer_dirty(leaf
);
4589 btrfs_drop_extent_cache(inode
, key
.offset
,
4590 key
.offset
+ num_bytes
- 1, 0);
4592 ret
= btrfs_inc_extent_ref(trans
, root
,
4593 new_extents
[0].disk_bytenr
,
4594 new_extents
[0].disk_num_bytes
,
4596 root
->root_key
.objectid
,
4601 ret
= btrfs_free_extent(trans
, root
,
4602 extent_key
->objectid
,
4605 btrfs_header_owner(leaf
),
4606 btrfs_header_generation(leaf
),
4610 btrfs_release_path(root
, path
);
4611 key
.offset
+= num_bytes
;
4619 * drop old extent pointer at first, then insert the
4620 * new pointers one bye one
4622 btrfs_release_path(root
, path
);
4623 ret
= btrfs_drop_extents(trans
, root
, inode
, key
.offset
,
4624 key
.offset
+ num_bytes
,
4625 key
.offset
, &alloc_hint
);
4628 for (i
= 0; i
< nr_extents
; i
++) {
4629 if (ext_offset
>= new_extents
[i
].num_bytes
) {
4630 ext_offset
-= new_extents
[i
].num_bytes
;
4633 extent_len
= min(new_extents
[i
].num_bytes
-
4634 ext_offset
, num_bytes
);
4636 ret
= btrfs_insert_empty_item(trans
, root
,
4641 leaf
= path
->nodes
[0];
4642 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
4643 struct btrfs_file_extent_item
);
4644 btrfs_set_file_extent_generation(leaf
, fi
,
4646 btrfs_set_file_extent_type(leaf
, fi
,
4647 BTRFS_FILE_EXTENT_REG
);
4648 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
4649 new_extents
[i
].disk_bytenr
);
4650 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
4651 new_extents
[i
].disk_num_bytes
);
4652 btrfs_set_file_extent_ram_bytes(leaf
, fi
,
4653 new_extents
[i
].ram_bytes
);
4655 btrfs_set_file_extent_compression(leaf
, fi
,
4656 new_extents
[i
].compression
);
4657 btrfs_set_file_extent_encryption(leaf
, fi
,
4658 new_extents
[i
].encryption
);
4659 btrfs_set_file_extent_other_encoding(leaf
, fi
,
4660 new_extents
[i
].other_encoding
);
4662 btrfs_set_file_extent_num_bytes(leaf
, fi
,
4664 ext_offset
+= new_extents
[i
].offset
;
4665 btrfs_set_file_extent_offset(leaf
, fi
,
4667 btrfs_mark_buffer_dirty(leaf
);
4669 btrfs_drop_extent_cache(inode
, key
.offset
,
4670 key
.offset
+ extent_len
- 1, 0);
4672 ret
= btrfs_inc_extent_ref(trans
, root
,
4673 new_extents
[i
].disk_bytenr
,
4674 new_extents
[i
].disk_num_bytes
,
4676 root
->root_key
.objectid
,
4677 trans
->transid
, key
.objectid
);
4679 btrfs_release_path(root
, path
);
4681 inode_add_bytes(inode
, extent_len
);
4684 num_bytes
-= extent_len
;
4685 key
.offset
+= extent_len
;
4690 BUG_ON(i
>= nr_extents
);
4694 if (extent_locked
) {
4695 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4696 lock_end
, GFP_NOFS
);
4700 if (ref_path
->owner_objectid
!= BTRFS_MULTIPLE_OBJECTIDS
&&
4701 key
.offset
>= first_pos
+ extent_key
->offset
)
4708 btrfs_release_path(root
, path
);
4710 mutex_unlock(&inode
->i_mutex
);
4711 if (extent_locked
) {
4712 unlock_extent(&BTRFS_I(inode
)->io_tree
, lock_start
,
4713 lock_end
, GFP_NOFS
);
4720 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle
*trans
,
4721 struct btrfs_root
*root
,
4722 struct extent_buffer
*buf
, u64 orig_start
)
4727 BUG_ON(btrfs_header_generation(buf
) != trans
->transid
);
4728 BUG_ON(root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
);
4730 level
= btrfs_header_level(buf
);
4732 struct btrfs_leaf_ref
*ref
;
4733 struct btrfs_leaf_ref
*orig_ref
;
4735 orig_ref
= btrfs_lookup_leaf_ref(root
, orig_start
);
4739 ref
= btrfs_alloc_leaf_ref(root
, orig_ref
->nritems
);
4741 btrfs_free_leaf_ref(root
, orig_ref
);
4745 ref
->nritems
= orig_ref
->nritems
;
4746 memcpy(ref
->extents
, orig_ref
->extents
,
4747 sizeof(ref
->extents
[0]) * ref
->nritems
);
4749 btrfs_free_leaf_ref(root
, orig_ref
);
4751 ref
->root_gen
= trans
->transid
;
4752 ref
->bytenr
= buf
->start
;
4753 ref
->owner
= btrfs_header_owner(buf
);
4754 ref
->generation
= btrfs_header_generation(buf
);
4755 ret
= btrfs_add_leaf_ref(root
, ref
, 0);
4757 btrfs_free_leaf_ref(root
, ref
);
4762 static int noinline
invalidate_extent_cache(struct btrfs_root
*root
,
4763 struct extent_buffer
*leaf
,
4764 struct btrfs_block_group_cache
*group
,
4765 struct btrfs_root
*target_root
)
4767 struct btrfs_key key
;
4768 struct inode
*inode
= NULL
;
4769 struct btrfs_file_extent_item
*fi
;
4771 u64 skip_objectid
= 0;
4775 nritems
= btrfs_header_nritems(leaf
);
4776 for (i
= 0; i
< nritems
; i
++) {
4777 btrfs_item_key_to_cpu(leaf
, &key
, i
);
4778 if (key
.objectid
== skip_objectid
||
4779 key
.type
!= BTRFS_EXTENT_DATA_KEY
)
4781 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
4782 if (btrfs_file_extent_type(leaf
, fi
) ==
4783 BTRFS_FILE_EXTENT_INLINE
)
4785 if (btrfs_file_extent_disk_bytenr(leaf
, fi
) == 0)
4787 if (!inode
|| inode
->i_ino
!= key
.objectid
) {
4789 inode
= btrfs_ilookup(target_root
->fs_info
->sb
,
4790 key
.objectid
, target_root
, 1);
4793 skip_objectid
= key
.objectid
;
4796 num_bytes
= btrfs_file_extent_num_bytes(leaf
, fi
);
4798 lock_extent(&BTRFS_I(inode
)->io_tree
, key
.offset
,
4799 key
.offset
+ num_bytes
- 1, GFP_NOFS
);
4800 btrfs_drop_extent_cache(inode
, key
.offset
,
4801 key
.offset
+ num_bytes
- 1, 1);
4802 unlock_extent(&BTRFS_I(inode
)->io_tree
, key
.offset
,
4803 key
.offset
+ num_bytes
- 1, GFP_NOFS
);
4810 static int noinline
replace_extents_in_leaf(struct btrfs_trans_handle
*trans
,
4811 struct btrfs_root
*root
,
4812 struct extent_buffer
*leaf
,
4813 struct btrfs_block_group_cache
*group
,
4814 struct inode
*reloc_inode
)
4816 struct btrfs_key key
;
4817 struct btrfs_key extent_key
;
4818 struct btrfs_file_extent_item
*fi
;
4819 struct btrfs_leaf_ref
*ref
;
4820 struct disk_extent
*new_extent
;
4829 new_extent
= kmalloc(sizeof(*new_extent
), GFP_NOFS
);
4830 BUG_ON(!new_extent
);
4832 ref
= btrfs_lookup_leaf_ref(root
, leaf
->start
);
4836 nritems
= btrfs_header_nritems(leaf
);
4837 for (i
= 0; i
< nritems
; i
++) {
4838 btrfs_item_key_to_cpu(leaf
, &key
, i
);
4839 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
4841 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
4842 if (btrfs_file_extent_type(leaf
, fi
) ==
4843 BTRFS_FILE_EXTENT_INLINE
)
4845 bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
4846 num_bytes
= btrfs_file_extent_disk_num_bytes(leaf
, fi
);
4851 if (bytenr
>= group
->key
.objectid
+ group
->key
.offset
||
4852 bytenr
+ num_bytes
<= group
->key
.objectid
)
4855 extent_key
.objectid
= bytenr
;
4856 extent_key
.offset
= num_bytes
;
4857 extent_key
.type
= BTRFS_EXTENT_ITEM_KEY
;
4859 ret
= get_new_locations(reloc_inode
, &extent_key
,
4860 group
->key
.objectid
, 1,
4861 &new_extent
, &nr_extent
);
4866 BUG_ON(ref
->extents
[ext_index
].bytenr
!= bytenr
);
4867 BUG_ON(ref
->extents
[ext_index
].num_bytes
!= num_bytes
);
4868 ref
->extents
[ext_index
].bytenr
= new_extent
->disk_bytenr
;
4869 ref
->extents
[ext_index
].num_bytes
= new_extent
->disk_num_bytes
;
4871 btrfs_set_file_extent_disk_bytenr(leaf
, fi
,
4872 new_extent
->disk_bytenr
);
4873 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
,
4874 new_extent
->disk_num_bytes
);
4875 btrfs_mark_buffer_dirty(leaf
);
4877 ret
= btrfs_inc_extent_ref(trans
, root
,
4878 new_extent
->disk_bytenr
,
4879 new_extent
->disk_num_bytes
,
4881 root
->root_key
.objectid
,
4882 trans
->transid
, key
.objectid
);
4884 ret
= btrfs_free_extent(trans
, root
,
4885 bytenr
, num_bytes
, leaf
->start
,
4886 btrfs_header_owner(leaf
),
4887 btrfs_header_generation(leaf
),
4893 BUG_ON(ext_index
+ 1 != ref
->nritems
);
4894 btrfs_free_leaf_ref(root
, ref
);
4898 int btrfs_free_reloc_root(struct btrfs_trans_handle
*trans
,
4899 struct btrfs_root
*root
)
4901 struct btrfs_root
*reloc_root
;
4904 if (root
->reloc_root
) {
4905 reloc_root
= root
->reloc_root
;
4906 root
->reloc_root
= NULL
;
4907 list_add(&reloc_root
->dead_list
,
4908 &root
->fs_info
->dead_reloc_roots
);
4910 btrfs_set_root_bytenr(&reloc_root
->root_item
,
4911 reloc_root
->node
->start
);
4912 btrfs_set_root_level(&root
->root_item
,
4913 btrfs_header_level(reloc_root
->node
));
4914 memset(&reloc_root
->root_item
.drop_progress
, 0,
4915 sizeof(struct btrfs_disk_key
));
4916 reloc_root
->root_item
.drop_level
= 0;
4918 ret
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
4919 &reloc_root
->root_key
,
4920 &reloc_root
->root_item
);
4926 int btrfs_drop_dead_reloc_roots(struct btrfs_root
*root
)
4928 struct btrfs_trans_handle
*trans
;
4929 struct btrfs_root
*reloc_root
;
4930 struct btrfs_root
*prev_root
= NULL
;
4931 struct list_head dead_roots
;
4935 INIT_LIST_HEAD(&dead_roots
);
4936 list_splice_init(&root
->fs_info
->dead_reloc_roots
, &dead_roots
);
4938 while (!list_empty(&dead_roots
)) {
4939 reloc_root
= list_entry(dead_roots
.prev
,
4940 struct btrfs_root
, dead_list
);
4941 list_del_init(&reloc_root
->dead_list
);
4943 BUG_ON(reloc_root
->commit_root
!= NULL
);
4945 trans
= btrfs_join_transaction(root
, 1);
4948 mutex_lock(&root
->fs_info
->drop_mutex
);
4949 ret
= btrfs_drop_snapshot(trans
, reloc_root
);
4952 mutex_unlock(&root
->fs_info
->drop_mutex
);
4954 nr
= trans
->blocks_used
;
4955 ret
= btrfs_end_transaction(trans
, root
);
4957 btrfs_btree_balance_dirty(root
, nr
);
4960 free_extent_buffer(reloc_root
->node
);
4962 ret
= btrfs_del_root(trans
, root
->fs_info
->tree_root
,
4963 &reloc_root
->root_key
);
4965 mutex_unlock(&root
->fs_info
->drop_mutex
);
4967 nr
= trans
->blocks_used
;
4968 ret
= btrfs_end_transaction(trans
, root
);
4970 btrfs_btree_balance_dirty(root
, nr
);
4973 prev_root
= reloc_root
;
4976 btrfs_remove_leaf_refs(prev_root
, (u64
)-1, 0);
4982 int btrfs_add_dead_reloc_root(struct btrfs_root
*root
)
4984 list_add(&root
->dead_list
, &root
->fs_info
->dead_reloc_roots
);
4988 int btrfs_cleanup_reloc_trees(struct btrfs_root
*root
)
4990 struct btrfs_root
*reloc_root
;
4991 struct btrfs_trans_handle
*trans
;
4992 struct btrfs_key location
;
4996 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
4997 ret
= btrfs_find_dead_roots(root
, BTRFS_TREE_RELOC_OBJECTID
, NULL
);
4999 found
= !list_empty(&root
->fs_info
->dead_reloc_roots
);
5000 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
5003 trans
= btrfs_start_transaction(root
, 1);
5005 ret
= btrfs_commit_transaction(trans
, root
);
5009 location
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
5010 location
.offset
= (u64
)-1;
5011 location
.type
= BTRFS_ROOT_ITEM_KEY
;
5013 reloc_root
= btrfs_read_fs_root_no_name(root
->fs_info
, &location
);
5014 BUG_ON(!reloc_root
);
5015 btrfs_orphan_cleanup(reloc_root
);
5019 static int noinline
init_reloc_tree(struct btrfs_trans_handle
*trans
,
5020 struct btrfs_root
*root
)
5022 struct btrfs_root
*reloc_root
;
5023 struct extent_buffer
*eb
;
5024 struct btrfs_root_item
*root_item
;
5025 struct btrfs_key root_key
;
5028 BUG_ON(!root
->ref_cows
);
5029 if (root
->reloc_root
)
5032 root_item
= kmalloc(sizeof(*root_item
), GFP_NOFS
);
5035 ret
= btrfs_copy_root(trans
, root
, root
->commit_root
,
5036 &eb
, BTRFS_TREE_RELOC_OBJECTID
);
5039 root_key
.objectid
= BTRFS_TREE_RELOC_OBJECTID
;
5040 root_key
.offset
= root
->root_key
.objectid
;
5041 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
5043 memcpy(root_item
, &root
->root_item
, sizeof(root_item
));
5044 btrfs_set_root_refs(root_item
, 0);
5045 btrfs_set_root_bytenr(root_item
, eb
->start
);
5046 btrfs_set_root_level(root_item
, btrfs_header_level(eb
));
5047 btrfs_set_root_generation(root_item
, trans
->transid
);
5049 btrfs_tree_unlock(eb
);
5050 free_extent_buffer(eb
);
5052 ret
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
,
5053 &root_key
, root_item
);
5057 reloc_root
= btrfs_read_fs_root_no_radix(root
->fs_info
->tree_root
,
5059 BUG_ON(!reloc_root
);
5060 reloc_root
->last_trans
= trans
->transid
;
5061 reloc_root
->commit_root
= NULL
;
5062 reloc_root
->ref_tree
= &root
->fs_info
->reloc_ref_tree
;
5064 root
->reloc_root
= reloc_root
;
5069 * Core function of space balance.
5071 * The idea is using reloc trees to relocate tree blocks in reference
5072 * counted roots. There is one reloc tree for each subvol, and all
5073 * reloc trees share same root key objectid. Reloc trees are snapshots
5074 * of the latest committed roots of subvols (root->commit_root).
5076 * To relocate a tree block referenced by a subvol, there are two steps.
5077 * COW the block through subvol's reloc tree, then update block pointer
5078 * in the subvol to point to the new block. Since all reloc trees share
5079 * same root key objectid, doing special handing for tree blocks owned
5080 * by them is easy. Once a tree block has been COWed in one reloc tree,
5081 * we can use the resulting new block directly when the same block is
5082 * required to COW again through other reloc trees. By this way, relocated
5083 * tree blocks are shared between reloc trees, so they are also shared
5086 static int noinline
relocate_one_path(struct btrfs_trans_handle
*trans
,
5087 struct btrfs_root
*root
,
5088 struct btrfs_path
*path
,
5089 struct btrfs_key
*first_key
,
5090 struct btrfs_ref_path
*ref_path
,
5091 struct btrfs_block_group_cache
*group
,
5092 struct inode
*reloc_inode
)
5094 struct btrfs_root
*reloc_root
;
5095 struct extent_buffer
*eb
= NULL
;
5096 struct btrfs_key
*keys
;
5100 int lowest_level
= 0;
5103 if (ref_path
->owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
)
5104 lowest_level
= ref_path
->owner_objectid
;
5106 if (!root
->ref_cows
) {
5107 path
->lowest_level
= lowest_level
;
5108 ret
= btrfs_search_slot(trans
, root
, first_key
, path
, 0, 1);
5110 path
->lowest_level
= 0;
5111 btrfs_release_path(root
, path
);
5115 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
5116 ret
= init_reloc_tree(trans
, root
);
5118 reloc_root
= root
->reloc_root
;
5120 shared_level
= ref_path
->shared_level
;
5121 ref_path
->shared_level
= BTRFS_MAX_LEVEL
- 1;
5123 keys
= ref_path
->node_keys
;
5124 nodes
= ref_path
->new_nodes
;
5125 memset(&keys
[shared_level
+ 1], 0,
5126 sizeof(*keys
) * (BTRFS_MAX_LEVEL
- shared_level
- 1));
5127 memset(&nodes
[shared_level
+ 1], 0,
5128 sizeof(*nodes
) * (BTRFS_MAX_LEVEL
- shared_level
- 1));
5130 if (nodes
[lowest_level
] == 0) {
5131 path
->lowest_level
= lowest_level
;
5132 ret
= btrfs_search_slot(trans
, reloc_root
, first_key
, path
,
5135 for (level
= lowest_level
; level
< BTRFS_MAX_LEVEL
; level
++) {
5136 eb
= path
->nodes
[level
];
5137 if (!eb
|| eb
== reloc_root
->node
)
5139 nodes
[level
] = eb
->start
;
5141 btrfs_item_key_to_cpu(eb
, &keys
[level
], 0);
5143 btrfs_node_key_to_cpu(eb
, &keys
[level
], 0);
5146 ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
5147 eb
= path
->nodes
[0];
5148 ret
= replace_extents_in_leaf(trans
, reloc_root
, eb
,
5149 group
, reloc_inode
);
5152 btrfs_release_path(reloc_root
, path
);
5154 ret
= btrfs_merge_path(trans
, reloc_root
, keys
, nodes
,
5160 * replace tree blocks in the fs tree with tree blocks in
5163 ret
= btrfs_merge_path(trans
, root
, keys
, nodes
, lowest_level
);
5166 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
5167 ret
= btrfs_search_slot(trans
, reloc_root
, first_key
, path
,
5170 extent_buffer_get(path
->nodes
[0]);
5171 eb
= path
->nodes
[0];
5172 btrfs_release_path(reloc_root
, path
);
5173 ret
= invalidate_extent_cache(reloc_root
, eb
, group
, root
);
5175 free_extent_buffer(eb
);
5178 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
5179 path
->lowest_level
= 0;
5183 static int noinline
relocate_tree_block(struct btrfs_trans_handle
*trans
,
5184 struct btrfs_root
*root
,
5185 struct btrfs_path
*path
,
5186 struct btrfs_key
*first_key
,
5187 struct btrfs_ref_path
*ref_path
)
5191 ret
= relocate_one_path(trans
, root
, path
, first_key
,
5192 ref_path
, NULL
, NULL
);
5195 if (root
== root
->fs_info
->extent_root
)
5196 btrfs_extent_post_op(trans
, root
);
5201 static int noinline
del_extent_zero(struct btrfs_trans_handle
*trans
,
5202 struct btrfs_root
*extent_root
,
5203 struct btrfs_path
*path
,
5204 struct btrfs_key
*extent_key
)
5208 ret
= btrfs_search_slot(trans
, extent_root
, extent_key
, path
, -1, 1);
5211 ret
= btrfs_del_item(trans
, extent_root
, path
);
5213 btrfs_release_path(extent_root
, path
);
5217 static struct btrfs_root noinline
*read_ref_root(struct btrfs_fs_info
*fs_info
,
5218 struct btrfs_ref_path
*ref_path
)
5220 struct btrfs_key root_key
;
5222 root_key
.objectid
= ref_path
->root_objectid
;
5223 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
5224 if (is_cowonly_root(ref_path
->root_objectid
))
5225 root_key
.offset
= 0;
5227 root_key
.offset
= (u64
)-1;
5229 return btrfs_read_fs_root_no_name(fs_info
, &root_key
);
5232 static int noinline
relocate_one_extent(struct btrfs_root
*extent_root
,
5233 struct btrfs_path
*path
,
5234 struct btrfs_key
*extent_key
,
5235 struct btrfs_block_group_cache
*group
,
5236 struct inode
*reloc_inode
, int pass
)
5238 struct btrfs_trans_handle
*trans
;
5239 struct btrfs_root
*found_root
;
5240 struct btrfs_ref_path
*ref_path
= NULL
;
5241 struct disk_extent
*new_extents
= NULL
;
5246 struct btrfs_key first_key
;
5250 trans
= btrfs_start_transaction(extent_root
, 1);
5253 if (extent_key
->objectid
== 0) {
5254 ret
= del_extent_zero(trans
, extent_root
, path
, extent_key
);
5258 ref_path
= kmalloc(sizeof(*ref_path
), GFP_NOFS
);
5264 for (loops
= 0; ; loops
++) {
5266 ret
= btrfs_first_ref_path(trans
, extent_root
, ref_path
,
5267 extent_key
->objectid
);
5269 ret
= btrfs_next_ref_path(trans
, extent_root
, ref_path
);
5276 if (ref_path
->root_objectid
== BTRFS_TREE_LOG_OBJECTID
||
5277 ref_path
->root_objectid
== BTRFS_TREE_RELOC_OBJECTID
)
5280 found_root
= read_ref_root(extent_root
->fs_info
, ref_path
);
5281 BUG_ON(!found_root
);
5283 * for reference counted tree, only process reference paths
5284 * rooted at the latest committed root.
5286 if (found_root
->ref_cows
&&
5287 ref_path
->root_generation
!= found_root
->root_key
.offset
)
5290 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
) {
5293 * copy data extents to new locations
5295 u64 group_start
= group
->key
.objectid
;
5296 ret
= relocate_data_extent(reloc_inode
,
5305 level
= ref_path
->owner_objectid
;
5308 if (prev_block
!= ref_path
->nodes
[level
]) {
5309 struct extent_buffer
*eb
;
5310 u64 block_start
= ref_path
->nodes
[level
];
5311 u64 block_size
= btrfs_level_size(found_root
, level
);
5313 eb
= read_tree_block(found_root
, block_start
,
5315 btrfs_tree_lock(eb
);
5316 BUG_ON(level
!= btrfs_header_level(eb
));
5319 btrfs_item_key_to_cpu(eb
, &first_key
, 0);
5321 btrfs_node_key_to_cpu(eb
, &first_key
, 0);
5323 btrfs_tree_unlock(eb
);
5324 free_extent_buffer(eb
);
5325 prev_block
= block_start
;
5328 if (ref_path
->owner_objectid
>= BTRFS_FIRST_FREE_OBJECTID
&&
5331 * use fallback method to process the remaining
5335 u64 group_start
= group
->key
.objectid
;
5336 new_extents
= kmalloc(sizeof(*new_extents
),
5339 ret
= get_new_locations(reloc_inode
,
5347 btrfs_record_root_in_trans(found_root
);
5348 ret
= replace_one_extent(trans
, found_root
,
5350 &first_key
, ref_path
,
5351 new_extents
, nr_extents
);
5357 btrfs_record_root_in_trans(found_root
);
5358 if (ref_path
->owner_objectid
< BTRFS_FIRST_FREE_OBJECTID
) {
5359 ret
= relocate_tree_block(trans
, found_root
, path
,
5360 &first_key
, ref_path
);
5363 * try to update data extent references while
5364 * keeping metadata shared between snapshots.
5366 ret
= relocate_one_path(trans
, found_root
, path
,
5367 &first_key
, ref_path
,
5368 group
, reloc_inode
);
5375 btrfs_end_transaction(trans
, extent_root
);
5381 static u64
update_block_group_flags(struct btrfs_root
*root
, u64 flags
)
5384 u64 stripped
= BTRFS_BLOCK_GROUP_RAID0
|
5385 BTRFS_BLOCK_GROUP_RAID1
| BTRFS_BLOCK_GROUP_RAID10
;
5387 num_devices
= root
->fs_info
->fs_devices
->rw_devices
;
5388 if (num_devices
== 1) {
5389 stripped
|= BTRFS_BLOCK_GROUP_DUP
;
5390 stripped
= flags
& ~stripped
;
5392 /* turn raid0 into single device chunks */
5393 if (flags
& BTRFS_BLOCK_GROUP_RAID0
)
5396 /* turn mirroring into duplication */
5397 if (flags
& (BTRFS_BLOCK_GROUP_RAID1
|
5398 BTRFS_BLOCK_GROUP_RAID10
))
5399 return stripped
| BTRFS_BLOCK_GROUP_DUP
;
5402 /* they already had raid on here, just return */
5403 if (flags
& stripped
)
5406 stripped
|= BTRFS_BLOCK_GROUP_DUP
;
5407 stripped
= flags
& ~stripped
;
5409 /* switch duplicated blocks with raid1 */
5410 if (flags
& BTRFS_BLOCK_GROUP_DUP
)
5411 return stripped
| BTRFS_BLOCK_GROUP_RAID1
;
5413 /* turn single device chunks into raid0 */
5414 return stripped
| BTRFS_BLOCK_GROUP_RAID0
;
5419 int __alloc_chunk_for_shrink(struct btrfs_root
*root
,
5420 struct btrfs_block_group_cache
*shrink_block_group
,
5423 struct btrfs_trans_handle
*trans
;
5424 u64 new_alloc_flags
;
5427 spin_lock(&shrink_block_group
->lock
);
5428 if (btrfs_block_group_used(&shrink_block_group
->item
) > 0) {
5429 spin_unlock(&shrink_block_group
->lock
);
5431 trans
= btrfs_start_transaction(root
, 1);
5432 spin_lock(&shrink_block_group
->lock
);
5434 new_alloc_flags
= update_block_group_flags(root
,
5435 shrink_block_group
->flags
);
5436 if (new_alloc_flags
!= shrink_block_group
->flags
) {
5438 btrfs_block_group_used(&shrink_block_group
->item
);
5440 calc
= shrink_block_group
->key
.offset
;
5442 spin_unlock(&shrink_block_group
->lock
);
5444 do_chunk_alloc(trans
, root
->fs_info
->extent_root
,
5445 calc
+ 2 * 1024 * 1024, new_alloc_flags
, force
);
5447 btrfs_end_transaction(trans
, root
);
5449 spin_unlock(&shrink_block_group
->lock
);
5453 static int __insert_orphan_inode(struct btrfs_trans_handle
*trans
,
5454 struct btrfs_root
*root
,
5455 u64 objectid
, u64 size
)
5457 struct btrfs_path
*path
;
5458 struct btrfs_inode_item
*item
;
5459 struct extent_buffer
*leaf
;
5462 path
= btrfs_alloc_path();
5466 ret
= btrfs_insert_empty_inode(trans
, root
, path
, objectid
);
5470 leaf
= path
->nodes
[0];
5471 item
= btrfs_item_ptr(leaf
, path
->slots
[0], struct btrfs_inode_item
);
5472 memset_extent_buffer(leaf
, 0, (unsigned long)item
, sizeof(*item
));
5473 btrfs_set_inode_generation(leaf
, item
, 1);
5474 btrfs_set_inode_size(leaf
, item
, size
);
5475 btrfs_set_inode_mode(leaf
, item
, S_IFREG
| 0600);
5476 btrfs_set_inode_flags(leaf
, item
, BTRFS_INODE_NODATASUM
|
5477 BTRFS_INODE_NOCOMPRESS
);
5478 btrfs_mark_buffer_dirty(leaf
);
5479 btrfs_release_path(root
, path
);
5481 btrfs_free_path(path
);
5485 static struct inode noinline
*create_reloc_inode(struct btrfs_fs_info
*fs_info
,
5486 struct btrfs_block_group_cache
*group
)
5488 struct inode
*inode
= NULL
;
5489 struct btrfs_trans_handle
*trans
;
5490 struct btrfs_root
*root
;
5491 struct btrfs_key root_key
;
5492 u64 objectid
= BTRFS_FIRST_FREE_OBJECTID
;
5495 root_key
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
5496 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
5497 root_key
.offset
= (u64
)-1;
5498 root
= btrfs_read_fs_root_no_name(fs_info
, &root_key
);
5500 return ERR_CAST(root
);
5502 trans
= btrfs_start_transaction(root
, 1);
5505 err
= btrfs_find_free_objectid(trans
, root
, objectid
, &objectid
);
5509 err
= __insert_orphan_inode(trans
, root
, objectid
, group
->key
.offset
);
5512 err
= btrfs_insert_file_extent(trans
, root
, objectid
, 0, 0, 0,
5513 group
->key
.offset
, 0, group
->key
.offset
,
5517 inode
= btrfs_iget_locked(root
->fs_info
->sb
, objectid
, root
);
5518 if (inode
->i_state
& I_NEW
) {
5519 BTRFS_I(inode
)->root
= root
;
5520 BTRFS_I(inode
)->location
.objectid
= objectid
;
5521 BTRFS_I(inode
)->location
.type
= BTRFS_INODE_ITEM_KEY
;
5522 BTRFS_I(inode
)->location
.offset
= 0;
5523 btrfs_read_locked_inode(inode
);
5524 unlock_new_inode(inode
);
5525 BUG_ON(is_bad_inode(inode
));
5530 err
= btrfs_orphan_add(trans
, inode
);
5532 btrfs_end_transaction(trans
, root
);
5536 inode
= ERR_PTR(err
);
5541 int btrfs_relocate_block_group(struct btrfs_root
*root
, u64 group_start
)
5543 struct btrfs_trans_handle
*trans
;
5544 struct btrfs_path
*path
;
5545 struct btrfs_fs_info
*info
= root
->fs_info
;
5546 struct extent_buffer
*leaf
;
5547 struct inode
*reloc_inode
;
5548 struct btrfs_block_group_cache
*block_group
;
5549 struct btrfs_key key
;
5558 root
= root
->fs_info
->extent_root
;
5560 block_group
= btrfs_lookup_block_group(info
, group_start
);
5561 BUG_ON(!block_group
);
5563 printk("btrfs relocating block group %llu flags %llu\n",
5564 (unsigned long long)block_group
->key
.objectid
,
5565 (unsigned long long)block_group
->flags
);
5567 path
= btrfs_alloc_path();
5570 reloc_inode
= create_reloc_inode(info
, block_group
);
5571 BUG_ON(IS_ERR(reloc_inode
));
5573 __alloc_chunk_for_shrink(root
, block_group
, 1);
5574 set_block_group_readonly(block_group
);
5576 btrfs_start_delalloc_inodes(info
->tree_root
);
5577 btrfs_wait_ordered_extents(info
->tree_root
, 0);
5582 key
.objectid
= block_group
->key
.objectid
;
5585 cur_byte
= key
.objectid
;
5587 trans
= btrfs_start_transaction(info
->tree_root
, 1);
5588 btrfs_commit_transaction(trans
, info
->tree_root
);
5590 mutex_lock(&root
->fs_info
->cleaner_mutex
);
5591 btrfs_clean_old_snapshots(info
->tree_root
);
5592 btrfs_remove_leaf_refs(info
->tree_root
, (u64
)-1, 1);
5593 mutex_unlock(&root
->fs_info
->cleaner_mutex
);
5596 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
5600 leaf
= path
->nodes
[0];
5601 nritems
= btrfs_header_nritems(leaf
);
5602 if (path
->slots
[0] >= nritems
) {
5603 ret
= btrfs_next_leaf(root
, path
);
5610 leaf
= path
->nodes
[0];
5611 nritems
= btrfs_header_nritems(leaf
);
5614 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
5616 if (key
.objectid
>= block_group
->key
.objectid
+
5617 block_group
->key
.offset
)
5620 if (progress
&& need_resched()) {
5621 btrfs_release_path(root
, path
);
5628 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
||
5629 key
.objectid
+ key
.offset
<= cur_byte
) {
5635 cur_byte
= key
.objectid
+ key
.offset
;
5636 btrfs_release_path(root
, path
);
5638 __alloc_chunk_for_shrink(root
, block_group
, 0);
5639 ret
= relocate_one_extent(root
, path
, &key
, block_group
,
5645 key
.objectid
= cur_byte
;
5650 btrfs_release_path(root
, path
);
5653 btrfs_wait_ordered_range(reloc_inode
, 0, (u64
)-1);
5654 invalidate_mapping_pages(reloc_inode
->i_mapping
, 0, -1);
5655 WARN_ON(reloc_inode
->i_mapping
->nrpages
);
5658 if (total_found
> 0) {
5659 printk("btrfs found %llu extents in pass %d\n",
5660 (unsigned long long)total_found
, pass
);
5662 if (total_found
== skipped
&& pass
> 2) {
5664 reloc_inode
= create_reloc_inode(info
, block_group
);
5670 /* delete reloc_inode */
5673 /* unpin extents in this range */
5674 trans
= btrfs_start_transaction(info
->tree_root
, 1);
5675 btrfs_commit_transaction(trans
, info
->tree_root
);
5677 spin_lock(&block_group
->lock
);
5678 WARN_ON(block_group
->pinned
> 0);
5679 WARN_ON(block_group
->reserved
> 0);
5680 WARN_ON(btrfs_block_group_used(&block_group
->item
) > 0);
5681 spin_unlock(&block_group
->lock
);
5684 btrfs_free_path(path
);
5688 int find_first_block_group(struct btrfs_root
*root
, struct btrfs_path
*path
,
5689 struct btrfs_key
*key
)
5692 struct btrfs_key found_key
;
5693 struct extent_buffer
*leaf
;
5696 ret
= btrfs_search_slot(NULL
, root
, key
, path
, 0, 0);
5701 slot
= path
->slots
[0];
5702 leaf
= path
->nodes
[0];
5703 if (slot
>= btrfs_header_nritems(leaf
)) {
5704 ret
= btrfs_next_leaf(root
, path
);
5711 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
5713 if (found_key
.objectid
>= key
->objectid
&&
5714 found_key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
5725 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
5727 struct btrfs_block_group_cache
*block_group
;
5730 spin_lock(&info
->block_group_cache_lock
);
5731 while ((n
= rb_last(&info
->block_group_cache_tree
)) != NULL
) {
5732 block_group
= rb_entry(n
, struct btrfs_block_group_cache
,
5734 rb_erase(&block_group
->cache_node
,
5735 &info
->block_group_cache_tree
);
5736 spin_unlock(&info
->block_group_cache_lock
);
5738 btrfs_remove_free_space_cache(block_group
);
5739 down_write(&block_group
->space_info
->groups_sem
);
5740 list_del(&block_group
->list
);
5741 up_write(&block_group
->space_info
->groups_sem
);
5744 spin_lock(&info
->block_group_cache_lock
);
5746 spin_unlock(&info
->block_group_cache_lock
);
5750 int btrfs_read_block_groups(struct btrfs_root
*root
)
5752 struct btrfs_path
*path
;
5754 struct btrfs_block_group_cache
*cache
;
5755 struct btrfs_fs_info
*info
= root
->fs_info
;
5756 struct btrfs_space_info
*space_info
;
5757 struct btrfs_key key
;
5758 struct btrfs_key found_key
;
5759 struct extent_buffer
*leaf
;
5761 root
= info
->extent_root
;
5764 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
5765 path
= btrfs_alloc_path();
5770 ret
= find_first_block_group(root
, path
, &key
);
5778 leaf
= path
->nodes
[0];
5779 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
5780 cache
= kzalloc(sizeof(*cache
), GFP_NOFS
);
5786 spin_lock_init(&cache
->lock
);
5787 mutex_init(&cache
->alloc_mutex
);
5788 INIT_LIST_HEAD(&cache
->list
);
5789 read_extent_buffer(leaf
, &cache
->item
,
5790 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
5791 sizeof(cache
->item
));
5792 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
5794 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
5795 btrfs_release_path(root
, path
);
5796 cache
->flags
= btrfs_block_group_flags(&cache
->item
);
5798 ret
= update_space_info(info
, cache
->flags
, found_key
.offset
,
5799 btrfs_block_group_used(&cache
->item
),
5802 cache
->space_info
= space_info
;
5803 down_write(&space_info
->groups_sem
);
5804 list_add_tail(&cache
->list
, &space_info
->block_groups
);
5805 up_write(&space_info
->groups_sem
);
5807 ret
= btrfs_add_block_group_cache(root
->fs_info
, cache
);
5810 set_avail_alloc_bits(root
->fs_info
, cache
->flags
);
5811 if (btrfs_chunk_readonly(root
, cache
->key
.objectid
))
5812 set_block_group_readonly(cache
);
5816 btrfs_free_path(path
);
5820 int btrfs_make_block_group(struct btrfs_trans_handle
*trans
,
5821 struct btrfs_root
*root
, u64 bytes_used
,
5822 u64 type
, u64 chunk_objectid
, u64 chunk_offset
,
5826 struct btrfs_root
*extent_root
;
5827 struct btrfs_block_group_cache
*cache
;
5829 extent_root
= root
->fs_info
->extent_root
;
5831 root
->fs_info
->last_trans_new_blockgroup
= trans
->transid
;
5833 cache
= kzalloc(sizeof(*cache
), GFP_NOFS
);
5837 cache
->key
.objectid
= chunk_offset
;
5838 cache
->key
.offset
= size
;
5839 spin_lock_init(&cache
->lock
);
5840 mutex_init(&cache
->alloc_mutex
);
5841 INIT_LIST_HEAD(&cache
->list
);
5842 btrfs_set_key_type(&cache
->key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
5844 btrfs_set_block_group_used(&cache
->item
, bytes_used
);
5845 btrfs_set_block_group_chunk_objectid(&cache
->item
, chunk_objectid
);
5846 cache
->flags
= type
;
5847 btrfs_set_block_group_flags(&cache
->item
, type
);
5849 ret
= update_space_info(root
->fs_info
, cache
->flags
, size
, bytes_used
,
5850 &cache
->space_info
);
5852 down_write(&cache
->space_info
->groups_sem
);
5853 list_add_tail(&cache
->list
, &cache
->space_info
->block_groups
);
5854 up_write(&cache
->space_info
->groups_sem
);
5856 ret
= btrfs_add_block_group_cache(root
->fs_info
, cache
);
5859 ret
= btrfs_insert_item(trans
, extent_root
, &cache
->key
, &cache
->item
,
5860 sizeof(cache
->item
));
5863 finish_current_insert(trans
, extent_root
, 0);
5864 ret
= del_pending_extents(trans
, extent_root
, 0);
5866 set_avail_alloc_bits(extent_root
->fs_info
, type
);
5871 int btrfs_remove_block_group(struct btrfs_trans_handle
*trans
,
5872 struct btrfs_root
*root
, u64 group_start
)
5874 struct btrfs_path
*path
;
5875 struct btrfs_block_group_cache
*block_group
;
5876 struct btrfs_key key
;
5879 root
= root
->fs_info
->extent_root
;
5881 block_group
= btrfs_lookup_block_group(root
->fs_info
, group_start
);
5882 BUG_ON(!block_group
);
5883 BUG_ON(!block_group
->ro
);
5885 memcpy(&key
, &block_group
->key
, sizeof(key
));
5887 path
= btrfs_alloc_path();
5890 btrfs_remove_free_space_cache(block_group
);
5891 rb_erase(&block_group
->cache_node
,
5892 &root
->fs_info
->block_group_cache_tree
);
5893 down_write(&block_group
->space_info
->groups_sem
);
5894 list_del(&block_group
->list
);
5895 up_write(&block_group
->space_info
->groups_sem
);
5897 spin_lock(&block_group
->space_info
->lock
);
5898 block_group
->space_info
->total_bytes
-= block_group
->key
.offset
;
5899 block_group
->space_info
->bytes_readonly
-= block_group
->key
.offset
;
5900 spin_unlock(&block_group
->space_info
->lock
);
5901 block_group
->space_info
->full
= 0;
5904 memset(shrink_block_group, 0, sizeof(*shrink_block_group));
5905 kfree(shrink_block_group);
5908 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
5914 ret
= btrfs_del_item(trans
, root
, path
);
5916 btrfs_free_path(path
);