/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/bio.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"
struct map_lookup {
        struct btrfs_device *dev;
        u64 physical;
};
/*
 * this uses a pretty simple search, the expectation is that it is
 * called very infrequently and that a given device has a small number
 * of extents
 */
static int find_free_dev_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_device *device,
                                struct btrfs_path *path,
                                u64 num_bytes, u64 *start)
{
        struct btrfs_key key;
        struct btrfs_root *root = device->dev_root;
        struct btrfs_dev_extent *dev_extent = NULL;
        u64 hole_size = 0;
        u64 last_byte = 0;
        u64 search_start = 0;
        u64 search_end = device->total_bytes;
        int ret;
        int slot = 0;
        int start_found;
        struct extent_buffer *l;

        start_found = 0;

        /* FIXME use last free of some kind */

        key.objectid = device->devid;
        key.offset = search_start;
        key.type = BTRFS_DEV_EXTENT_KEY;
        ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
        if (ret < 0)
                goto error;
        ret = btrfs_previous_item(root, path, 0, key.type);
        if (ret < 0)
                goto error;
        l = path->nodes[0];
        btrfs_item_key_to_cpu(l, &key, path->slots[0]);
        while (1) {
                l = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(l)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret == 0)
                                continue;
                        if (ret < 0)
                                goto error;
no_more_items:
                        if (!start_found) {
                                if (search_start >= search_end) {
                                        ret = -ENOSPC;
                                        goto error;
                                }
                                *start = search_start;
                                start_found = 1;
                                goto check_pending;
                        }
                        *start = last_byte > search_start ?
                                last_byte : search_start;
                        if (search_end <= *start) {
                                ret = -ENOSPC;
                                goto error;
                        }
                        goto check_pending;
                }
                btrfs_item_key_to_cpu(l, &key, slot);

                if (key.objectid < device->devid)
                        goto next;

                if (key.objectid > device->devid)
                        goto no_more_items;

                if (key.offset >= search_start && key.offset > last_byte &&
                    start_found) {
                        if (last_byte < search_start)
                                last_byte = search_start;
                        hole_size = key.offset - last_byte;
                        if (key.offset > last_byte &&
                            hole_size >= num_bytes) {
                                *start = last_byte;
                                goto check_pending;
                        }
                }
                if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
                        goto next;

                start_found = 1;
                dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
                last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
next:
                path->slots[0]++;
                cond_resched();
        }
check_pending:
        /* we have to make sure we didn't find an extent that has already
         * been allocated by the map tree or the original allocation
         */
        btrfs_release_path(root, path);
        BUG_ON(*start < search_start);

        if (*start + num_bytes > search_end) {
                ret = -ENOSPC;
                goto error;
        }
        /* check for pending inserts here */
        return 0;

error:
        btrfs_release_path(root, path);
        return ret;
}
int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
                           struct btrfs_device *device,
                           u64 owner, u64 num_bytes, u64 *start)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_root *root = device->dev_root;
        struct btrfs_dev_extent *extent;
        struct extent_buffer *leaf;
        struct btrfs_key key;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = find_free_dev_extent(trans, device, path, num_bytes, start);
        if (ret)
                goto err;

        key.objectid = device->devid;
        key.offset = *start;
        key.type = BTRFS_DEV_EXTENT_KEY;
        ret = btrfs_insert_empty_item(trans, root, path, &key,
                                      sizeof(*extent));
        BUG_ON(ret);

        leaf = path->nodes[0];
        extent = btrfs_item_ptr(leaf, path->slots[0],
                                struct btrfs_dev_extent);
        btrfs_set_dev_extent_owner(leaf, extent, owner);
        btrfs_set_dev_extent_length(leaf, extent, num_bytes);
        btrfs_mark_buffer_dirty(leaf);
err:
        btrfs_free_path(path);
        return ret;
}
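
/*
 * find the end of the last chunk in the chunk tree, which is where the
 * next chunk's logical address should begin
 */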
static int find_next_chunk(struct btrfs_root *root, u64 *objectid)
{
        struct btrfs_path *path;
        int ret;
        struct btrfs_key key;
        struct btrfs_key found_key;

        path = btrfs_alloc_path();
        BUG_ON(!path);

        key.objectid = (u64)-1;
        key.offset = (u64)-1;
        key.type = BTRFS_CHUNK_ITEM_KEY;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto error;

        BUG_ON(ret == 0);

        ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
        if (ret) {
                *objectid = 0;
        } else {
                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
                                      path->slots[0]);
                *objectid = found_key.objectid + found_key.offset;
        }
        ret = 0;
error:
        btrfs_free_path(path);
        return ret;
}
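
/*
 * find the highest device id currently stored in the chunk tree and
 * return the next free one in *objectid
 */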
static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
                           u64 *objectid)
{
        int ret;
        struct btrfs_key key;
        struct btrfs_key found_key;

        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.type = BTRFS_DEV_ITEM_KEY;
        key.offset = (u64)-1;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto error;

        BUG_ON(ret == 0);

        ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
                                  BTRFS_DEV_ITEM_KEY);
        if (ret) {
                *objectid = 1;
        } else {
                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
                                      path->slots[0]);
                *objectid = found_key.offset + 1;
        }
        ret = 0;
error:
        btrfs_release_path(root, path);
        return ret;
}
/*
 * the device information is stored in the chunk root
 * the btrfs_device struct should be fully filled in
 */
int btrfs_add_device(struct btrfs_trans_handle *trans,
                     struct btrfs_root *root,
                     struct btrfs_device *device)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_dev_item *dev_item;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        unsigned long ptr;
        u64 free_devid;

        root = root->fs_info->chunk_root;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = find_next_devid(root, path, &free_devid);
        if (ret)
                goto out;

        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.type = BTRFS_DEV_ITEM_KEY;
        key.offset = free_devid;

        ret = btrfs_insert_empty_item(trans, root, path, &key,
                                      sizeof(*dev_item));
        if (ret)
                goto out;

        leaf = path->nodes[0];
        dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

        btrfs_set_device_id(leaf, dev_item, device->devid);
        btrfs_set_device_type(leaf, dev_item, device->type);
        btrfs_set_device_io_align(leaf, dev_item, device->io_align);
        btrfs_set_device_io_width(leaf, dev_item, device->io_width);
        btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
        btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
        btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);

        ptr = (unsigned long)btrfs_device_uuid(dev_item);
        write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
        btrfs_mark_buffer_dirty(leaf);
out:
        btrfs_free_path(path);
        return ret;
}
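
/*
 * rewrite the dev item in the chunk tree so it matches the in-memory
 * btrfs_device (sizes, alignment and bytes_used)
 */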
int btrfs_update_device(struct btrfs_trans_handle *trans,
                        struct btrfs_device *device)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_root *root;
        struct btrfs_dev_item *dev_item;
        struct extent_buffer *leaf;
        struct btrfs_key key;

        root = device->dev_root->fs_info->chunk_root;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.type = BTRFS_DEV_ITEM_KEY;
        key.offset = device->devid;

        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
        if (ret < 0)
                goto out;

        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        leaf = path->nodes[0];
        dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

        btrfs_set_device_id(leaf, dev_item, device->devid);
        btrfs_set_device_type(leaf, dev_item, device->type);
        btrfs_set_device_io_align(leaf, dev_item, device->io_align);
        btrfs_set_device_io_width(leaf, dev_item, device->io_width);
        btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
        btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
        btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
        btrfs_mark_buffer_dirty(leaf);

out:
        btrfs_free_path(path);
        return ret;
}
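
/*
 * append a key/chunk pair to the sys_chunk_array in the super block
 * copy, failing if the array would overflow
 */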
int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_key *key,
                           struct btrfs_chunk *chunk, int item_size)
{
        struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
        struct btrfs_disk_key disk_key;
        u32 array_size;
        u8 *ptr;

        array_size = btrfs_super_sys_array_size(super_copy);
        if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
                return -EFBIG;

        ptr = super_copy->sys_chunk_array + array_size;
        btrfs_cpu_key_to_disk(&disk_key, key);
        memcpy(ptr, &disk_key, sizeof(disk_key));
        ptr += sizeof(disk_key);
        memcpy(ptr, chunk, item_size);
        item_size += sizeof(disk_key);
        btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
        return 0;
}
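
/*
 * allocate a new chunk: pick devices with enough free space, reserve a
 * dev extent on each one, insert the chunk item into the chunk tree and
 * add a logical->physical mapping to the in-memory mapping tree
 */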
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                      struct btrfs_root *extent_root, u64 *start,
                      u64 *num_bytes, u64 type)
{
        u64 dev_offset;
        struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
        struct btrfs_stripe *stripes;
        struct btrfs_device *device = NULL;
        struct btrfs_chunk *chunk;
        struct list_head private_devs;
        struct list_head *dev_list = &extent_root->fs_info->devices;
        struct list_head *cur;
        struct extent_map_tree *em_tree;
        struct map_lookup *map;
        struct extent_map *em;
        u64 physical;
        u64 calc_size = 1024 * 1024 * 1024;
        u64 avail;
        u64 max_avail = 0;
        int num_stripes = 1;
        int looped = 0;
        int ret;
        int index;
        struct btrfs_key key;

        if (list_empty(dev_list))
                return -ENOSPC;
again:
        INIT_LIST_HEAD(&private_devs);
        cur = dev_list->next;
        index = 0;
        /* build a private list of devices we will allocate from */
        while(index < num_stripes) {
                device = list_entry(cur, struct btrfs_device, dev_list);
                avail = device->total_bytes - device->bytes_used;
                cur = cur->next;
                if (avail > max_avail)
                        max_avail = avail;
                if (avail >= calc_size) {
                        list_move_tail(&device->dev_list, &private_devs);
                        index++;
                }
                if (cur == dev_list)
                        break;
        }
        if (index < num_stripes) {
                list_splice(&private_devs, dev_list);
                if (!looped && max_avail > 0) {
                        looped = 1;
                        calc_size = max_avail;
                        goto again;
                }
                return -ENOSPC;
        }

        ret = find_next_chunk(chunk_root, &key.objectid);
        if (ret)
                return ret;

        chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
        if (!chunk)
                return -ENOMEM;

        stripes = &chunk->stripe;

        *num_bytes = calc_size;
        index = 0;
        while(index < num_stripes) {
                BUG_ON(list_empty(&private_devs));
                cur = private_devs.next;
                device = list_entry(cur, struct btrfs_device, dev_list);
                list_move_tail(&device->dev_list, dev_list);

                ret = btrfs_alloc_dev_extent(trans, device, key.objectid,
                                             calc_size, &dev_offset);
                BUG_ON(ret);

                device->bytes_used += calc_size;
                ret = btrfs_update_device(trans, device);
                BUG_ON(ret);

                btrfs_set_stack_stripe_devid(stripes + index, device->devid);
                btrfs_set_stack_stripe_offset(stripes + index, dev_offset);
                physical = dev_offset;
                index++;
        }
        BUG_ON(!list_empty(&private_devs));

        /* key.objectid was set above */
        key.offset = *num_bytes;
        key.type = BTRFS_CHUNK_ITEM_KEY;
        btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
        btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
        btrfs_set_stack_chunk_type(chunk, type);
        btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
        btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize);
        btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize);
        btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);

        ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
                                btrfs_chunk_item_size(num_stripes));
        BUG_ON(ret);
        *start = key.objectid;

        em = alloc_extent_map(GFP_NOFS);
        if (!em)
                return -ENOMEM;
        map = kmalloc(sizeof(*map), GFP_NOFS);
        if (!map) {
                free_extent_map(em);
                return -ENOMEM;
        }

        em->bdev = (struct block_device *)map;
        em->start = key.objectid;
        em->len = key.offset;
        em->block_start = 0;

        map->physical = physical;
        map->dev = device;

        kfree(chunk);

        em_tree = &extent_root->fs_info->mapping_tree.map_tree;
        spin_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        BUG_ON(ret);
        spin_unlock(&em_tree->lock);
        free_extent_map(em);
        return ret;
}
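
/* set up the in-memory tree used for logical->physical translation */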
void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
{
        extent_map_tree_init(&tree->map_tree, GFP_NOFS);
}
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
{
        struct extent_map *em;

        while(1) {
                spin_lock(&tree->map_tree.lock);
                em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
                if (em)
                        remove_extent_mapping(&tree->map_tree, em);
                spin_unlock(&tree->map_tree.lock);
                if (!em)
                        break;
                kfree(em->bdev);
                /* once for us */
                free_extent_map(em);
                /* once for the tree */
                free_extent_map(em);
        }
}
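
/*
 * translate a logical byte offset into a physical offset and device,
 * trimming *length to the end of the containing chunk
 */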
int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
                    u64 logical, u64 *phys, u64 *length,
                    struct btrfs_device **dev)
{
        struct extent_map *em;
        struct map_lookup *map;
        struct extent_map_tree *em_tree = &map_tree->map_tree;
        u64 offset;

        spin_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, logical, *length);
        BUG_ON(!em);

        BUG_ON(em->start > logical || em->start + em->len < logical);
        map = (struct map_lookup *)em->bdev;
        offset = logical - em->start;
        *phys = map->physical + offset;
        *length = em->len - offset;
        *dev = map->dev;
        free_extent_map(em);
        spin_unlock(&em_tree->lock);
        return 0;
}
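
/* remap a bio from logical to physical sectors and send it to the device */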
int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio)
{
        struct btrfs_mapping_tree *map_tree;
        struct btrfs_device *dev;
        u64 logical = bio->bi_sector << 9;
        u64 physical;
        u64 length = 0;
        u64 map_length;
        struct bio_vec *bvec;
        int i;
        int ret;

        bio_for_each_segment(bvec, bio, i) {
                length += bvec->bv_len;
        }
        map_tree = &root->fs_info->mapping_tree;
        map_length = length;
        ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev);
        if (map_length < length) {
                printk("mapping failed logical %Lu bio len %Lu physical %Lu "
                       "len %Lu\n", logical, length, physical, map_length);
                BUG();
        }
        BUG_ON(map_length < length);
        bio->bi_sector = physical >> 9;
        bio->bi_bdev = dev->bdev;
        submit_bio(rw, bio);
        return 0;
}
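
/* walk the per-fs device list looking for a matching devid */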
struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid)
{
        struct btrfs_device *dev;
        struct list_head *cur = root->fs_info->devices.next;
        struct list_head *head = &root->fs_info->devices;

        while(cur != head) {
                dev = list_entry(cur, struct btrfs_device, dev_list);
                if (dev->devid == devid)
                        return dev;
                cur = cur->next;
        }
        return NULL;
}
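
/*
 * turn an on-disk chunk item into an extent_map + map_lookup entry in
 * the mapping tree, unless that logical range is already mapped
 */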
static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                          struct extent_buffer *leaf,
                          struct btrfs_chunk *chunk)
{
        struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
        struct map_lookup *map;
        struct extent_map *em;
        u64 logical;
        u64 length;
        u64 devid;
        int ret;

        logical = key->objectid;
        length = key->offset;
        spin_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);

        /* already mapped? */
        if (em && em->start <= logical && em->start + em->len > logical) {
                free_extent_map(em);
                spin_unlock(&map_tree->map_tree.lock);
                return 0;
        } else if (em) {
                free_extent_map(em);
        }
        spin_unlock(&map_tree->map_tree.lock);

        em = alloc_extent_map(GFP_NOFS);
        if (!em)
                return -ENOMEM;
        map = kzalloc(sizeof(*map), GFP_NOFS);
        if (!map) {
                free_extent_map(em);
                return -ENOMEM;
        }

        em->bdev = (struct block_device *)map;
        em->start = logical;
        em->len = length;
        em->block_start = 0;

        map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0);
        devid = btrfs_stripe_devid_nr(leaf, chunk, 0);
        map->dev = btrfs_find_device(root, devid);
        if (!map->dev) {
                kfree(map);
                free_extent_map(em);
                return -EIO;
        }

        spin_lock(&map_tree->map_tree.lock);
        ret = add_extent_mapping(&map_tree->map_tree, em);
        BUG_ON(ret);
        spin_unlock(&map_tree->map_tree.lock);
        free_extent_map(em);

        return 0;
}
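
/* copy the fields of an on-disk dev item into an in-memory btrfs_device */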
static int fill_device_from_item(struct extent_buffer *leaf,
                                 struct btrfs_dev_item *dev_item,
                                 struct btrfs_device *device)
{
        unsigned long ptr;

        device->devid = btrfs_device_id(leaf, dev_item);
        device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
        device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
        device->type = btrfs_device_type(leaf, dev_item);
        device->io_align = btrfs_device_io_align(leaf, dev_item);
        device->io_width = btrfs_device_io_width(leaf, dev_item);
        device->sector_size = btrfs_device_sector_size(leaf, dev_item);

        ptr = (unsigned long)btrfs_device_uuid(dev_item);
        read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);

        return 0;
}
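
/*
 * find or create the in-memory btrfs_device for a dev item and fill it
 * in from the leaf
 */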
static int read_one_dev(struct btrfs_root *root,
                        struct extent_buffer *leaf,
                        struct btrfs_dev_item *dev_item)
{
        struct btrfs_device *device;
        u64 devid;
        int ret;

        devid = btrfs_device_id(leaf, dev_item);
        device = btrfs_find_device(root, devid);
        if (!device) {
                device = kmalloc(sizeof(*device), GFP_NOFS);
                if (!device)
                        return -ENOMEM;
                list_add(&device->dev_list, &root->fs_info->devices);
        }

        fill_device_from_item(leaf, dev_item, device);
        device->dev_root = root->fs_info->dev_root;
        device->bdev = root->fs_info->sb->s_bdev;

        ret = btrfs_open_device(device);

        return ret;
}
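
/* read the dev item embedded directly in the super block */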
int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
{
        struct btrfs_dev_item *dev_item;

        dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
                                                     dev_item);
        return read_one_dev(root, buf, dev_item);
}
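
/*
 * the super block holds a small array of key/chunk pairs describing the
 * system chunks; walk it and prime the mapping tree so the chunk tree
 * itself can be read
 */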
int btrfs_read_sys_array(struct btrfs_root *root)
{
        struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
        struct extent_buffer *sb = root->fs_info->sb_buffer;
        struct btrfs_disk_key *disk_key;
        struct btrfs_chunk *chunk;
        struct btrfs_key key;
        u32 num_stripes;
        u32 array_size;
        u32 len = 0;
        u8 *ptr;
        unsigned long sb_ptr;
        u32 cur;
        int ret;

        array_size = btrfs_super_sys_array_size(super_copy);

        /*
         * we do this loop twice, once for the device items and
         * once for all of the chunks.  This way there are device
         * structs filled in for every chunk
         */
        ptr = super_copy->sys_chunk_array;
        sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
        cur = 0;

        while (cur < array_size) {
                disk_key = (struct btrfs_disk_key *)ptr;
                btrfs_disk_key_to_cpu(&key, disk_key);

                len = sizeof(*disk_key);
                ptr += len;
                sb_ptr += len;
                cur += len;

                if (key.type == BTRFS_CHUNK_ITEM_KEY) {
                        chunk = (struct btrfs_chunk *)sb_ptr;
                        ret = read_one_chunk(root, &key, sb, chunk);
                        BUG_ON(ret);
                        num_stripes = btrfs_chunk_num_stripes(sb, chunk);
                        len = btrfs_chunk_item_size(num_stripes);
                } else {
                        BUG();
                }
                ptr += len;
                sb_ptr += len;
                cur += len;
        }
        return 0;
}
int btrfs_read_chunk_tree(struct btrfs_root *root)
{
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        struct btrfs_key found_key;
        int ret;
        int slot;

        root = root->fs_info->chunk_root;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        /* first we search for all of the device items, and then we
         * read in all of the chunk items.  This way we can create chunk
         * mappings that reference all of the devices that are found
         */
        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.offset = 0;
        key.type = 0;
again:
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        while (1) {
                leaf = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret == 0)
                                continue;
                        if (ret < 0)
                                goto error;
                        break;
                }
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
                if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
                        if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
                                break;
                        if (found_key.type == BTRFS_DEV_ITEM_KEY) {
                                struct btrfs_dev_item *dev_item;
                                dev_item = btrfs_item_ptr(leaf, slot,
                                                  struct btrfs_dev_item);
                                ret = read_one_dev(root, leaf, dev_item);
                                BUG_ON(ret);
                        }
                } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
                        struct btrfs_chunk *chunk;
                        chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
                        ret = read_one_chunk(root, &found_key, leaf, chunk);
                }
                path->slots[0]++;
        }
        if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
                key.objectid = 0;
                btrfs_release_path(root, path);
                goto again;
        }

        ret = 0;
error:
        btrfs_free_path(path);
        return ret;
}