]>
git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - fs/btrfs/ctree.c
3 #include "kerncompat.h"
4 #include "radix-tree.h"
7 #include "print-tree.h"
9 static int split_node(struct ctree_root
*root
, struct ctree_path
*path
,
11 static int split_leaf(struct ctree_root
*root
, struct ctree_path
*path
,
13 static int push_node_left(struct ctree_root
*root
, struct tree_buffer
*dst
,
14 struct tree_buffer
*src
);
15 static int balance_node_right(struct ctree_root
*root
,
16 struct tree_buffer
*dst_buf
,
17 struct tree_buffer
*src_buf
);
18 static int del_ptr(struct ctree_root
*root
, struct ctree_path
*path
, int level
,
21 inline void init_path(struct ctree_path
*p
)
23 memset(p
, 0, sizeof(*p
));
26 void release_path(struct ctree_root
*root
, struct ctree_path
*p
)
29 for (i
= 0; i
< MAX_LEVEL
; i
++) {
32 tree_block_release(root
, p
->nodes
[i
]);
34 memset(p
, 0, sizeof(*p
));
37 int btrfs_cow_block(struct ctree_root
*root
,
38 struct tree_buffer
*buf
,
39 struct tree_buffer
*parent
,
41 struct tree_buffer
**cow_ret
)
43 struct tree_buffer
*cow
;
45 if (!list_empty(&buf
->dirty
)) {
49 cow
= alloc_free_block(root
);
50 memcpy(&cow
->node
, &buf
->node
, sizeof(buf
->node
));
51 btrfs_set_header_blocknr(&cow
->node
.header
, cow
->blocknr
);
53 btrfs_inc_ref(root
, buf
);
54 if (buf
== root
->node
) {
57 if (buf
!= root
->commit_root
)
58 free_extent(root
, buf
->blocknr
, 1);
59 tree_block_release(root
, buf
);
61 parent
->node
.blockptrs
[parent_slot
] = cow
->blocknr
;
62 BUG_ON(list_empty(&parent
->dirty
));
63 free_extent(root
, buf
->blocknr
, 1);
65 tree_block_release(root
, buf
);
70 * The leaf data grows from end-to-front in the node.
71 * this returns the address of the start of the last item,
72 * which is the stop of the leaf data stack
74 static inline unsigned int leaf_data_end(struct leaf
*leaf
)
76 u32 nr
= btrfs_header_nritems(&leaf
->header
);
78 return sizeof(leaf
->data
);
79 return leaf
->items
[nr
-1].offset
;
83 * The space between the end of the leaf items and
84 * the start of the leaf data. IOW, how much room
85 * the leaf has left for both items and data
87 int leaf_free_space(struct leaf
*leaf
)
89 int data_end
= leaf_data_end(leaf
);
90 int nritems
= btrfs_header_nritems(&leaf
->header
);
91 char *items_end
= (char *)(leaf
->items
+ nritems
+ 1);
92 return (char *)(leaf
->data
+ data_end
) - (char *)items_end
;
96 * compare two keys in a memcmp fashion
98 int comp_keys(struct btrfs_disk_key
*disk
, struct btrfs_key
*k2
)
102 btrfs_disk_key_to_cpu(&k1
, disk
);
104 if (k1
.objectid
> k2
->objectid
)
106 if (k1
.objectid
< k2
->objectid
)
108 if (k1
.flags
> k2
->flags
)
110 if (k1
.flags
< k2
->flags
)
112 if (k1
.offset
> k2
->offset
)
114 if (k1
.offset
< k2
->offset
)
119 int check_node(struct ctree_path
*path
, int level
)
122 struct node
*parent
= NULL
;
123 struct node
*node
= &path
->nodes
[level
]->node
;
125 u32 nritems
= btrfs_header_nritems(&node
->header
);
127 if (path
->nodes
[level
+ 1])
128 parent
= &path
->nodes
[level
+ 1]->node
;
129 parent_slot
= path
->slots
[level
+ 1];
130 BUG_ON(nritems
== 0);
132 struct btrfs_disk_key
*parent_key
;
133 parent_key
= &parent
->keys
[parent_slot
];
134 BUG_ON(memcmp(parent_key
, node
->keys
,
135 sizeof(struct btrfs_disk_key
)));
136 BUG_ON(parent
->blockptrs
[parent_slot
] !=
137 btrfs_header_blocknr(&node
->header
));
139 BUG_ON(nritems
> NODEPTRS_PER_BLOCK
);
140 for (i
= 0; nritems
> 1 && i
< nritems
- 2; i
++) {
141 struct btrfs_key cpukey
;
142 btrfs_disk_key_to_cpu(&cpukey
, &node
->keys
[i
+ 1]);
143 BUG_ON(comp_keys(&node
->keys
[i
], &cpukey
) >= 0);
148 int check_leaf(struct ctree_path
*path
, int level
)
151 struct leaf
*leaf
= &path
->nodes
[level
]->leaf
;
152 struct node
*parent
= NULL
;
154 u32 nritems
= btrfs_header_nritems(&leaf
->header
);
156 if (path
->nodes
[level
+ 1])
157 parent
= &path
->nodes
[level
+ 1]->node
;
158 parent_slot
= path
->slots
[level
+ 1];
159 BUG_ON(leaf_free_space(leaf
) < 0);
165 struct btrfs_disk_key
*parent_key
;
166 parent_key
= &parent
->keys
[parent_slot
];
167 BUG_ON(memcmp(parent_key
, &leaf
->items
[0].key
,
168 sizeof(struct btrfs_disk_key
)));
169 BUG_ON(parent
->blockptrs
[parent_slot
] !=
170 btrfs_header_blocknr(&leaf
->header
));
172 for (i
= 0; nritems
> 1 && i
< nritems
- 2; i
++) {
173 struct btrfs_key cpukey
;
174 btrfs_disk_key_to_cpu(&cpukey
, &leaf
->items
[i
+ 1].key
);
175 BUG_ON(comp_keys(&leaf
->items
[i
].key
,
177 BUG_ON(leaf
->items
[i
].offset
!= leaf
->items
[i
+ 1].offset
+
178 leaf
->items
[i
+ 1].size
);
180 BUG_ON(leaf
->items
[i
].offset
+ leaf
->items
[i
].size
!=
187 int check_block(struct ctree_path
*path
, int level
)
190 return check_leaf(path
, level
);
191 return check_node(path
, level
);
195 * search for key in the array p. items p are item_size apart
196 * and there are 'max' items in p
197 * the slot in the array is returned via slot, and it points to
198 * the place where you would insert key if it is not found in
201 * slot may point to max if the key is bigger than all of the keys
203 int generic_bin_search(char *p
, int item_size
, struct btrfs_key
*key
,
210 struct btrfs_disk_key
*tmp
;
213 mid
= (low
+ high
) / 2;
214 tmp
= (struct btrfs_disk_key
*)(p
+ mid
* item_size
);
215 ret
= comp_keys(tmp
, key
);
231 * simple bin_search frontend that does the right thing for
234 int bin_search(struct node
*c
, struct btrfs_key
*key
, int *slot
)
236 if (btrfs_is_leaf(c
)) {
237 struct leaf
*l
= (struct leaf
*)c
;
238 return generic_bin_search((void *)l
->items
, sizeof(struct item
),
239 key
, btrfs_header_nritems(&c
->header
),
242 return generic_bin_search((void *)c
->keys
,
243 sizeof(struct btrfs_disk_key
),
244 key
, btrfs_header_nritems(&c
->header
),
250 struct tree_buffer
*read_node_slot(struct ctree_root
*root
,
251 struct tree_buffer
*parent_buf
,
254 struct node
*node
= &parent_buf
->node
;
257 if (slot
>= btrfs_header_nritems(&node
->header
))
259 return read_tree_block(root
, node
->blockptrs
[slot
]);
262 static int balance_level(struct ctree_root
*root
, struct ctree_path
*path
,
265 struct tree_buffer
*right_buf
;
266 struct tree_buffer
*mid_buf
;
267 struct tree_buffer
*left_buf
;
268 struct tree_buffer
*parent_buf
= NULL
;
269 struct node
*right
= NULL
;
271 struct node
*left
= NULL
;
272 struct node
*parent
= NULL
;
276 int orig_slot
= path
->slots
[level
];
282 mid_buf
= path
->nodes
[level
];
283 mid
= &mid_buf
->node
;
284 orig_ptr
= mid
->blockptrs
[orig_slot
];
286 if (level
< MAX_LEVEL
- 1)
287 parent_buf
= path
->nodes
[level
+ 1];
288 pslot
= path
->slots
[level
+ 1];
291 struct tree_buffer
*child
;
292 u64 blocknr
= mid_buf
->blocknr
;
294 if (btrfs_header_nritems(&mid
->header
) != 1)
297 /* promote the child to a root */
298 child
= read_node_slot(root
, mid_buf
, 0);
301 path
->nodes
[level
] = NULL
;
302 /* once for the path */
303 tree_block_release(root
, mid_buf
);
304 /* once for the root ptr */
305 tree_block_release(root
, mid_buf
);
306 clean_tree_block(root
, mid_buf
);
307 return free_extent(root
, blocknr
, 1);
309 parent
= &parent_buf
->node
;
311 if (btrfs_header_nritems(&mid
->header
) > NODEPTRS_PER_BLOCK
/ 4)
314 left_buf
= read_node_slot(root
, parent_buf
, pslot
- 1);
315 right_buf
= read_node_slot(root
, parent_buf
, pslot
+ 1);
317 /* first, try to make some room in the middle buffer */
319 btrfs_cow_block(root
, left_buf
, parent_buf
,
320 pslot
- 1, &left_buf
);
321 left
= &left_buf
->node
;
322 orig_slot
+= btrfs_header_nritems(&left
->header
);
323 wret
= push_node_left(root
, left_buf
, mid_buf
);
329 * then try to empty the right most buffer into the middle
332 btrfs_cow_block(root
, right_buf
, parent_buf
,
333 pslot
+ 1, &right_buf
);
334 right
= &right_buf
->node
;
335 wret
= push_node_left(root
, mid_buf
, right_buf
);
338 if (btrfs_header_nritems(&right
->header
) == 0) {
339 u64 blocknr
= right_buf
->blocknr
;
340 tree_block_release(root
, right_buf
);
341 clean_tree_block(root
, right_buf
);
344 wret
= del_ptr(root
, path
, level
+ 1, pslot
+ 1);
347 wret
= free_extent(root
, blocknr
, 1);
351 memcpy(parent
->keys
+ pslot
+ 1, right
->keys
,
352 sizeof(struct btrfs_disk_key
));
353 BUG_ON(list_empty(&parent_buf
->dirty
));
356 if (btrfs_header_nritems(&mid
->header
) == 1) {
358 * we're not allowed to leave a node with one item in the
359 * tree during a delete. A deletion from lower in the tree
360 * could try to delete the only pointer in this node.
361 * So, pull some keys from the left.
362 * There has to be a left pointer at this point because
363 * otherwise we would have pulled some pointers from the
367 wret
= balance_node_right(root
, mid_buf
, left_buf
);
372 if (btrfs_header_nritems(&mid
->header
) == 0) {
373 /* we've managed to empty the middle node, drop it */
374 u64 blocknr
= mid_buf
->blocknr
;
375 tree_block_release(root
, mid_buf
);
376 clean_tree_block(root
, mid_buf
);
379 wret
= del_ptr(root
, path
, level
+ 1, pslot
);
382 wret
= free_extent(root
, blocknr
, 1);
386 /* update the parent key to reflect our changes */
387 memcpy(parent
->keys
+ pslot
, mid
->keys
,
388 sizeof(struct btrfs_disk_key
));
389 BUG_ON(list_empty(&parent_buf
->dirty
));
392 /* update the path */
394 if (btrfs_header_nritems(&left
->header
) > orig_slot
) {
395 left_buf
->count
++; // released below
396 path
->nodes
[level
] = left_buf
;
397 path
->slots
[level
+ 1] -= 1;
398 path
->slots
[level
] = orig_slot
;
400 tree_block_release(root
, mid_buf
);
402 orig_slot
-= btrfs_header_nritems(&left
->header
);
403 path
->slots
[level
] = orig_slot
;
406 /* double check we haven't messed things up */
407 check_block(path
, level
);
408 if (orig_ptr
!= path
->nodes
[level
]->node
.blockptrs
[path
->slots
[level
]])
412 tree_block_release(root
, right_buf
);
414 tree_block_release(root
, left_buf
);
419 * look for key in the tree. path is filled in with nodes along the way
420 * if key is found, we return zero and you can find the item in the leaf
421 * level of the path (level 0)
423 * If the key isn't found, the path points to the slot where it should
424 * be inserted, and 1 is returned. If there are other errors during the
425 * search a negative error number is returned.
427 * if ins_len > 0, nodes and leaves will be split as we walk down the
428 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
431 int search_slot(struct ctree_root
*root
, struct btrfs_key
*key
,
432 struct ctree_path
*p
, int ins_len
, int cow
)
434 struct tree_buffer
*b
;
435 struct tree_buffer
*cow_buf
;
445 level
= btrfs_header_level(&b
->node
.header
);
448 wret
= btrfs_cow_block(root
, b
, p
->nodes
[level
+ 1],
449 p
->slots
[level
+ 1], &cow_buf
);
452 BUG_ON(!cow
&& ins_len
);
455 ret
= check_block(p
, level
);
458 ret
= bin_search(c
, key
, &slot
);
459 if (!btrfs_is_leaf(c
)) {
462 p
->slots
[level
] = slot
;
463 if (ins_len
> 0 && btrfs_header_nritems(&c
->header
) ==
464 NODEPTRS_PER_BLOCK
) {
465 int sret
= split_node(root
, p
, level
);
471 slot
= p
->slots
[level
];
472 } else if (ins_len
< 0) {
473 int sret
= balance_level(root
, p
, level
);
480 slot
= p
->slots
[level
];
481 BUG_ON(btrfs_header_nritems(&c
->header
) == 1);
483 b
= read_tree_block(root
, c
->blockptrs
[slot
]);
485 struct leaf
*l
= (struct leaf
*)c
;
486 p
->slots
[level
] = slot
;
487 if (ins_len
> 0 && leaf_free_space(l
) <
488 sizeof(struct item
) + ins_len
) {
489 int sret
= split_leaf(root
, p
, ins_len
);
494 BUG_ON(root
->node
->count
== 1);
498 BUG_ON(root
->node
->count
== 1);
503 * adjust the pointers going up the tree, starting at level
504 * making sure the right key of each node is points to 'key'.
505 * This is used after shifting pointers to the left, so it stops
506 * fixing up pointers when a given leaf/node is not in slot 0 of the
509 * If this fails to write a tree block, it returns -1, but continues
510 * fixing up the blocks in ram so the tree is consistent.
512 static int fixup_low_keys(struct ctree_root
*root
,
513 struct ctree_path
*path
, struct btrfs_disk_key
*key
,
518 for (i
= level
; i
< MAX_LEVEL
; i
++) {
520 int tslot
= path
->slots
[i
];
523 t
= &path
->nodes
[i
]->node
;
524 memcpy(t
->keys
+ tslot
, key
, sizeof(*key
));
525 BUG_ON(list_empty(&path
->nodes
[i
]->dirty
));
533 * try to push data from one node into the next node left in the
536 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
537 * error, and > 0 if there was no room in the left hand block.
539 static int push_node_left(struct ctree_root
*root
, struct tree_buffer
*dst_buf
,
540 struct tree_buffer
*src_buf
)
542 struct node
*src
= &src_buf
->node
;
543 struct node
*dst
= &dst_buf
->node
;
549 src_nritems
= btrfs_header_nritems(&src
->header
);
550 dst_nritems
= btrfs_header_nritems(&dst
->header
);
551 push_items
= NODEPTRS_PER_BLOCK
- dst_nritems
;
552 if (push_items
<= 0) {
556 if (src_nritems
< push_items
)
557 push_items
= src_nritems
;
559 memcpy(dst
->keys
+ dst_nritems
, src
->keys
,
560 push_items
* sizeof(struct btrfs_disk_key
));
561 memcpy(dst
->blockptrs
+ dst_nritems
, src
->blockptrs
,
562 push_items
* sizeof(u64
));
563 if (push_items
< src_nritems
) {
564 memmove(src
->keys
, src
->keys
+ push_items
,
565 (src_nritems
- push_items
) *
566 sizeof(struct btrfs_disk_key
));
567 memmove(src
->blockptrs
, src
->blockptrs
+ push_items
,
568 (src_nritems
- push_items
) * sizeof(u64
));
570 btrfs_set_header_nritems(&src
->header
, src_nritems
- push_items
);
571 btrfs_set_header_nritems(&dst
->header
, dst_nritems
+ push_items
);
572 BUG_ON(list_empty(&src_buf
->dirty
));
573 BUG_ON(list_empty(&dst_buf
->dirty
));
578 * try to push data from one node into the next node right in the
581 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
582 * error, and > 0 if there was no room in the right hand block.
584 * this will only push up to 1/2 the contents of the left node over
586 static int balance_node_right(struct ctree_root
*root
,
587 struct tree_buffer
*dst_buf
,
588 struct tree_buffer
*src_buf
)
590 struct node
*src
= &src_buf
->node
;
591 struct node
*dst
= &dst_buf
->node
;
598 src_nritems
= btrfs_header_nritems(&src
->header
);
599 dst_nritems
= btrfs_header_nritems(&dst
->header
);
600 push_items
= NODEPTRS_PER_BLOCK
- dst_nritems
;
601 if (push_items
<= 0) {
605 max_push
= src_nritems
/ 2 + 1;
606 /* don't try to empty the node */
607 if (max_push
> src_nritems
)
609 if (max_push
< push_items
)
610 push_items
= max_push
;
612 memmove(dst
->keys
+ push_items
, dst
->keys
,
613 dst_nritems
* sizeof(struct btrfs_disk_key
));
614 memmove(dst
->blockptrs
+ push_items
, dst
->blockptrs
,
615 dst_nritems
* sizeof(u64
));
616 memcpy(dst
->keys
, src
->keys
+ src_nritems
- push_items
,
617 push_items
* sizeof(struct btrfs_disk_key
));
618 memcpy(dst
->blockptrs
, src
->blockptrs
+ src_nritems
- push_items
,
619 push_items
* sizeof(u64
));
621 btrfs_set_header_nritems(&src
->header
, src_nritems
- push_items
);
622 btrfs_set_header_nritems(&dst
->header
, dst_nritems
+ push_items
);
624 BUG_ON(list_empty(&src_buf
->dirty
));
625 BUG_ON(list_empty(&dst_buf
->dirty
));
630 * helper function to insert a new root level in the tree.
631 * A new node is allocated, and a single item is inserted to
632 * point to the existing root
634 * returns zero on success or < 0 on failure.
636 static int insert_new_root(struct ctree_root
*root
,
637 struct ctree_path
*path
, int level
)
639 struct tree_buffer
*t
;
642 struct btrfs_disk_key
*lower_key
;
644 BUG_ON(path
->nodes
[level
]);
645 BUG_ON(path
->nodes
[level
-1] != root
->node
);
647 t
= alloc_free_block(root
);
649 memset(c
, 0, sizeof(c
));
650 btrfs_set_header_nritems(&c
->header
, 1);
651 btrfs_set_header_level(&c
->header
, level
);
652 btrfs_set_header_blocknr(&c
->header
, t
->blocknr
);
653 btrfs_set_header_parentid(&c
->header
,
654 btrfs_header_parentid(&root
->node
->node
.header
));
655 lower
= &path
->nodes
[level
-1]->node
;
656 if (btrfs_is_leaf(lower
))
657 lower_key
= &((struct leaf
*)lower
)->items
[0].key
;
659 lower_key
= lower
->keys
;
660 memcpy(c
->keys
, lower_key
, sizeof(struct btrfs_disk_key
));
661 c
->blockptrs
[0] = path
->nodes
[level
-1]->blocknr
;
662 /* the super has an extra ref to root->node */
663 tree_block_release(root
, root
->node
);
666 path
->nodes
[level
] = t
;
667 path
->slots
[level
] = 0;
672 * worker function to insert a single pointer in a node.
673 * the node should have enough room for the pointer already
675 * slot and level indicate where you want the key to go, and
676 * blocknr is the block the key points to.
678 * returns zero on success and < 0 on any error
680 static int insert_ptr(struct ctree_root
*root
,
681 struct ctree_path
*path
, struct btrfs_disk_key
*key
,
682 u64 blocknr
, int slot
, int level
)
687 BUG_ON(!path
->nodes
[level
]);
688 lower
= &path
->nodes
[level
]->node
;
689 nritems
= btrfs_header_nritems(&lower
->header
);
692 if (nritems
== NODEPTRS_PER_BLOCK
)
694 if (slot
!= nritems
) {
695 memmove(lower
->keys
+ slot
+ 1, lower
->keys
+ slot
,
696 (nritems
- slot
) * sizeof(struct btrfs_disk_key
));
697 memmove(lower
->blockptrs
+ slot
+ 1, lower
->blockptrs
+ slot
,
698 (nritems
- slot
) * sizeof(u64
));
700 memcpy(lower
->keys
+ slot
, key
, sizeof(struct btrfs_disk_key
));
701 lower
->blockptrs
[slot
] = blocknr
;
702 btrfs_set_header_nritems(&lower
->header
, nritems
+ 1);
703 if (lower
->keys
[1].objectid
== 0)
705 BUG_ON(list_empty(&path
->nodes
[level
]->dirty
));
710 * split the node at the specified level in path in two.
711 * The path is corrected to point to the appropriate node after the split
713 * Before splitting this tries to make some room in the node by pushing
714 * left and right, if either one works, it returns right away.
716 * returns 0 on success and < 0 on failure
718 static int split_node(struct ctree_root
*root
, struct ctree_path
*path
,
721 struct tree_buffer
*t
;
723 struct tree_buffer
*split_buffer
;
730 t
= path
->nodes
[level
];
732 if (t
== root
->node
) {
733 /* trying to split the root, lets make a new one */
734 ret
= insert_new_root(root
, path
, level
+ 1);
738 c_nritems
= btrfs_header_nritems(&c
->header
);
739 split_buffer
= alloc_free_block(root
);
740 split
= &split_buffer
->node
;
741 btrfs_set_header_flags(&split
->header
, btrfs_header_flags(&c
->header
));
742 btrfs_set_header_blocknr(&split
->header
, split_buffer
->blocknr
);
743 btrfs_set_header_parentid(&split
->header
,
744 btrfs_header_parentid(&root
->node
->node
.header
));
745 mid
= (c_nritems
+ 1) / 2;
746 memcpy(split
->keys
, c
->keys
+ mid
,
747 (c_nritems
- mid
) * sizeof(struct btrfs_disk_key
));
748 memcpy(split
->blockptrs
, c
->blockptrs
+ mid
,
749 (c_nritems
- mid
) * sizeof(u64
));
750 btrfs_set_header_nritems(&split
->header
, c_nritems
- mid
);
751 btrfs_set_header_nritems(&c
->header
, mid
);
754 BUG_ON(list_empty(&t
->dirty
));
755 wret
= insert_ptr(root
, path
, split
->keys
, split_buffer
->blocknr
,
756 path
->slots
[level
+ 1] + 1, level
+ 1);
760 if (path
->slots
[level
] >= mid
) {
761 path
->slots
[level
] -= mid
;
762 tree_block_release(root
, t
);
763 path
->nodes
[level
] = split_buffer
;
764 path
->slots
[level
+ 1] += 1;
766 tree_block_release(root
, split_buffer
);
772 * how many bytes are required to store the items in a leaf. start
773 * and nr indicate which items in the leaf to check. This totals up the
774 * space used both by the item structs and the item data
776 static int leaf_space_used(struct leaf
*l
, int start
, int nr
)
779 int end
= start
+ nr
- 1;
783 data_len
= l
->items
[start
].offset
+ l
->items
[start
].size
;
784 data_len
= data_len
- l
->items
[end
].offset
;
785 data_len
+= sizeof(struct item
) * nr
;
790 * push some data in the path leaf to the right, trying to free up at
791 * least data_size bytes. returns zero if the push worked, nonzero otherwise
793 * returns 1 if the push failed because the other node didn't have enough
794 * room, 0 if everything worked out and < 0 if there were major errors.
796 static int push_leaf_right(struct ctree_root
*root
, struct ctree_path
*path
,
799 struct tree_buffer
*left_buf
= path
->nodes
[0];
800 struct leaf
*left
= &left_buf
->leaf
;
802 struct tree_buffer
*right_buf
;
803 struct tree_buffer
*upper
;
813 slot
= path
->slots
[1];
814 if (!path
->nodes
[1]) {
817 upper
= path
->nodes
[1];
818 if (slot
>= btrfs_header_nritems(&upper
->node
.header
) - 1) {
821 right_buf
= read_tree_block(root
, upper
->node
.blockptrs
[slot
+ 1]);
822 right
= &right_buf
->leaf
;
823 free_space
= leaf_free_space(right
);
824 if (free_space
< data_size
+ sizeof(struct item
)) {
825 tree_block_release(root
, right_buf
);
828 /* cow and double check */
829 btrfs_cow_block(root
, right_buf
, upper
, slot
+ 1, &right_buf
);
830 right
= &right_buf
->leaf
;
831 free_space
= leaf_free_space(right
);
832 if (free_space
< data_size
+ sizeof(struct item
)) {
833 tree_block_release(root
, right_buf
);
837 left_nritems
= btrfs_header_nritems(&left
->header
);
838 for (i
= left_nritems
- 1; i
>= 0; i
--) {
839 item
= left
->items
+ i
;
840 if (path
->slots
[0] == i
)
841 push_space
+= data_size
+ sizeof(*item
);
842 if (item
->size
+ sizeof(*item
) + push_space
> free_space
)
845 push_space
+= item
->size
+ sizeof(*item
);
847 if (push_items
== 0) {
848 tree_block_release(root
, right_buf
);
851 right_nritems
= btrfs_header_nritems(&right
->header
);
852 /* push left to right */
853 push_space
= left
->items
[left_nritems
- push_items
].offset
+
854 left
->items
[left_nritems
- push_items
].size
;
855 push_space
-= leaf_data_end(left
);
856 /* make room in the right data area */
857 memmove(right
->data
+ leaf_data_end(right
) - push_space
,
858 right
->data
+ leaf_data_end(right
),
859 LEAF_DATA_SIZE
- leaf_data_end(right
));
860 /* copy from the left data area */
861 memcpy(right
->data
+ LEAF_DATA_SIZE
- push_space
,
862 left
->data
+ leaf_data_end(left
),
864 memmove(right
->items
+ push_items
, right
->items
,
865 right_nritems
* sizeof(struct item
));
866 /* copy the items from left to right */
867 memcpy(right
->items
, left
->items
+ left_nritems
- push_items
,
868 push_items
* sizeof(struct item
));
870 /* update the item pointers */
871 right_nritems
+= push_items
;
872 btrfs_set_header_nritems(&right
->header
, right_nritems
);
873 push_space
= LEAF_DATA_SIZE
;
874 for (i
= 0; i
< right_nritems
; i
++) {
875 right
->items
[i
].offset
= push_space
- right
->items
[i
].size
;
876 push_space
= right
->items
[i
].offset
;
878 left_nritems
-= push_items
;
879 btrfs_set_header_nritems(&left
->header
, left_nritems
);
881 BUG_ON(list_empty(&left_buf
->dirty
));
882 BUG_ON(list_empty(&right_buf
->dirty
));
883 memcpy(upper
->node
.keys
+ slot
+ 1,
884 &right
->items
[0].key
, sizeof(struct btrfs_disk_key
));
885 BUG_ON(list_empty(&upper
->dirty
));
887 /* then fixup the leaf pointer in the path */
888 if (path
->slots
[0] >= left_nritems
) {
889 path
->slots
[0] -= left_nritems
;
890 tree_block_release(root
, path
->nodes
[0]);
891 path
->nodes
[0] = right_buf
;
894 tree_block_release(root
, right_buf
);
899 * push some data in the path leaf to the left, trying to free up at
900 * least data_size bytes. returns zero if the push worked, nonzero otherwise
902 static int push_leaf_left(struct ctree_root
*root
, struct ctree_path
*path
,
905 struct tree_buffer
*right_buf
= path
->nodes
[0];
906 struct leaf
*right
= &right_buf
->leaf
;
907 struct tree_buffer
*t
;
915 u32 old_left_nritems
;
919 slot
= path
->slots
[1];
923 if (!path
->nodes
[1]) {
926 t
= read_tree_block(root
, path
->nodes
[1]->node
.blockptrs
[slot
- 1]);
928 free_space
= leaf_free_space(left
);
929 if (free_space
< data_size
+ sizeof(struct item
)) {
930 tree_block_release(root
, t
);
934 /* cow and double check */
935 btrfs_cow_block(root
, t
, path
->nodes
[1], slot
- 1, &t
);
937 free_space
= leaf_free_space(left
);
938 if (free_space
< data_size
+ sizeof(struct item
)) {
939 tree_block_release(root
, t
);
943 for (i
= 0; i
< btrfs_header_nritems(&right
->header
); i
++) {
944 item
= right
->items
+ i
;
945 if (path
->slots
[0] == i
)
946 push_space
+= data_size
+ sizeof(*item
);
947 if (item
->size
+ sizeof(*item
) + push_space
> free_space
)
950 push_space
+= item
->size
+ sizeof(*item
);
952 if (push_items
== 0) {
953 tree_block_release(root
, t
);
956 /* push data from right to left */
957 memcpy(left
->items
+ btrfs_header_nritems(&left
->header
),
958 right
->items
, push_items
* sizeof(struct item
));
959 push_space
= LEAF_DATA_SIZE
- right
->items
[push_items
-1].offset
;
960 memcpy(left
->data
+ leaf_data_end(left
) - push_space
,
961 right
->data
+ right
->items
[push_items
- 1].offset
,
963 old_left_nritems
= btrfs_header_nritems(&left
->header
);
964 BUG_ON(old_left_nritems
< 0);
966 for(i
= old_left_nritems
; i
< old_left_nritems
+ push_items
; i
++) {
967 left
->items
[i
].offset
-= LEAF_DATA_SIZE
-
968 left
->items
[old_left_nritems
-1].offset
;
970 btrfs_set_header_nritems(&left
->header
, old_left_nritems
+ push_items
);
972 /* fixup right node */
973 push_space
= right
->items
[push_items
-1].offset
- leaf_data_end(right
);
974 memmove(right
->data
+ LEAF_DATA_SIZE
- push_space
, right
->data
+
975 leaf_data_end(right
), push_space
);
976 memmove(right
->items
, right
->items
+ push_items
,
977 (btrfs_header_nritems(&right
->header
) - push_items
) *
978 sizeof(struct item
));
979 btrfs_set_header_nritems(&right
->header
,
980 btrfs_header_nritems(&right
->header
) -
982 push_space
= LEAF_DATA_SIZE
;
984 for (i
= 0; i
< btrfs_header_nritems(&right
->header
); i
++) {
985 right
->items
[i
].offset
= push_space
- right
->items
[i
].size
;
986 push_space
= right
->items
[i
].offset
;
989 BUG_ON(list_empty(&t
->dirty
));
990 BUG_ON(list_empty(&right_buf
->dirty
));
992 wret
= fixup_low_keys(root
, path
, &right
->items
[0].key
, 1);
996 /* then fixup the leaf pointer in the path */
997 if (path
->slots
[0] < push_items
) {
998 path
->slots
[0] += old_left_nritems
;
999 tree_block_release(root
, path
->nodes
[0]);
1001 path
->slots
[1] -= 1;
1003 tree_block_release(root
, t
);
1004 path
->slots
[0] -= push_items
;
1006 BUG_ON(path
->slots
[0] < 0);
1011 * split the path's leaf in two, making sure there is at least data_size
1012 * available for the resulting leaf level of the path.
1014 * returns 0 if all went well and < 0 on failure.
1016 static int split_leaf(struct ctree_root
*root
, struct ctree_path
*path
,
1019 struct tree_buffer
*l_buf
;
1025 struct tree_buffer
*right_buffer
;
1026 int space_needed
= data_size
+ sizeof(struct item
);
1033 l_buf
= path
->nodes
[0];
1036 /* did the pushes work? */
1037 if (leaf_free_space(l
) >= sizeof(struct item
) + data_size
)
1040 if (!path
->nodes
[1]) {
1041 ret
= insert_new_root(root
, path
, 1);
1045 slot
= path
->slots
[0];
1046 nritems
= btrfs_header_nritems(&l
->header
);
1047 mid
= (nritems
+ 1)/ 2;
1048 right_buffer
= alloc_free_block(root
);
1049 BUG_ON(!right_buffer
);
1050 BUG_ON(mid
== nritems
);
1051 right
= &right_buffer
->leaf
;
1052 memset(right
, 0, sizeof(*right
));
1054 /* FIXME, just alloc a new leaf here */
1055 if (leaf_space_used(l
, mid
, nritems
- mid
) + space_needed
>
1059 /* FIXME, just alloc a new leaf here */
1060 if (leaf_space_used(l
, 0, mid
+ 1) + space_needed
>
1064 btrfs_set_header_nritems(&right
->header
, nritems
- mid
);
1065 btrfs_set_header_blocknr(&right
->header
, right_buffer
->blocknr
);
1066 btrfs_set_header_level(&right
->header
, 0);
1067 btrfs_set_header_parentid(&right
->header
,
1068 btrfs_header_parentid(&root
->node
->node
.header
));
1069 data_copy_size
= l
->items
[mid
].offset
+ l
->items
[mid
].size
-
1071 memcpy(right
->items
, l
->items
+ mid
,
1072 (nritems
- mid
) * sizeof(struct item
));
1073 memcpy(right
->data
+ LEAF_DATA_SIZE
- data_copy_size
,
1074 l
->data
+ leaf_data_end(l
), data_copy_size
);
1075 rt_data_off
= LEAF_DATA_SIZE
-
1076 (l
->items
[mid
].offset
+ l
->items
[mid
].size
);
1078 for (i
= 0; i
< btrfs_header_nritems(&right
->header
); i
++)
1079 right
->items
[i
].offset
+= rt_data_off
;
1081 btrfs_set_header_nritems(&l
->header
, mid
);
1083 wret
= insert_ptr(root
, path
, &right
->items
[0].key
,
1084 right_buffer
->blocknr
, path
->slots
[1] + 1, 1);
1087 BUG_ON(list_empty(&right_buffer
->dirty
));
1088 BUG_ON(list_empty(&l_buf
->dirty
));
1089 BUG_ON(path
->slots
[0] != slot
);
1091 tree_block_release(root
, path
->nodes
[0]);
1092 path
->nodes
[0] = right_buffer
;
1093 path
->slots
[0] -= mid
;
1094 path
->slots
[1] += 1;
1096 tree_block_release(root
, right_buffer
);
1097 BUG_ON(path
->slots
[0] < 0);
1102 * Given a key and some data, insert an item into the tree.
1103 * This does all the path init required, making room in the tree if needed.
1105 int insert_item(struct ctree_root
*root
, struct btrfs_key
*cpu_key
,
1106 void *data
, int data_size
)
1112 struct tree_buffer
*leaf_buf
;
1114 unsigned int data_end
;
1115 struct ctree_path path
;
1116 struct btrfs_disk_key disk_key
;
1118 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
);
1120 /* create a root if there isn't one */
1124 ret
= search_slot(root
, cpu_key
, &path
, data_size
, 1);
1126 release_path(root
, &path
);
1132 slot_orig
= path
.slots
[0];
1133 leaf_buf
= path
.nodes
[0];
1134 leaf
= &leaf_buf
->leaf
;
1136 nritems
= btrfs_header_nritems(&leaf
->header
);
1137 data_end
= leaf_data_end(leaf
);
1139 if (leaf_free_space(leaf
) < sizeof(struct item
) + data_size
)
1142 slot
= path
.slots
[0];
1144 if (slot
!= nritems
) {
1146 unsigned int old_data
= leaf
->items
[slot
].offset
+
1147 leaf
->items
[slot
].size
;
1150 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1152 /* first correct the data pointers */
1153 for (i
= slot
; i
< nritems
; i
++)
1154 leaf
->items
[i
].offset
-= data_size
;
1156 /* shift the items */
1157 memmove(leaf
->items
+ slot
+ 1, leaf
->items
+ slot
,
1158 (nritems
- slot
) * sizeof(struct item
));
1160 /* shift the data */
1161 memmove(leaf
->data
+ data_end
- data_size
, leaf
->data
+
1162 data_end
, old_data
- data_end
);
1163 data_end
= old_data
;
1165 /* copy the new data in */
1166 memcpy(&leaf
->items
[slot
].key
, &disk_key
,
1167 sizeof(struct btrfs_disk_key
));
1168 leaf
->items
[slot
].offset
= data_end
- data_size
;
1169 leaf
->items
[slot
].size
= data_size
;
1170 memcpy(leaf
->data
+ data_end
- data_size
, data
, data_size
);
1171 btrfs_set_header_nritems(&leaf
->header
, nritems
+ 1);
1175 ret
= fixup_low_keys(root
, &path
, &disk_key
, 1);
1177 BUG_ON(list_empty(&leaf_buf
->dirty
));
1178 if (leaf_free_space(leaf
) < 0)
1180 check_leaf(&path
, 0);
1182 release_path(root
, &path
);
1187 * delete the pointer from a given node.
1189 * If the delete empties a node, the node is removed from the tree,
1190 * continuing all the way the root if required. The root is converted into
1191 * a leaf if all the nodes are emptied.
1193 static int del_ptr(struct ctree_root
*root
, struct ctree_path
*path
, int level
,
1197 struct tree_buffer
*parent
= path
->nodes
[level
];
1202 node
= &parent
->node
;
1203 nritems
= btrfs_header_nritems(&node
->header
);
1204 if (slot
!= nritems
-1) {
1205 memmove(node
->keys
+ slot
, node
->keys
+ slot
+ 1,
1206 sizeof(struct btrfs_disk_key
) * (nritems
- slot
- 1));
1207 memmove(node
->blockptrs
+ slot
,
1208 node
->blockptrs
+ slot
+ 1,
1209 sizeof(u64
) * (nritems
- slot
- 1));
1212 btrfs_set_header_nritems(&node
->header
, nritems
);
1213 if (nritems
== 0 && parent
== root
->node
) {
1214 BUG_ON(btrfs_header_level(&root
->node
->node
.header
) != 1);
1215 /* just turn the root into a leaf and break */
1216 btrfs_set_header_level(&root
->node
->node
.header
, 0);
1217 } else if (slot
== 0) {
1218 wret
= fixup_low_keys(root
, path
, node
->keys
, level
+ 1);
1222 BUG_ON(list_empty(&parent
->dirty
));
1227 * delete the item at the leaf level in path. If that empties
1228 * the leaf, remove it from the tree
1230 int del_item(struct ctree_root
*root
, struct ctree_path
*path
)
1234 struct tree_buffer
*leaf_buf
;
1241 leaf_buf
= path
->nodes
[0];
1242 leaf
= &leaf_buf
->leaf
;
1243 slot
= path
->slots
[0];
1244 doff
= leaf
->items
[slot
].offset
;
1245 dsize
= leaf
->items
[slot
].size
;
1246 nritems
= btrfs_header_nritems(&leaf
->header
);
1248 if (slot
!= nritems
- 1) {
1250 int data_end
= leaf_data_end(leaf
);
1251 memmove(leaf
->data
+ data_end
+ dsize
,
1252 leaf
->data
+ data_end
,
1254 for (i
= slot
+ 1; i
< nritems
; i
++)
1255 leaf
->items
[i
].offset
+= dsize
;
1256 memmove(leaf
->items
+ slot
, leaf
->items
+ slot
+ 1,
1257 sizeof(struct item
) *
1258 (nritems
- slot
- 1));
1260 btrfs_set_header_nritems(&leaf
->header
, nritems
- 1);
1262 /* delete the leaf if we've emptied it */
1264 if (leaf_buf
== root
->node
) {
1265 btrfs_set_header_level(&leaf
->header
, 0);
1266 BUG_ON(list_empty(&leaf_buf
->dirty
));
1268 clean_tree_block(root
, leaf_buf
);
1269 wret
= del_ptr(root
, path
, 1, path
->slots
[1]);
1272 wret
= free_extent(root
, leaf_buf
->blocknr
, 1);
1277 int used
= leaf_space_used(leaf
, 0, nritems
);
1279 wret
= fixup_low_keys(root
, path
,
1280 &leaf
->items
[0].key
, 1);
1284 BUG_ON(list_empty(&leaf_buf
->dirty
));
1286 /* delete the leaf if it is mostly empty */
1287 if (used
< LEAF_DATA_SIZE
/ 3) {
1288 /* push_leaf_left fixes the path.
1289 * make sure the path still points to our leaf
1290 * for possible call to del_ptr below
1292 slot
= path
->slots
[1];
1294 wret
= push_leaf_left(root
, path
, 1);
1297 if (path
->nodes
[0] == leaf_buf
&&
1298 btrfs_header_nritems(&leaf
->header
)) {
1299 wret
= push_leaf_right(root
, path
, 1);
1303 if (btrfs_header_nritems(&leaf
->header
) == 0) {
1304 u64 blocknr
= leaf_buf
->blocknr
;
1305 clean_tree_block(root
, leaf_buf
);
1306 wret
= del_ptr(root
, path
, 1, slot
);
1309 tree_block_release(root
, leaf_buf
);
1310 wret
= free_extent(root
, blocknr
, 1);
1314 tree_block_release(root
, leaf_buf
);
1322 * walk up the tree as far as required to find the next leaf.
1323 * returns 0 if it found something or 1 if there are no greater leaves.
1324 * returns < 0 on io errors.
1326 int next_leaf(struct ctree_root
*root
, struct ctree_path
*path
)
1331 struct tree_buffer
*c
;
1332 struct tree_buffer
*next
= NULL
;
1334 while(level
< MAX_LEVEL
) {
1335 if (!path
->nodes
[level
])
1337 slot
= path
->slots
[level
] + 1;
1338 c
= path
->nodes
[level
];
1339 if (slot
>= btrfs_header_nritems(&c
->node
.header
)) {
1343 blocknr
= c
->node
.blockptrs
[slot
];
1345 tree_block_release(root
, next
);
1346 next
= read_tree_block(root
, blocknr
);
1349 path
->slots
[level
] = slot
;
1352 c
= path
->nodes
[level
];
1353 tree_block_release(root
, c
);
1354 path
->nodes
[level
] = next
;
1355 path
->slots
[level
] = 0;
1358 next
= read_tree_block(root
, next
->node
.blockptrs
[0]);