]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - fs/btrfs/ctree.c
Btrfs: write barriers on commit, balance level before split
[mirror_ubuntu-artful-kernel.git] / fs / btrfs / ctree.c
CommitLineData
2e635a27 1#include <linux/module.h>
eb60ceac
CM
2#include "ctree.h"
3#include "disk-io.h"
7f5c1516 4#include "transaction.h"
9a8dd150 5
e089f05c
CM
6static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
7 *root, struct btrfs_path *path, int level);
8static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
d4dbff95
CM
9 *root, struct btrfs_key *ins_key,
10 struct btrfs_path *path, int data_size);
e089f05c 11static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
e20d96d6 12 *root, struct buffer_head *dst, struct buffer_head
e089f05c
CM
13 *src);
14static int balance_node_right(struct btrfs_trans_handle *trans, struct
e20d96d6
CM
15 btrfs_root *root, struct buffer_head *dst_buf,
16 struct buffer_head *src_buf);
e089f05c
CM
17static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
18 struct btrfs_path *path, int level, int slot);
d97e63b6 19
df24a2b9 20inline void btrfs_init_path(struct btrfs_path *p)
2c90e5d6 21{
df24a2b9 22 memset(p, 0, sizeof(*p));
2c90e5d6
CM
23}
24
df24a2b9 25struct btrfs_path *btrfs_alloc_path(void)
2c90e5d6 26{
df24a2b9
CM
27 struct btrfs_path *path;
28 path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
29 if (path)
30 btrfs_init_path(path);
31 return path;
2c90e5d6
CM
32}
33
df24a2b9 34void btrfs_free_path(struct btrfs_path *p)
be0e5c09 35{
df24a2b9
CM
36 btrfs_release_path(NULL, p);
37 kmem_cache_free(btrfs_path_cachep, p);
be0e5c09
CM
38}
39
234b63a0 40void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
eb60ceac
CM
41{
42 int i;
234b63a0 43 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
eb60ceac
CM
44 if (!p->nodes[i])
45 break;
234b63a0 46 btrfs_block_release(root, p->nodes[i]);
eb60ceac 47 }
aa5d6bed 48 memset(p, 0, sizeof(*p));
eb60ceac
CM
49}
50
e089f05c 51static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
e20d96d6
CM
52 *root, struct buffer_head *buf, struct buffer_head
53 *parent, int parent_slot, struct buffer_head
e089f05c 54 **cow_ret)
02217ed2 55{
e20d96d6
CM
56 struct buffer_head *cow;
57 struct btrfs_node *cow_node;
02217ed2 58
7f5c1516
CM
59 if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
60 trans->transid) {
02217ed2
CM
61 *cow_ret = buf;
62 return 0;
63 }
e089f05c 64 cow = btrfs_alloc_free_block(trans, root);
e20d96d6 65 cow_node = btrfs_buffer_node(cow);
2c90e5d6
CM
66 if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
67 WARN_ON(1);
e20d96d6 68 memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
7eccb903 69 btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
7f5c1516 70 btrfs_set_header_generation(&cow_node->header, trans->transid);
e089f05c 71 btrfs_inc_ref(trans, root, buf);
02217ed2
CM
72 if (buf == root->node) {
73 root->node = cow;
e20d96d6 74 get_bh(cow);
2c90e5d6 75 if (buf != root->commit_root) {
7eccb903 76 btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
2c90e5d6 77 }
234b63a0 78 btrfs_block_release(root, buf);
02217ed2 79 } else {
e20d96d6 80 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
7eccb903 81 bh_blocknr(cow));
d6025579 82 btrfs_mark_buffer_dirty(parent);
7eccb903 83 btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
02217ed2 84 }
234b63a0 85 btrfs_block_release(root, buf);
df24a2b9 86 mark_buffer_dirty(cow);
2c90e5d6 87 *cow_ret = cow;
02217ed2
CM
88 return 0;
89}
90
74123bd7
CM
91/*
92 * The leaf data grows from end-to-front in the node.
93 * this returns the address of the start of the last item,
94 * which is the stop of the leaf data stack
95 */
123abc88
CM
96static inline unsigned int leaf_data_end(struct btrfs_root *root,
97 struct btrfs_leaf *leaf)
be0e5c09 98{
7518a238 99 u32 nr = btrfs_header_nritems(&leaf->header);
be0e5c09 100 if (nr == 0)
123abc88 101 return BTRFS_LEAF_DATA_SIZE(root);
0783fcfc 102 return btrfs_item_offset(leaf->items + nr - 1);
be0e5c09
CM
103}
104
74123bd7
CM
105/*
106 * compare two keys in a memcmp fashion
107 */
9aca1d51 108static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
be0e5c09 109{
e2fa7227
CM
110 struct btrfs_key k1;
111
112 btrfs_disk_key_to_cpu(&k1, disk);
113
114 if (k1.objectid > k2->objectid)
be0e5c09 115 return 1;
e2fa7227 116 if (k1.objectid < k2->objectid)
be0e5c09 117 return -1;
f254e52c
CM
118 if (k1.flags > k2->flags)
119 return 1;
120 if (k1.flags < k2->flags)
121 return -1;
70b2befd
CM
122 if (k1.offset > k2->offset)
123 return 1;
124 if (k1.offset < k2->offset)
125 return -1;
be0e5c09
CM
126 return 0;
127}
74123bd7 128
123abc88
CM
129static int check_node(struct btrfs_root *root, struct btrfs_path *path,
130 int level)
aa5d6bed
CM
131{
132 int i;
234b63a0 133 struct btrfs_node *parent = NULL;
e20d96d6 134 struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
aa5d6bed 135 int parent_slot;
7518a238 136 u32 nritems = btrfs_header_nritems(&node->header);
aa5d6bed
CM
137
138 if (path->nodes[level + 1])
e20d96d6 139 parent = btrfs_buffer_node(path->nodes[level + 1]);
aa5d6bed 140 parent_slot = path->slots[level + 1];
7518a238
CM
141 BUG_ON(nritems == 0);
142 if (parent) {
e2fa7227 143 struct btrfs_disk_key *parent_key;
123abc88
CM
144 parent_key = &parent->ptrs[parent_slot].key;
145 BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
e2fa7227 146 sizeof(struct btrfs_disk_key)));
1d4f8a0c 147 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
7518a238 148 btrfs_header_blocknr(&node->header));
aa5d6bed 149 }
123abc88 150 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
7518a238 151 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
e2fa7227 152 struct btrfs_key cpukey;
123abc88 153 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
e66f709b
CM
154if (comp_keys(&node->ptrs[i].key, &cpukey) >= 0) {
155 struct btrfs_key bad;
156 btrfs_disk_key_to_cpu(&bad, &node->ptrs[i].key);
157printk("check_node level %d i is %d bad comp %Lu %u %Lu, %Lu %u %Lu\n",level, i, bad.objectid, bad.flags, bad.offset, cpukey.objectid, cpukey.flags, cpukey.offset);
158}
123abc88 159 BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
aa5d6bed
CM
160 }
161 return 0;
162}
163
123abc88
CM
164static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
165 int level)
aa5d6bed
CM
166{
167 int i;
e20d96d6 168 struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]);
234b63a0 169 struct btrfs_node *parent = NULL;
aa5d6bed 170 int parent_slot;
7518a238 171 u32 nritems = btrfs_header_nritems(&leaf->header);
aa5d6bed
CM
172
173 if (path->nodes[level + 1])
e20d96d6 174 parent = btrfs_buffer_node(path->nodes[level + 1]);
aa5d6bed 175 parent_slot = path->slots[level + 1];
123abc88 176 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
7518a238
CM
177
178 if (nritems == 0)
179 return 0;
180
181 if (parent) {
e2fa7227 182 struct btrfs_disk_key *parent_key;
123abc88 183 parent_key = &parent->ptrs[parent_slot].key;
aa5d6bed 184 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
e2fa7227 185 sizeof(struct btrfs_disk_key)));
1d4f8a0c 186 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
7518a238 187 btrfs_header_blocknr(&leaf->header));
aa5d6bed 188 }
7518a238 189 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
e2fa7227
CM
190 struct btrfs_key cpukey;
191 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
aa5d6bed 192 BUG_ON(comp_keys(&leaf->items[i].key,
e2fa7227 193 &cpukey) >= 0);
0783fcfc
CM
194 BUG_ON(btrfs_item_offset(leaf->items + i) !=
195 btrfs_item_end(leaf->items + i + 1));
aa5d6bed 196 if (i == 0) {
0783fcfc
CM
197 BUG_ON(btrfs_item_offset(leaf->items + i) +
198 btrfs_item_size(leaf->items + i) !=
123abc88 199 BTRFS_LEAF_DATA_SIZE(root));
aa5d6bed
CM
200 }
201 }
aa5d6bed
CM
202 return 0;
203}
204
123abc88
CM
205static int check_block(struct btrfs_root *root, struct btrfs_path *path,
206 int level)
aa5d6bed 207{
3eb0314d
CM
208 struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
209 if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid,
210 sizeof(node->header.fsid)))
211 BUG();
aa5d6bed 212 if (level == 0)
123abc88
CM
213 return check_leaf(root, path, level);
214 return check_node(root, path, level);
aa5d6bed
CM
215}
216
74123bd7
CM
217/*
218 * search for key in the array p. items p are item_size apart
219 * and there are 'max' items in p
220 * the slot in the array is returned via slot, and it points to
221 * the place where you would insert key if it is not found in
222 * the array.
223 *
224 * slot may point to max if the key is bigger than all of the keys
225 */
9aca1d51 226static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
be0e5c09
CM
227 int max, int *slot)
228{
229 int low = 0;
230 int high = max;
231 int mid;
232 int ret;
e2fa7227 233 struct btrfs_disk_key *tmp;
be0e5c09
CM
234
235 while(low < high) {
236 mid = (low + high) / 2;
e2fa7227 237 tmp = (struct btrfs_disk_key *)(p + mid * item_size);
be0e5c09
CM
238 ret = comp_keys(tmp, key);
239
240 if (ret < 0)
241 low = mid + 1;
242 else if (ret > 0)
243 high = mid;
244 else {
245 *slot = mid;
246 return 0;
247 }
248 }
249 *slot = low;
250 return 1;
251}
252
97571fd0
CM
253/*
254 * simple bin_search frontend that does the right thing for
255 * leaves vs nodes
256 */
9aca1d51 257static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
be0e5c09 258{
7518a238 259 if (btrfs_is_leaf(c)) {
234b63a0 260 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
0783fcfc
CM
261 return generic_bin_search((void *)l->items,
262 sizeof(struct btrfs_item),
7518a238
CM
263 key, btrfs_header_nritems(&c->header),
264 slot);
be0e5c09 265 } else {
123abc88
CM
266 return generic_bin_search((void *)c->ptrs,
267 sizeof(struct btrfs_key_ptr),
7518a238
CM
268 key, btrfs_header_nritems(&c->header),
269 slot);
be0e5c09
CM
270 }
271 return -1;
272}
273
e20d96d6
CM
274static struct buffer_head *read_node_slot(struct btrfs_root *root,
275 struct buffer_head *parent_buf,
bb803951
CM
276 int slot)
277{
e20d96d6 278 struct btrfs_node *node = btrfs_buffer_node(parent_buf);
bb803951
CM
279 if (slot < 0)
280 return NULL;
7518a238 281 if (slot >= btrfs_header_nritems(&node->header))
bb803951 282 return NULL;
1d4f8a0c 283 return read_tree_block(root, btrfs_node_blockptr(node, slot));
bb803951
CM
284}
285
e089f05c
CM
286static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
287 *root, struct btrfs_path *path, int level)
bb803951 288{
e20d96d6
CM
289 struct buffer_head *right_buf;
290 struct buffer_head *mid_buf;
291 struct buffer_head *left_buf;
292 struct buffer_head *parent_buf = NULL;
234b63a0
CM
293 struct btrfs_node *right = NULL;
294 struct btrfs_node *mid;
295 struct btrfs_node *left = NULL;
296 struct btrfs_node *parent = NULL;
bb803951
CM
297 int ret = 0;
298 int wret;
299 int pslot;
bb803951 300 int orig_slot = path->slots[level];
79f95c82 301 u64 orig_ptr;
bb803951
CM
302
303 if (level == 0)
304 return 0;
305
306 mid_buf = path->nodes[level];
e20d96d6 307 mid = btrfs_buffer_node(mid_buf);
1d4f8a0c 308 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
79f95c82 309
234b63a0 310 if (level < BTRFS_MAX_LEVEL - 1)
bb803951
CM
311 parent_buf = path->nodes[level + 1];
312 pslot = path->slots[level + 1];
313
40689478
CM
314 /*
315 * deal with the case where there is only one pointer in the root
316 * by promoting the node below to a root
317 */
bb803951 318 if (!parent_buf) {
e20d96d6 319 struct buffer_head *child;
7eccb903 320 u64 blocknr = bh_blocknr(mid_buf);
bb803951 321
7518a238 322 if (btrfs_header_nritems(&mid->header) != 1)
bb803951
CM
323 return 0;
324
325 /* promote the child to a root */
326 child = read_node_slot(root, mid_buf, 0);
327 BUG_ON(!child);
328 root->node = child;
329 path->nodes[level] = NULL;
d6025579
CM
330 clean_tree_block(trans, root, mid_buf);
331 wait_on_buffer(mid_buf);
bb803951 332 /* once for the path */
234b63a0 333 btrfs_block_release(root, mid_buf);
bb803951 334 /* once for the root ptr */
234b63a0 335 btrfs_block_release(root, mid_buf);
e089f05c 336 return btrfs_free_extent(trans, root, blocknr, 1, 1);
bb803951 337 }
e20d96d6 338 parent = btrfs_buffer_node(parent_buf);
bb803951 339
123abc88
CM
340 if (btrfs_header_nritems(&mid->header) >
341 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
bb803951
CM
342 return 0;
343
bb803951
CM
344 left_buf = read_node_slot(root, parent_buf, pslot - 1);
345 right_buf = read_node_slot(root, parent_buf, pslot + 1);
79f95c82
CM
346
347 /* first, try to make some room in the middle buffer */
bb803951 348 if (left_buf) {
e089f05c
CM
349 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
350 &left_buf);
e20d96d6 351 left = btrfs_buffer_node(left_buf);
7518a238 352 orig_slot += btrfs_header_nritems(&left->header);
e089f05c 353 wret = push_node_left(trans, root, left_buf, mid_buf);
79f95c82
CM
354 if (wret < 0)
355 ret = wret;
bb803951 356 }
79f95c82
CM
357
358 /*
359 * then try to empty the right most buffer into the middle
360 */
bb803951 361 if (right_buf) {
e089f05c
CM
362 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
363 &right_buf);
e20d96d6 364 right = btrfs_buffer_node(right_buf);
e089f05c 365 wret = push_node_left(trans, root, mid_buf, right_buf);
79f95c82
CM
366 if (wret < 0)
367 ret = wret;
7518a238 368 if (btrfs_header_nritems(&right->header) == 0) {
7eccb903 369 u64 blocknr = bh_blocknr(right_buf);
e089f05c 370 clean_tree_block(trans, root, right_buf);
d6025579
CM
371 wait_on_buffer(right_buf);
372 btrfs_block_release(root, right_buf);
bb803951
CM
373 right_buf = NULL;
374 right = NULL;
e089f05c
CM
375 wret = del_ptr(trans, root, path, level + 1, pslot +
376 1);
bb803951
CM
377 if (wret)
378 ret = wret;
e089f05c 379 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
bb803951
CM
380 if (wret)
381 ret = wret;
382 } else {
d6025579
CM
383 btrfs_memcpy(root, parent,
384 &parent->ptrs[pslot + 1].key,
385 &right->ptrs[0].key,
386 sizeof(struct btrfs_disk_key));
387 btrfs_mark_buffer_dirty(parent_buf);
bb803951
CM
388 }
389 }
7518a238 390 if (btrfs_header_nritems(&mid->header) == 1) {
79f95c82
CM
391 /*
392 * we're not allowed to leave a node with one item in the
393 * tree during a delete. A deletion from lower in the tree
394 * could try to delete the only pointer in this node.
395 * So, pull some keys from the left.
396 * There has to be a left pointer at this point because
397 * otherwise we would have pulled some pointers from the
398 * right
399 */
400 BUG_ON(!left_buf);
e089f05c 401 wret = balance_node_right(trans, root, mid_buf, left_buf);
79f95c82
CM
402 if (wret < 0)
403 ret = wret;
404 BUG_ON(wret == 1);
405 }
7518a238 406 if (btrfs_header_nritems(&mid->header) == 0) {
79f95c82 407 /* we've managed to empty the middle node, drop it */
7eccb903 408 u64 blocknr = bh_blocknr(mid_buf);
e089f05c 409 clean_tree_block(trans, root, mid_buf);
d6025579
CM
410 wait_on_buffer(mid_buf);
411 btrfs_block_release(root, mid_buf);
bb803951
CM
412 mid_buf = NULL;
413 mid = NULL;
e089f05c 414 wret = del_ptr(trans, root, path, level + 1, pslot);
bb803951
CM
415 if (wret)
416 ret = wret;
e089f05c 417 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
bb803951
CM
418 if (wret)
419 ret = wret;
79f95c82
CM
420 } else {
421 /* update the parent key to reflect our changes */
d6025579
CM
422 btrfs_memcpy(root, parent,
423 &parent->ptrs[pslot].key, &mid->ptrs[0].key,
424 sizeof(struct btrfs_disk_key));
425 btrfs_mark_buffer_dirty(parent_buf);
79f95c82 426 }
bb803951 427
79f95c82 428 /* update the path */
bb803951 429 if (left_buf) {
7518a238 430 if (btrfs_header_nritems(&left->header) > orig_slot) {
e20d96d6 431 get_bh(left_buf);
bb803951
CM
432 path->nodes[level] = left_buf;
433 path->slots[level + 1] -= 1;
434 path->slots[level] = orig_slot;
435 if (mid_buf)
234b63a0 436 btrfs_block_release(root, mid_buf);
bb803951 437 } else {
7518a238 438 orig_slot -= btrfs_header_nritems(&left->header);
bb803951
CM
439 path->slots[level] = orig_slot;
440 }
441 }
79f95c82 442 /* double check we haven't messed things up */
123abc88 443 check_block(root, path, level);
e20d96d6
CM
444 if (orig_ptr !=
445 btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]),
446 path->slots[level]))
79f95c82 447 BUG();
bb803951
CM
448
449 if (right_buf)
234b63a0 450 btrfs_block_release(root, right_buf);
bb803951 451 if (left_buf)
234b63a0 452 btrfs_block_release(root, left_buf);
bb803951
CM
453 return ret;
454}
455
e66f709b
CM
456/* returns zero if the push worked, non-zero otherwise */
457static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
458 struct btrfs_root *root,
459 struct btrfs_path *path, int level)
460{
461 struct buffer_head *right_buf;
462 struct buffer_head *mid_buf;
463 struct buffer_head *left_buf;
464 struct buffer_head *parent_buf = NULL;
465 struct btrfs_node *right = NULL;
466 struct btrfs_node *mid;
467 struct btrfs_node *left = NULL;
468 struct btrfs_node *parent = NULL;
469 int ret = 0;
470 int wret;
471 int pslot;
472 int orig_slot = path->slots[level];
473 u64 orig_ptr;
474
475 if (level == 0)
476 return 1;
477
478 mid_buf = path->nodes[level];
479 mid = btrfs_buffer_node(mid_buf);
480 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
481
482 if (level < BTRFS_MAX_LEVEL - 1)
483 parent_buf = path->nodes[level + 1];
484 pslot = path->slots[level + 1];
485
486 if (!parent_buf)
487 return 1;
488 parent = btrfs_buffer_node(parent_buf);
489
490 left_buf = read_node_slot(root, parent_buf, pslot - 1);
491
492 /* first, try to make some room in the middle buffer */
493 if (left_buf) {
494 u32 left_nr;
495 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
496 &left_buf);
497 left = btrfs_buffer_node(left_buf);
498 left_nr = btrfs_header_nritems(&left->header);
499 wret = push_node_left(trans, root, left_buf, mid_buf);
500 if (wret < 0)
501 ret = wret;
502 if (wret == 0) {
503 orig_slot += left_nr;
504 btrfs_memcpy(root, parent,
505 &parent->ptrs[pslot].key,
506 &mid->ptrs[0].key,
507 sizeof(struct btrfs_disk_key));
508 btrfs_mark_buffer_dirty(parent_buf);
509 if (btrfs_header_nritems(&left->header) > orig_slot) {
510 path->nodes[level] = left_buf;
511 path->slots[level + 1] -= 1;
512 path->slots[level] = orig_slot;
513 btrfs_block_release(root, mid_buf);
514 } else {
515 orig_slot -=
516 btrfs_header_nritems(&left->header);
517 path->slots[level] = orig_slot;
518 btrfs_block_release(root, left_buf);
519 }
520 check_node(root, path, level);
521 return 0;
522 }
523 btrfs_block_release(root, left_buf);
524 }
525 right_buf = read_node_slot(root, parent_buf, pslot + 1);
526
527 /*
528 * then try to empty the right most buffer into the middle
529 */
530 if (right_buf) {
531 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
532 &right_buf);
533 right = btrfs_buffer_node(right_buf);
534 wret = balance_node_right(trans, root, right_buf, mid_buf);
535 if (wret < 0)
536 ret = wret;
537 if (wret == 0) {
538 btrfs_memcpy(root, parent,
539 &parent->ptrs[pslot + 1].key,
540 &right->ptrs[0].key,
541 sizeof(struct btrfs_disk_key));
542 btrfs_mark_buffer_dirty(parent_buf);
543 if (btrfs_header_nritems(&mid->header) <= orig_slot) {
544 path->nodes[level] = right_buf;
545 path->slots[level + 1] += 1;
546 path->slots[level] = orig_slot -
547 btrfs_header_nritems(&mid->header);
548 btrfs_block_release(root, mid_buf);
549 } else {
550 btrfs_block_release(root, right_buf);
551 }
552 check_node(root, path, level);
553 return 0;
554 }
555 btrfs_block_release(root, right_buf);
556 }
557 check_node(root, path, level);
558 return 1;
559}
560
74123bd7
CM
561/*
562 * look for key in the tree. path is filled in with nodes along the way
563 * if key is found, we return zero and you can find the item in the leaf
564 * level of the path (level 0)
565 *
566 * If the key isn't found, the path points to the slot where it should
aa5d6bed
CM
567 * be inserted, and 1 is returned. If there are other errors during the
568 * search a negative error number is returned.
97571fd0
CM
569 *
570 * if ins_len > 0, nodes and leaves will be split as we walk down the
571 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
572 * possible)
74123bd7 573 */
e089f05c
CM
574int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
575 *root, struct btrfs_key *key, struct btrfs_path *p, int
576 ins_len, int cow)
be0e5c09 577{
e20d96d6
CM
578 struct buffer_head *b;
579 struct buffer_head *cow_buf;
234b63a0 580 struct btrfs_node *c;
be0e5c09
CM
581 int slot;
582 int ret;
583 int level;
5c680ed6 584
22b0ebda
CM
585 WARN_ON(p->nodes[0] != NULL);
586 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
bb803951
CM
587again:
588 b = root->node;
e20d96d6 589 get_bh(b);
eb60ceac 590 while (b) {
e20d96d6
CM
591 c = btrfs_buffer_node(b);
592 level = btrfs_header_level(&c->header);
02217ed2
CM
593 if (cow) {
594 int wret;
e20d96d6
CM
595 wret = btrfs_cow_block(trans, root, b,
596 p->nodes[level + 1],
597 p->slots[level + 1],
e089f05c 598 &cow_buf);
02217ed2 599 b = cow_buf;
2c90e5d6 600 c = btrfs_buffer_node(b);
02217ed2
CM
601 }
602 BUG_ON(!cow && ins_len);
2c90e5d6
CM
603 if (level != btrfs_header_level(&c->header))
604 WARN_ON(1);
605 level = btrfs_header_level(&c->header);
eb60ceac 606 p->nodes[level] = b;
123abc88 607 ret = check_block(root, p, level);
aa5d6bed
CM
608 if (ret)
609 return -1;
be0e5c09 610 ret = bin_search(c, key, &slot);
7518a238 611 if (!btrfs_is_leaf(c)) {
be0e5c09
CM
612 if (ret && slot > 0)
613 slot -= 1;
614 p->slots[level] = slot;
d4dbff95
CM
615 if (ins_len > 0 && btrfs_header_nritems(&c->header) >=
616 BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
e089f05c 617 int sret = split_node(trans, root, p, level);
5c680ed6
CM
618 BUG_ON(sret > 0);
619 if (sret)
620 return sret;
621 b = p->nodes[level];
e20d96d6 622 c = btrfs_buffer_node(b);
5c680ed6 623 slot = p->slots[level];
bb803951 624 } else if (ins_len < 0) {
e089f05c
CM
625 int sret = balance_level(trans, root, p,
626 level);
bb803951
CM
627 if (sret)
628 return sret;
629 b = p->nodes[level];
630 if (!b)
631 goto again;
e20d96d6 632 c = btrfs_buffer_node(b);
bb803951 633 slot = p->slots[level];
7518a238 634 BUG_ON(btrfs_header_nritems(&c->header) == 1);
5c680ed6 635 }
1d4f8a0c 636 b = read_tree_block(root, btrfs_node_blockptr(c, slot));
be0e5c09 637 } else {
234b63a0 638 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
be0e5c09 639 p->slots[level] = slot;
123abc88 640 if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
0783fcfc 641 sizeof(struct btrfs_item) + ins_len) {
d4dbff95
CM
642 int sret = split_leaf(trans, root, key,
643 p, ins_len);
5c680ed6
CM
644 BUG_ON(sret > 0);
645 if (sret)
646 return sret;
647 }
be0e5c09
CM
648 return ret;
649 }
650 }
aa5d6bed 651 return 1;
be0e5c09
CM
652}
653
74123bd7
CM
654/*
655 * adjust the pointers going up the tree, starting at level
656 * making sure the right key of each node is points to 'key'.
657 * This is used after shifting pointers to the left, so it stops
658 * fixing up pointers when a given leaf/node is not in slot 0 of the
659 * higher levels
aa5d6bed
CM
660 *
661 * If this fails to write a tree block, it returns -1, but continues
662 * fixing up the blocks in ram so the tree is consistent.
74123bd7 663 */
e089f05c
CM
664static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
665 *root, struct btrfs_path *path, struct btrfs_disk_key
666 *key, int level)
be0e5c09
CM
667{
668 int i;
aa5d6bed 669 int ret = 0;
234b63a0
CM
670 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
671 struct btrfs_node *t;
be0e5c09 672 int tslot = path->slots[i];
eb60ceac 673 if (!path->nodes[i])
be0e5c09 674 break;
e20d96d6 675 t = btrfs_buffer_node(path->nodes[i]);
d6025579
CM
676 btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key));
677 btrfs_mark_buffer_dirty(path->nodes[i]);
be0e5c09
CM
678 if (tslot != 0)
679 break;
680 }
aa5d6bed 681 return ret;
be0e5c09
CM
682}
683
74123bd7
CM
684/*
685 * try to push data from one node into the next node left in the
79f95c82 686 * tree.
aa5d6bed
CM
687 *
688 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
689 * error, and > 0 if there was no room in the left hand block.
74123bd7 690 */
e089f05c 691static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
e20d96d6
CM
692 *root, struct buffer_head *dst_buf, struct
693 buffer_head *src_buf)
be0e5c09 694{
e20d96d6
CM
695 struct btrfs_node *src = btrfs_buffer_node(src_buf);
696 struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
be0e5c09 697 int push_items = 0;
bb803951
CM
698 int src_nritems;
699 int dst_nritems;
aa5d6bed 700 int ret = 0;
be0e5c09 701
7518a238
CM
702 src_nritems = btrfs_header_nritems(&src->header);
703 dst_nritems = btrfs_header_nritems(&dst->header);
123abc88 704 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
eb60ceac 705 if (push_items <= 0) {
be0e5c09 706 return 1;
eb60ceac 707 }
be0e5c09 708
bb803951 709 if (src_nritems < push_items)
79f95c82
CM
710 push_items = src_nritems;
711
d6025579
CM
712 btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs,
713 push_items * sizeof(struct btrfs_key_ptr));
bb803951 714 if (push_items < src_nritems) {
d6025579 715 btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items,
e2fa7227 716 (src_nritems - push_items) *
123abc88 717 sizeof(struct btrfs_key_ptr));
bb803951 718 }
7518a238
CM
719 btrfs_set_header_nritems(&src->header, src_nritems - push_items);
720 btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
d6025579
CM
721 btrfs_mark_buffer_dirty(src_buf);
722 btrfs_mark_buffer_dirty(dst_buf);
79f95c82
CM
723 return ret;
724}
725
726/*
727 * try to push data from one node into the next node right in the
728 * tree.
729 *
730 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
731 * error, and > 0 if there was no room in the right hand block.
732 *
733 * this will only push up to 1/2 the contents of the left node over
734 */
e089f05c 735static int balance_node_right(struct btrfs_trans_handle *trans, struct
e20d96d6
CM
736 btrfs_root *root, struct buffer_head *dst_buf,
737 struct buffer_head *src_buf)
79f95c82 738{
e20d96d6
CM
739 struct btrfs_node *src = btrfs_buffer_node(src_buf);
740 struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
79f95c82
CM
741 int push_items = 0;
742 int max_push;
743 int src_nritems;
744 int dst_nritems;
745 int ret = 0;
79f95c82 746
7518a238
CM
747 src_nritems = btrfs_header_nritems(&src->header);
748 dst_nritems = btrfs_header_nritems(&dst->header);
123abc88 749 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
79f95c82
CM
750 if (push_items <= 0) {
751 return 1;
752 }
753
754 max_push = src_nritems / 2 + 1;
755 /* don't try to empty the node */
756 if (max_push > src_nritems)
757 return 1;
758 if (max_push < push_items)
759 push_items = max_push;
760
d6025579
CM
761 btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs,
762 dst_nritems * sizeof(struct btrfs_key_ptr));
763
764 btrfs_memcpy(root, dst, dst->ptrs,
765 src->ptrs + src_nritems - push_items,
766 push_items * sizeof(struct btrfs_key_ptr));
79f95c82 767
7518a238
CM
768 btrfs_set_header_nritems(&src->header, src_nritems - push_items);
769 btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
79f95c82 770
d6025579
CM
771 btrfs_mark_buffer_dirty(src_buf);
772 btrfs_mark_buffer_dirty(dst_buf);
aa5d6bed 773 return ret;
be0e5c09
CM
774}
775
97571fd0
CM
776/*
777 * helper function to insert a new root level in the tree.
778 * A new node is allocated, and a single item is inserted to
779 * point to the existing root
aa5d6bed
CM
780 *
781 * returns zero on success or < 0 on failure.
97571fd0 782 */
e089f05c
CM
783static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
784 *root, struct btrfs_path *path, int level)
5c680ed6 785{
e20d96d6 786 struct buffer_head *t;
234b63a0
CM
787 struct btrfs_node *lower;
788 struct btrfs_node *c;
e2fa7227 789 struct btrfs_disk_key *lower_key;
5c680ed6
CM
790
791 BUG_ON(path->nodes[level]);
792 BUG_ON(path->nodes[level-1] != root->node);
793
e089f05c 794 t = btrfs_alloc_free_block(trans, root);
e20d96d6 795 c = btrfs_buffer_node(t);
123abc88 796 memset(c, 0, root->blocksize);
7518a238
CM
797 btrfs_set_header_nritems(&c->header, 1);
798 btrfs_set_header_level(&c->header, level);
7eccb903 799 btrfs_set_header_blocknr(&c->header, bh_blocknr(t));
7f5c1516 800 btrfs_set_header_generation(&c->header, trans->transid);
e20d96d6 801 lower = btrfs_buffer_node(path->nodes[level-1]);
3eb0314d
CM
802 memcpy(c->header.fsid, root->fs_info->disk_super->fsid,
803 sizeof(c->header.fsid));
7518a238 804 if (btrfs_is_leaf(lower))
234b63a0 805 lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
5c680ed6 806 else
123abc88 807 lower_key = &lower->ptrs[0].key;
d6025579
CM
808 btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key,
809 sizeof(struct btrfs_disk_key));
7eccb903 810 btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1]));
d5719762 811
d6025579 812 btrfs_mark_buffer_dirty(t);
d5719762 813
5c680ed6 814 /* the super has an extra ref to root->node */
234b63a0 815 btrfs_block_release(root, root->node);
5c680ed6 816 root->node = t;
e20d96d6 817 get_bh(t);
5c680ed6
CM
818 path->nodes[level] = t;
819 path->slots[level] = 0;
820 return 0;
821}
822
74123bd7
CM
823/*
824 * worker function to insert a single pointer in a node.
825 * the node should have enough room for the pointer already
97571fd0 826 *
74123bd7
CM
827 * slot and level indicate where you want the key to go, and
828 * blocknr is the block the key points to.
aa5d6bed
CM
829 *
830 * returns zero on success and < 0 on any error
74123bd7 831 */
e089f05c
CM
832static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
833 *root, struct btrfs_path *path, struct btrfs_disk_key
834 *key, u64 blocknr, int slot, int level)
74123bd7 835{
234b63a0 836 struct btrfs_node *lower;
74123bd7 837 int nritems;
5c680ed6
CM
838
839 BUG_ON(!path->nodes[level]);
e20d96d6 840 lower = btrfs_buffer_node(path->nodes[level]);
7518a238 841 nritems = btrfs_header_nritems(&lower->header);
74123bd7
CM
842 if (slot > nritems)
843 BUG();
123abc88 844 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
74123bd7
CM
845 BUG();
846 if (slot != nritems) {
d6025579
CM
847 btrfs_memmove(root, lower, lower->ptrs + slot + 1,
848 lower->ptrs + slot,
849 (nritems - slot) * sizeof(struct btrfs_key_ptr));
74123bd7 850 }
d6025579
CM
851 btrfs_memcpy(root, lower, &lower->ptrs[slot].key,
852 key, sizeof(struct btrfs_disk_key));
1d4f8a0c 853 btrfs_set_node_blockptr(lower, slot, blocknr);
7518a238 854 btrfs_set_header_nritems(&lower->header, nritems + 1);
d6025579 855 btrfs_mark_buffer_dirty(path->nodes[level]);
74123bd7
CM
856 return 0;
857}
858
97571fd0
CM
859/*
860 * split the node at the specified level in path in two.
861 * The path is corrected to point to the appropriate node after the split
862 *
863 * Before splitting this tries to make some room in the node by pushing
864 * left and right, if either one works, it returns right away.
aa5d6bed
CM
865 *
866 * returns 0 on success and < 0 on failure
97571fd0 867 */
e089f05c
CM
868static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
869 *root, struct btrfs_path *path, int level)
be0e5c09 870{
e20d96d6 871 struct buffer_head *t;
234b63a0 872 struct btrfs_node *c;
e20d96d6 873 struct buffer_head *split_buffer;
234b63a0 874 struct btrfs_node *split;
be0e5c09 875 int mid;
5c680ed6 876 int ret;
aa5d6bed 877 int wret;
7518a238 878 u32 c_nritems;
eb60ceac 879
5c680ed6 880 t = path->nodes[level];
e20d96d6 881 c = btrfs_buffer_node(t);
5c680ed6
CM
882 if (t == root->node) {
883 /* trying to split the root, lets make a new one */
e089f05c 884 ret = insert_new_root(trans, root, path, level + 1);
5c680ed6
CM
885 if (ret)
886 return ret;
e66f709b
CM
887 } else {
888 ret = push_nodes_for_insert(trans, root, path, level);
889 t = path->nodes[level];
890 c = btrfs_buffer_node(t);
891 if (!ret &&
892 btrfs_header_nritems(&c->header) <
893 BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
894 return 0;
be0e5c09 895 }
e66f709b 896
7518a238 897 c_nritems = btrfs_header_nritems(&c->header);
e089f05c 898 split_buffer = btrfs_alloc_free_block(trans, root);
e20d96d6 899 split = btrfs_buffer_node(split_buffer);
7518a238 900 btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
9a6f11ed 901 btrfs_set_header_level(&split->header, btrfs_header_level(&c->header));
7eccb903 902 btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer));
7f5c1516 903 btrfs_set_header_generation(&split->header, trans->transid);
3eb0314d
CM
904 memcpy(split->header.fsid, root->fs_info->disk_super->fsid,
905 sizeof(split->header.fsid));
7518a238 906 mid = (c_nritems + 1) / 2;
d6025579
CM
907 btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid,
908 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
7518a238
CM
909 btrfs_set_header_nritems(&split->header, c_nritems - mid);
910 btrfs_set_header_nritems(&c->header, mid);
aa5d6bed
CM
911 ret = 0;
912
d6025579
CM
913 btrfs_mark_buffer_dirty(t);
914 btrfs_mark_buffer_dirty(split_buffer);
e089f05c 915 wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
7eccb903 916 bh_blocknr(split_buffer), path->slots[level + 1] + 1,
123abc88 917 level + 1);
aa5d6bed
CM
918 if (wret)
919 ret = wret;
920
5de08d7d 921 if (path->slots[level] >= mid) {
5c680ed6 922 path->slots[level] -= mid;
234b63a0 923 btrfs_block_release(root, t);
5c680ed6
CM
924 path->nodes[level] = split_buffer;
925 path->slots[level + 1] += 1;
926 } else {
234b63a0 927 btrfs_block_release(root, split_buffer);
be0e5c09 928 }
aa5d6bed 929 return ret;
be0e5c09
CM
930}
931
74123bd7
CM
932/*
933 * how many bytes are required to store the items in a leaf. start
934 * and nr indicate which items in the leaf to check. This totals up the
935 * space used both by the item structs and the item data
936 */
234b63a0 937static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
be0e5c09
CM
938{
939 int data_len;
d4dbff95
CM
940 int nritems = btrfs_header_nritems(&l->header);
941 int end = min(nritems, start + nr) - 1;
be0e5c09
CM
942
943 if (!nr)
944 return 0;
0783fcfc
CM
945 data_len = btrfs_item_end(l->items + start);
946 data_len = data_len - btrfs_item_offset(l->items + end);
947 data_len += sizeof(struct btrfs_item) * nr;
d4dbff95 948 WARN_ON(data_len < 0);
be0e5c09
CM
949 return data_len;
950}
951
d4dbff95
CM
952/*
953 * The space between the end of the leaf items and
954 * the start of the leaf data. IOW, how much room
955 * the leaf has left for both items and data
956 */
957int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
958{
959 int nritems = btrfs_header_nritems(&leaf->header);
960 return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
961}
962
00ec4c51
CM
963/*
964 * push some data in the path leaf to the right, trying to free up at
965 * least data_size bytes. returns zero if the push worked, nonzero otherwise
aa5d6bed
CM
966 *
967 * returns 1 if the push failed because the other node didn't have enough
968 * room, 0 if everything worked out and < 0 if there were major errors.
00ec4c51 969 */
e089f05c
CM
970static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
971 *root, struct btrfs_path *path, int data_size)
00ec4c51 972{
e20d96d6
CM
973 struct buffer_head *left_buf = path->nodes[0];
974 struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf);
234b63a0 975 struct btrfs_leaf *right;
e20d96d6
CM
976 struct buffer_head *right_buf;
977 struct buffer_head *upper;
978 struct btrfs_node *upper_node;
00ec4c51
CM
979 int slot;
980 int i;
981 int free_space;
982 int push_space = 0;
983 int push_items = 0;
0783fcfc 984 struct btrfs_item *item;
7518a238
CM
985 u32 left_nritems;
986 u32 right_nritems;
00ec4c51
CM
987
988 slot = path->slots[1];
989 if (!path->nodes[1]) {
990 return 1;
991 }
992 upper = path->nodes[1];
e20d96d6
CM
993 upper_node = btrfs_buffer_node(upper);
994 if (slot >= btrfs_header_nritems(&upper_node->header) - 1) {
00ec4c51
CM
995 return 1;
996 }
e20d96d6
CM
997 right_buf = read_tree_block(root,
998 btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1));
999 right = btrfs_buffer_leaf(right_buf);
123abc88 1000 free_space = btrfs_leaf_free_space(root, right);
0783fcfc 1001 if (free_space < data_size + sizeof(struct btrfs_item)) {
234b63a0 1002 btrfs_block_release(root, right_buf);
00ec4c51
CM
1003 return 1;
1004 }
02217ed2 1005 /* cow and double check */
e089f05c 1006 btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf);
e20d96d6 1007 right = btrfs_buffer_leaf(right_buf);
123abc88 1008 free_space = btrfs_leaf_free_space(root, right);
0783fcfc 1009 if (free_space < data_size + sizeof(struct btrfs_item)) {
234b63a0 1010 btrfs_block_release(root, right_buf);
02217ed2
CM
1011 return 1;
1012 }
1013
7518a238 1014 left_nritems = btrfs_header_nritems(&left->header);
a429e513
CM
1015 if (left_nritems == 0) {
1016 btrfs_block_release(root, right_buf);
1017 return 1;
1018 }
1019 for (i = left_nritems - 1; i >= 1; i--) {
00ec4c51
CM
1020 item = left->items + i;
1021 if (path->slots[0] == i)
1022 push_space += data_size + sizeof(*item);
0783fcfc
CM
1023 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1024 free_space)
00ec4c51
CM
1025 break;
1026 push_items++;
0783fcfc 1027 push_space += btrfs_item_size(item) + sizeof(*item);
00ec4c51
CM
1028 }
1029 if (push_items == 0) {
234b63a0 1030 btrfs_block_release(root, right_buf);
00ec4c51
CM
1031 return 1;
1032 }
a429e513
CM
1033 if (push_items == left_nritems)
1034 WARN_ON(1);
7518a238 1035 right_nritems = btrfs_header_nritems(&right->header);
00ec4c51 1036 /* push left to right */
0783fcfc 1037 push_space = btrfs_item_end(left->items + left_nritems - push_items);
123abc88 1038 push_space -= leaf_data_end(root, left);
00ec4c51 1039 /* make room in the right data area */
d6025579
CM
1040 btrfs_memmove(root, right, btrfs_leaf_data(right) +
1041 leaf_data_end(root, right) - push_space,
1042 btrfs_leaf_data(right) +
1043 leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) -
1044 leaf_data_end(root, right));
00ec4c51 1045 /* copy from the left data area */
d6025579
CM
1046 btrfs_memcpy(root, right, btrfs_leaf_data(right) +
1047 BTRFS_LEAF_DATA_SIZE(root) - push_space,
1048 btrfs_leaf_data(left) + leaf_data_end(root, left),
1049 push_space);
1050 btrfs_memmove(root, right, right->items + push_items, right->items,
0783fcfc 1051 right_nritems * sizeof(struct btrfs_item));
00ec4c51 1052 /* copy the items from left to right */
d6025579
CM
1053 btrfs_memcpy(root, right, right->items, left->items +
1054 left_nritems - push_items,
1055 push_items * sizeof(struct btrfs_item));
00ec4c51
CM
1056
1057 /* update the item pointers */
7518a238
CM
1058 right_nritems += push_items;
1059 btrfs_set_header_nritems(&right->header, right_nritems);
123abc88 1060 push_space = BTRFS_LEAF_DATA_SIZE(root);
7518a238 1061 for (i = 0; i < right_nritems; i++) {
0783fcfc
CM
1062 btrfs_set_item_offset(right->items + i, push_space -
1063 btrfs_item_size(right->items + i));
1064 push_space = btrfs_item_offset(right->items + i);
00ec4c51 1065 }
7518a238
CM
1066 left_nritems -= push_items;
1067 btrfs_set_header_nritems(&left->header, left_nritems);
00ec4c51 1068
d6025579
CM
1069 btrfs_mark_buffer_dirty(left_buf);
1070 btrfs_mark_buffer_dirty(right_buf);
a429e513 1071
d6025579 1072 btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key,
e2fa7227 1073 &right->items[0].key, sizeof(struct btrfs_disk_key));
d6025579 1074 btrfs_mark_buffer_dirty(upper);
02217ed2 1075
00ec4c51 1076 /* then fixup the leaf pointer in the path */
7518a238
CM
1077 if (path->slots[0] >= left_nritems) {
1078 path->slots[0] -= left_nritems;
234b63a0 1079 btrfs_block_release(root, path->nodes[0]);
00ec4c51
CM
1080 path->nodes[0] = right_buf;
1081 path->slots[1] += 1;
1082 } else {
234b63a0 1083 btrfs_block_release(root, right_buf);
00ec4c51
CM
1084 }
1085 return 0;
1086}
74123bd7
CM
1087/*
1088 * push some data in the path leaf to the left, trying to free up at
1089 * least data_size bytes. returns zero if the push worked, nonzero otherwise
1090 */
e089f05c
CM
1091static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1092 *root, struct btrfs_path *path, int data_size)
be0e5c09 1093{
e20d96d6
CM
1094 struct buffer_head *right_buf = path->nodes[0];
1095 struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf);
1096 struct buffer_head *t;
234b63a0 1097 struct btrfs_leaf *left;
be0e5c09
CM
1098 int slot;
1099 int i;
1100 int free_space;
1101 int push_space = 0;
1102 int push_items = 0;
0783fcfc 1103 struct btrfs_item *item;
7518a238 1104 u32 old_left_nritems;
aa5d6bed
CM
1105 int ret = 0;
1106 int wret;
be0e5c09
CM
1107
1108 slot = path->slots[1];
1109 if (slot == 0) {
1110 return 1;
1111 }
1112 if (!path->nodes[1]) {
1113 return 1;
1114 }
e20d96d6
CM
1115 t = read_tree_block(root,
1116 btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1));
1117 left = btrfs_buffer_leaf(t);
123abc88 1118 free_space = btrfs_leaf_free_space(root, left);
0783fcfc 1119 if (free_space < data_size + sizeof(struct btrfs_item)) {
234b63a0 1120 btrfs_block_release(root, t);
be0e5c09
CM
1121 return 1;
1122 }
02217ed2
CM
1123
1124 /* cow and double check */
e089f05c 1125 btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
e20d96d6 1126 left = btrfs_buffer_leaf(t);
123abc88 1127 free_space = btrfs_leaf_free_space(root, left);
0783fcfc 1128 if (free_space < data_size + sizeof(struct btrfs_item)) {
234b63a0 1129 btrfs_block_release(root, t);
02217ed2
CM
1130 return 1;
1131 }
1132
a429e513
CM
1133 if (btrfs_header_nritems(&right->header) == 0) {
1134 btrfs_block_release(root, t);
1135 return 1;
1136 }
1137
1138 for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) {
be0e5c09
CM
1139 item = right->items + i;
1140 if (path->slots[0] == i)
1141 push_space += data_size + sizeof(*item);
0783fcfc
CM
1142 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1143 free_space)
be0e5c09
CM
1144 break;
1145 push_items++;
0783fcfc 1146 push_space += btrfs_item_size(item) + sizeof(*item);
be0e5c09
CM
1147 }
1148 if (push_items == 0) {
234b63a0 1149 btrfs_block_release(root, t);
be0e5c09
CM
1150 return 1;
1151 }
a429e513
CM
1152 if (push_items == btrfs_header_nritems(&right->header))
1153 WARN_ON(1);
be0e5c09 1154 /* push data from right to left */
d6025579
CM
1155 btrfs_memcpy(root, left, left->items +
1156 btrfs_header_nritems(&left->header),
1157 right->items, push_items * sizeof(struct btrfs_item));
123abc88 1158 push_space = BTRFS_LEAF_DATA_SIZE(root) -
0783fcfc 1159 btrfs_item_offset(right->items + push_items -1);
d6025579
CM
1160 btrfs_memcpy(root, left, btrfs_leaf_data(left) +
1161 leaf_data_end(root, left) - push_space,
1162 btrfs_leaf_data(right) +
1163 btrfs_item_offset(right->items + push_items - 1),
1164 push_space);
7518a238 1165 old_left_nritems = btrfs_header_nritems(&left->header);
eb60ceac
CM
1166 BUG_ON(old_left_nritems < 0);
1167
0783fcfc 1168 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
123abc88
CM
1169 u32 ioff = btrfs_item_offset(left->items + i);
1170 btrfs_set_item_offset(left->items + i, ioff -
1171 (BTRFS_LEAF_DATA_SIZE(root) -
0783fcfc
CM
1172 btrfs_item_offset(left->items +
1173 old_left_nritems - 1)));
be0e5c09 1174 }
7518a238 1175 btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
be0e5c09
CM
1176
1177 /* fixup right node */
0783fcfc 1178 push_space = btrfs_item_offset(right->items + push_items - 1) -
123abc88 1179 leaf_data_end(root, right);
d6025579
CM
1180 btrfs_memmove(root, right, btrfs_leaf_data(right) +
1181 BTRFS_LEAF_DATA_SIZE(root) - push_space,
1182 btrfs_leaf_data(right) +
1183 leaf_data_end(root, right), push_space);
1184 btrfs_memmove(root, right, right->items, right->items + push_items,
7518a238 1185 (btrfs_header_nritems(&right->header) - push_items) *
0783fcfc 1186 sizeof(struct btrfs_item));
7518a238
CM
1187 btrfs_set_header_nritems(&right->header,
1188 btrfs_header_nritems(&right->header) -
1189 push_items);
123abc88 1190 push_space = BTRFS_LEAF_DATA_SIZE(root);
eb60ceac 1191
7518a238 1192 for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
0783fcfc
CM
1193 btrfs_set_item_offset(right->items + i, push_space -
1194 btrfs_item_size(right->items + i));
1195 push_space = btrfs_item_offset(right->items + i);
be0e5c09 1196 }
eb60ceac 1197
d6025579
CM
1198 btrfs_mark_buffer_dirty(t);
1199 btrfs_mark_buffer_dirty(right_buf);
e089f05c 1200 wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
aa5d6bed
CM
1201 if (wret)
1202 ret = wret;
be0e5c09
CM
1203
1204 /* then fixup the leaf pointer in the path */
1205 if (path->slots[0] < push_items) {
1206 path->slots[0] += old_left_nritems;
234b63a0 1207 btrfs_block_release(root, path->nodes[0]);
eb60ceac 1208 path->nodes[0] = t;
be0e5c09
CM
1209 path->slots[1] -= 1;
1210 } else {
234b63a0 1211 btrfs_block_release(root, t);
be0e5c09
CM
1212 path->slots[0] -= push_items;
1213 }
eb60ceac 1214 BUG_ON(path->slots[0] < 0);
aa5d6bed 1215 return ret;
be0e5c09
CM
1216}
1217
74123bd7
CM
1218/*
1219 * split the path's leaf in two, making sure there is at least data_size
1220 * available for the resulting leaf level of the path.
aa5d6bed
CM
1221 *
1222 * returns 0 if all went well and < 0 on failure.
74123bd7 1223 */
e089f05c 1224static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
d4dbff95
CM
1225 *root, struct btrfs_key *ins_key,
1226 struct btrfs_path *path, int data_size)
be0e5c09 1227{
e20d96d6 1228 struct buffer_head *l_buf;
234b63a0 1229 struct btrfs_leaf *l;
7518a238 1230 u32 nritems;
eb60ceac
CM
1231 int mid;
1232 int slot;
234b63a0 1233 struct btrfs_leaf *right;
e20d96d6 1234 struct buffer_head *right_buffer;
0783fcfc 1235 int space_needed = data_size + sizeof(struct btrfs_item);
be0e5c09
CM
1236 int data_copy_size;
1237 int rt_data_off;
1238 int i;
d4dbff95 1239 int ret = 0;
aa5d6bed 1240 int wret;
d4dbff95
CM
1241 int double_split = 0;
1242 struct btrfs_disk_key disk_key;
aa5d6bed 1243
40689478 1244 /* first try to make some room by pushing left and right */
e089f05c 1245 wret = push_leaf_left(trans, root, path, data_size);
eaee50e8
CM
1246 if (wret < 0)
1247 return wret;
1248 if (wret) {
e089f05c 1249 wret = push_leaf_right(trans, root, path, data_size);
eaee50e8
CM
1250 if (wret < 0)
1251 return wret;
1252 }
aa5d6bed 1253 l_buf = path->nodes[0];
e20d96d6 1254 l = btrfs_buffer_leaf(l_buf);
aa5d6bed
CM
1255
1256 /* did the pushes work? */
123abc88
CM
1257 if (btrfs_leaf_free_space(root, l) >=
1258 sizeof(struct btrfs_item) + data_size)
aa5d6bed
CM
1259 return 0;
1260
5c680ed6 1261 if (!path->nodes[1]) {
e089f05c 1262 ret = insert_new_root(trans, root, path, 1);
5c680ed6
CM
1263 if (ret)
1264 return ret;
1265 }
eb60ceac 1266 slot = path->slots[0];
7518a238 1267 nritems = btrfs_header_nritems(&l->header);
eb60ceac 1268 mid = (nritems + 1)/ 2;
e089f05c 1269 right_buffer = btrfs_alloc_free_block(trans, root);
eb60ceac 1270 BUG_ON(!right_buffer);
e20d96d6 1271 right = btrfs_buffer_leaf(right_buffer);
123abc88 1272 memset(&right->header, 0, sizeof(right->header));
7eccb903 1273 btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
7f5c1516 1274 btrfs_set_header_generation(&right->header, trans->transid);
7518a238 1275 btrfs_set_header_level(&right->header, 0);
3eb0314d
CM
1276 memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
1277 sizeof(right->header.fsid));
d4dbff95
CM
1278 if (mid <= slot) {
1279 if (nritems == 1 ||
1280 leaf_space_used(l, mid, nritems - mid) + space_needed >
1281 BTRFS_LEAF_DATA_SIZE(root)) {
1282 if (slot >= nritems) {
1283 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1284 btrfs_set_header_nritems(&right->header, 0);
1285 wret = insert_ptr(trans, root, path,
1286 &disk_key,
7eccb903 1287 bh_blocknr(right_buffer),
d4dbff95
CM
1288 path->slots[1] + 1, 1);
1289 if (wret)
1290 ret = wret;
1291 btrfs_block_release(root, path->nodes[0]);
1292 path->nodes[0] = right_buffer;
1293 path->slots[0] = 0;
1294 path->slots[1] += 1;
1295 return ret;
1296 }
1297 mid = slot;
1298 double_split = 1;
1299 }
1300 } else {
1301 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1302 BTRFS_LEAF_DATA_SIZE(root)) {
1303 if (slot == 0) {
1304 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1305 btrfs_set_header_nritems(&right->header, 0);
1306 wret = insert_ptr(trans, root, path,
1307 &disk_key,
7eccb903 1308 bh_blocknr(right_buffer),
d4dbff95
CM
1309 path->slots[1] - 1, 1);
1310 if (wret)
1311 ret = wret;
1312 btrfs_block_release(root, path->nodes[0]);
1313 path->nodes[0] = right_buffer;
1314 path->slots[0] = 0;
1315 path->slots[1] -= 1;
a429e513
CM
1316 if (path->slots[1] == 0) {
1317 wret = fixup_low_keys(trans, root,
1318 path, &disk_key, 1);
1319 if (wret)
1320 ret = wret;
1321 }
d4dbff95
CM
1322 return ret;
1323 }
1324 mid = slot;
1325 double_split = 1;
1326 }
1327 }
1328 btrfs_set_header_nritems(&right->header, nritems - mid);
123abc88
CM
1329 data_copy_size = btrfs_item_end(l->items + mid) -
1330 leaf_data_end(root, l);
d6025579
CM
1331 btrfs_memcpy(root, right, right->items, l->items + mid,
1332 (nritems - mid) * sizeof(struct btrfs_item));
1333 btrfs_memcpy(root, right,
1334 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1335 data_copy_size, btrfs_leaf_data(l) +
1336 leaf_data_end(root, l), data_copy_size);
123abc88
CM
1337 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1338 btrfs_item_end(l->items + mid);
74123bd7 1339
0783fcfc 1340 for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
123abc88 1341 u32 ioff = btrfs_item_offset(right->items + i);
0783fcfc
CM
1342 btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
1343 }
74123bd7 1344
7518a238 1345 btrfs_set_header_nritems(&l->header, mid);
aa5d6bed 1346 ret = 0;
e089f05c 1347 wret = insert_ptr(trans, root, path, &right->items[0].key,
7eccb903 1348 bh_blocknr(right_buffer), path->slots[1] + 1, 1);
aa5d6bed
CM
1349 if (wret)
1350 ret = wret;
d6025579
CM
1351 btrfs_mark_buffer_dirty(right_buffer);
1352 btrfs_mark_buffer_dirty(l_buf);
eb60ceac 1353 BUG_ON(path->slots[0] != slot);
be0e5c09 1354 if (mid <= slot) {
234b63a0 1355 btrfs_block_release(root, path->nodes[0]);
eb60ceac 1356 path->nodes[0] = right_buffer;
be0e5c09
CM
1357 path->slots[0] -= mid;
1358 path->slots[1] += 1;
eb60ceac 1359 } else
234b63a0 1360 btrfs_block_release(root, right_buffer);
eb60ceac 1361 BUG_ON(path->slots[0] < 0);
d4dbff95
CM
1362
1363 if (!double_split)
1364 return ret;
1365 right_buffer = btrfs_alloc_free_block(trans, root);
1366 BUG_ON(!right_buffer);
1367 right = btrfs_buffer_leaf(right_buffer);
1368 memset(&right->header, 0, sizeof(right->header));
7eccb903 1369 btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
d4dbff95
CM
1370 btrfs_set_header_generation(&right->header, trans->transid);
1371 btrfs_set_header_level(&right->header, 0);
3eb0314d
CM
1372 memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
1373 sizeof(right->header.fsid));
d4dbff95
CM
1374 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1375 btrfs_set_header_nritems(&right->header, 0);
1376 wret = insert_ptr(trans, root, path,
1377 &disk_key,
7eccb903 1378 bh_blocknr(right_buffer),
d4dbff95
CM
1379 path->slots[1], 1);
1380 if (wret)
1381 ret = wret;
a429e513
CM
1382 if (path->slots[1] == 0) {
1383 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1384 if (wret)
1385 ret = wret;
1386 }
d4dbff95
CM
1387 btrfs_block_release(root, path->nodes[0]);
1388 path->nodes[0] = right_buffer;
1389 path->slots[0] = 0;
1390 check_node(root, path, 1);
1391 check_leaf(root, path, 0);
be0e5c09
CM
1392 return ret;
1393}
1394
b18c6685
CM
1395int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1396 struct btrfs_root *root,
1397 struct btrfs_path *path,
1398 u32 new_size)
1399{
1400 int ret = 0;
1401 int slot;
1402 int slot_orig;
1403 struct btrfs_leaf *leaf;
1404 struct buffer_head *leaf_buf;
1405 u32 nritems;
1406 unsigned int data_end;
1407 unsigned int old_data_start;
1408 unsigned int old_size;
1409 unsigned int size_diff;
1410 int i;
1411
1412 slot_orig = path->slots[0];
1413 leaf_buf = path->nodes[0];
1414 leaf = btrfs_buffer_leaf(leaf_buf);
1415
1416 nritems = btrfs_header_nritems(&leaf->header);
1417 data_end = leaf_data_end(root, leaf);
1418
1419 slot = path->slots[0];
1420 old_data_start = btrfs_item_offset(leaf->items + slot);
1421 old_size = btrfs_item_size(leaf->items + slot);
1422 BUG_ON(old_size <= new_size);
1423 size_diff = old_size - new_size;
1424
1425 BUG_ON(slot < 0);
1426 BUG_ON(slot >= nritems);
1427
1428 /*
1429 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1430 */
1431 /* first correct the data pointers */
1432 for (i = slot; i < nritems; i++) {
1433 u32 ioff = btrfs_item_offset(leaf->items + i);
1434 btrfs_set_item_offset(leaf->items + i,
1435 ioff + size_diff);
1436 }
1437 /* shift the data */
b18c6685
CM
1438 btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1439 data_end + size_diff, btrfs_leaf_data(leaf) +
1440 data_end, old_data_start + new_size - data_end);
1441 btrfs_set_item_size(leaf->items + slot, new_size);
1442 btrfs_mark_buffer_dirty(leaf_buf);
1443
1444 ret = 0;
1445 if (btrfs_leaf_free_space(root, leaf) < 0)
1446 BUG();
1447 check_leaf(root, path, 0);
1448 return ret;
1449}
1450
6567e837
CM
1451int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1452 *root, struct btrfs_path *path, u32 data_size)
1453{
1454 int ret = 0;
1455 int slot;
1456 int slot_orig;
1457 struct btrfs_leaf *leaf;
1458 struct buffer_head *leaf_buf;
1459 u32 nritems;
1460 unsigned int data_end;
1461 unsigned int old_data;
1462 unsigned int old_size;
1463 int i;
1464
1465 slot_orig = path->slots[0];
1466 leaf_buf = path->nodes[0];
1467 leaf = btrfs_buffer_leaf(leaf_buf);
1468
1469 nritems = btrfs_header_nritems(&leaf->header);
1470 data_end = leaf_data_end(root, leaf);
1471
1472 if (btrfs_leaf_free_space(root, leaf) < data_size)
1473 BUG();
1474 slot = path->slots[0];
1475 old_data = btrfs_item_end(leaf->items + slot);
1476
1477 BUG_ON(slot < 0);
1478 BUG_ON(slot >= nritems);
1479
1480 /*
1481 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1482 */
1483 /* first correct the data pointers */
1484 for (i = slot; i < nritems; i++) {
1485 u32 ioff = btrfs_item_offset(leaf->items + i);
1486 btrfs_set_item_offset(leaf->items + i,
1487 ioff - data_size);
1488 }
1489 /* shift the data */
1490 btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1491 data_end - data_size, btrfs_leaf_data(leaf) +
1492 data_end, old_data - data_end);
1493 data_end = old_data;
1494 old_size = btrfs_item_size(leaf->items + slot);
1495 btrfs_set_item_size(leaf->items + slot, old_size + data_size);
1496 btrfs_mark_buffer_dirty(leaf_buf);
1497
1498 ret = 0;
1499 if (btrfs_leaf_free_space(root, leaf) < 0)
1500 BUG();
1501 check_leaf(root, path, 0);
1502 return ret;
1503}
1504
74123bd7
CM
1505/*
1506 * Given a key and some data, insert an item into the tree.
1507 * This does all the path init required, making room in the tree if needed.
1508 */
e089f05c
CM
1509int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1510 *root, struct btrfs_path *path, struct btrfs_key
1511 *cpu_key, u32 data_size)
be0e5c09 1512{
aa5d6bed 1513 int ret = 0;
be0e5c09 1514 int slot;
eb60ceac 1515 int slot_orig;
234b63a0 1516 struct btrfs_leaf *leaf;
e20d96d6 1517 struct buffer_head *leaf_buf;
7518a238 1518 u32 nritems;
be0e5c09 1519 unsigned int data_end;
e2fa7227
CM
1520 struct btrfs_disk_key disk_key;
1521
1522 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
be0e5c09 1523
74123bd7 1524 /* create a root if there isn't one */
5c680ed6 1525 if (!root->node)
cfaa7295 1526 BUG();
e089f05c 1527 ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
eb60ceac 1528 if (ret == 0) {
f0930a37 1529 return -EEXIST;
aa5d6bed 1530 }
ed2ff2cb
CM
1531 if (ret < 0)
1532 goto out;
be0e5c09 1533
62e2749e
CM
1534 slot_orig = path->slots[0];
1535 leaf_buf = path->nodes[0];
e20d96d6 1536 leaf = btrfs_buffer_leaf(leaf_buf);
74123bd7 1537
7518a238 1538 nritems = btrfs_header_nritems(&leaf->header);
123abc88 1539 data_end = leaf_data_end(root, leaf);
eb60ceac 1540
123abc88 1541 if (btrfs_leaf_free_space(root, leaf) <
d4dbff95 1542 sizeof(struct btrfs_item) + data_size) {
be0e5c09 1543 BUG();
d4dbff95 1544 }
62e2749e 1545 slot = path->slots[0];
eb60ceac 1546 BUG_ON(slot < 0);
be0e5c09
CM
1547 if (slot != nritems) {
1548 int i;
0783fcfc 1549 unsigned int old_data = btrfs_item_end(leaf->items + slot);
be0e5c09
CM
1550
1551 /*
1552 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1553 */
1554 /* first correct the data pointers */
0783fcfc 1555 for (i = slot; i < nritems; i++) {
123abc88 1556 u32 ioff = btrfs_item_offset(leaf->items + i);
0783fcfc
CM
1557 btrfs_set_item_offset(leaf->items + i,
1558 ioff - data_size);
1559 }
be0e5c09
CM
1560
1561 /* shift the items */
d6025579
CM
1562 btrfs_memmove(root, leaf, leaf->items + slot + 1,
1563 leaf->items + slot,
1564 (nritems - slot) * sizeof(struct btrfs_item));
be0e5c09
CM
1565
1566 /* shift the data */
d6025579
CM
1567 btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1568 data_end - data_size, btrfs_leaf_data(leaf) +
1569 data_end, old_data - data_end);
be0e5c09
CM
1570 data_end = old_data;
1571 }
62e2749e 1572 /* setup the item for the new data */
d6025579
CM
1573 btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key,
1574 sizeof(struct btrfs_disk_key));
0783fcfc
CM
1575 btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
1576 btrfs_set_item_size(leaf->items + slot, data_size);
7518a238 1577 btrfs_set_header_nritems(&leaf->header, nritems + 1);
d6025579 1578 btrfs_mark_buffer_dirty(leaf_buf);
aa5d6bed
CM
1579
1580 ret = 0;
8e19f2cd 1581 if (slot == 0)
e089f05c 1582 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
aa5d6bed 1583
123abc88 1584 if (btrfs_leaf_free_space(root, leaf) < 0)
be0e5c09 1585 BUG();
62e2749e 1586 check_leaf(root, path, 0);
ed2ff2cb 1587out:
62e2749e
CM
1588 return ret;
1589}
1590
1591/*
1592 * Given a key and some data, insert an item into the tree.
1593 * This does all the path init required, making room in the tree if needed.
1594 */
e089f05c
CM
1595int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1596 *root, struct btrfs_key *cpu_key, void *data, u32
1597 data_size)
62e2749e
CM
1598{
1599 int ret = 0;
2c90e5d6 1600 struct btrfs_path *path;
62e2749e
CM
1601 u8 *ptr;
1602
2c90e5d6
CM
1603 path = btrfs_alloc_path();
1604 BUG_ON(!path);
1605 btrfs_init_path(path);
1606 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
62e2749e 1607 if (!ret) {
2c90e5d6
CM
1608 ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1609 path->slots[0], u8);
1610 btrfs_memcpy(root, path->nodes[0]->b_data,
d6025579 1611 ptr, data, data_size);
2c90e5d6 1612 btrfs_mark_buffer_dirty(path->nodes[0]);
62e2749e 1613 }
2c90e5d6
CM
1614 btrfs_release_path(root, path);
1615 btrfs_free_path(path);
aa5d6bed 1616 return ret;
be0e5c09
CM
1617}
1618
74123bd7 1619/*
5de08d7d 1620 * delete the pointer from a given node.
74123bd7
CM
1621 *
1622 * If the delete empties a node, the node is removed from the tree,
1623 * continuing all the way the root if required. The root is converted into
1624 * a leaf if all the nodes are emptied.
1625 */
e089f05c
CM
1626static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1627 struct btrfs_path *path, int level, int slot)
be0e5c09 1628{
234b63a0 1629 struct btrfs_node *node;
e20d96d6 1630 struct buffer_head *parent = path->nodes[level];
7518a238 1631 u32 nritems;
aa5d6bed 1632 int ret = 0;
bb803951 1633 int wret;
be0e5c09 1634
e20d96d6 1635 node = btrfs_buffer_node(parent);
7518a238 1636 nritems = btrfs_header_nritems(&node->header);
bb803951 1637 if (slot != nritems -1) {
d6025579
CM
1638 btrfs_memmove(root, node, node->ptrs + slot,
1639 node->ptrs + slot + 1,
1640 sizeof(struct btrfs_key_ptr) *
1641 (nritems - slot - 1));
bb803951 1642 }
7518a238
CM
1643 nritems--;
1644 btrfs_set_header_nritems(&node->header, nritems);
1645 if (nritems == 0 && parent == root->node) {
e20d96d6
CM
1646 struct btrfs_header *header = btrfs_buffer_header(root->node);
1647 BUG_ON(btrfs_header_level(header) != 1);
bb803951 1648 /* just turn the root into a leaf and break */
e20d96d6 1649 btrfs_set_header_level(header, 0);
bb803951 1650 } else if (slot == 0) {
e089f05c 1651 wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
123abc88 1652 level + 1);
0f70abe2
CM
1653 if (wret)
1654 ret = wret;
be0e5c09 1655 }
d6025579 1656 btrfs_mark_buffer_dirty(parent);
aa5d6bed 1657 return ret;
be0e5c09
CM
1658}
1659
74123bd7
CM
1660/*
1661 * delete the item at the leaf level in path. If that empties
1662 * the leaf, remove it from the tree
1663 */
e089f05c
CM
1664int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1665 struct btrfs_path *path)
be0e5c09 1666{
be0e5c09 1667 int slot;
234b63a0 1668 struct btrfs_leaf *leaf;
e20d96d6 1669 struct buffer_head *leaf_buf;
be0e5c09
CM
1670 int doff;
1671 int dsize;
aa5d6bed
CM
1672 int ret = 0;
1673 int wret;
7518a238 1674 u32 nritems;
be0e5c09 1675
eb60ceac 1676 leaf_buf = path->nodes[0];
e20d96d6 1677 leaf = btrfs_buffer_leaf(leaf_buf);
4920c9ac 1678 slot = path->slots[0];
0783fcfc
CM
1679 doff = btrfs_item_offset(leaf->items + slot);
1680 dsize = btrfs_item_size(leaf->items + slot);
7518a238 1681 nritems = btrfs_header_nritems(&leaf->header);
be0e5c09 1682
7518a238 1683 if (slot != nritems - 1) {
be0e5c09 1684 int i;
123abc88 1685 int data_end = leaf_data_end(root, leaf);
d6025579
CM
1686 btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1687 data_end + dsize,
1688 btrfs_leaf_data(leaf) + data_end,
1689 doff - data_end);
0783fcfc 1690 for (i = slot + 1; i < nritems; i++) {
123abc88 1691 u32 ioff = btrfs_item_offset(leaf->items + i);
0783fcfc
CM
1692 btrfs_set_item_offset(leaf->items + i, ioff + dsize);
1693 }
d6025579
CM
1694 btrfs_memmove(root, leaf, leaf->items + slot,
1695 leaf->items + slot + 1,
1696 sizeof(struct btrfs_item) *
1697 (nritems - slot - 1));
be0e5c09 1698 }
7518a238
CM
1699 btrfs_set_header_nritems(&leaf->header, nritems - 1);
1700 nritems--;
74123bd7 1701 /* delete the leaf if we've emptied it */
7518a238 1702 if (nritems == 0) {
eb60ceac 1703 if (leaf_buf == root->node) {
7518a238 1704 btrfs_set_header_level(&leaf->header, 0);
9a8dd150 1705 } else {
e089f05c 1706 clean_tree_block(trans, root, leaf_buf);
d6025579 1707 wait_on_buffer(leaf_buf);
e089f05c 1708 wret = del_ptr(trans, root, path, 1, path->slots[1]);
aa5d6bed
CM
1709 if (wret)
1710 ret = wret;
e089f05c 1711 wret = btrfs_free_extent(trans, root,
7eccb903 1712 bh_blocknr(leaf_buf), 1, 1);
0f70abe2
CM
1713 if (wret)
1714 ret = wret;
9a8dd150 1715 }
be0e5c09 1716 } else {
7518a238 1717 int used = leaf_space_used(leaf, 0, nritems);
aa5d6bed 1718 if (slot == 0) {
e089f05c
CM
1719 wret = fixup_low_keys(trans, root, path,
1720 &leaf->items[0].key, 1);
aa5d6bed
CM
1721 if (wret)
1722 ret = wret;
1723 }
aa5d6bed 1724
74123bd7 1725 /* delete the leaf if it is mostly empty */
123abc88 1726 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
be0e5c09
CM
1727 /* push_leaf_left fixes the path.
1728 * make sure the path still points to our leaf
1729 * for possible call to del_ptr below
1730 */
4920c9ac 1731 slot = path->slots[1];
e20d96d6 1732 get_bh(leaf_buf);
e089f05c 1733 wret = push_leaf_left(trans, root, path, 1);
aa5d6bed
CM
1734 if (wret < 0)
1735 ret = wret;
f0930a37 1736 if (path->nodes[0] == leaf_buf &&
7518a238 1737 btrfs_header_nritems(&leaf->header)) {
e089f05c 1738 wret = push_leaf_right(trans, root, path, 1);
aa5d6bed
CM
1739 if (wret < 0)
1740 ret = wret;
1741 }
7518a238 1742 if (btrfs_header_nritems(&leaf->header) == 0) {
7eccb903 1743 u64 blocknr = bh_blocknr(leaf_buf);
e089f05c 1744 clean_tree_block(trans, root, leaf_buf);
d6025579 1745 wait_on_buffer(leaf_buf);
e089f05c 1746 wret = del_ptr(trans, root, path, 1, slot);
aa5d6bed
CM
1747 if (wret)
1748 ret = wret;
234b63a0 1749 btrfs_block_release(root, leaf_buf);
e089f05c
CM
1750 wret = btrfs_free_extent(trans, root, blocknr,
1751 1, 1);
0f70abe2
CM
1752 if (wret)
1753 ret = wret;
5de08d7d 1754 } else {
d6025579 1755 btrfs_mark_buffer_dirty(leaf_buf);
234b63a0 1756 btrfs_block_release(root, leaf_buf);
be0e5c09 1757 }
d5719762 1758 } else {
d6025579 1759 btrfs_mark_buffer_dirty(leaf_buf);
be0e5c09
CM
1760 }
1761 }
aa5d6bed 1762 return ret;
be0e5c09
CM
1763}
1764
97571fd0
CM
1765/*
1766 * walk up the tree as far as required to find the next leaf.
0f70abe2
CM
1767 * returns 0 if it found something or 1 if there are no greater leaves.
1768 * returns < 0 on io errors.
97571fd0 1769 */
234b63a0 1770int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
d97e63b6
CM
1771{
1772 int slot;
1773 int level = 1;
1774 u64 blocknr;
e20d96d6
CM
1775 struct buffer_head *c;
1776 struct btrfs_node *c_node;
1777 struct buffer_head *next = NULL;
d97e63b6 1778
234b63a0 1779 while(level < BTRFS_MAX_LEVEL) {
d97e63b6 1780 if (!path->nodes[level])
0f70abe2 1781 return 1;
d97e63b6
CM
1782 slot = path->slots[level] + 1;
1783 c = path->nodes[level];
e20d96d6
CM
1784 c_node = btrfs_buffer_node(c);
1785 if (slot >= btrfs_header_nritems(&c_node->header)) {
d97e63b6
CM
1786 level++;
1787 continue;
1788 }
e20d96d6 1789 blocknr = btrfs_node_blockptr(c_node, slot);
cfaa7295 1790 if (next)
234b63a0 1791 btrfs_block_release(root, next);
d97e63b6
CM
1792 next = read_tree_block(root, blocknr);
1793 break;
1794 }
1795 path->slots[level] = slot;
1796 while(1) {
1797 level--;
1798 c = path->nodes[level];
234b63a0 1799 btrfs_block_release(root, c);
d97e63b6
CM
1800 path->nodes[level] = next;
1801 path->slots[level] = 0;
1802 if (!level)
1803 break;
1d4f8a0c 1804 next = read_tree_block(root,
e20d96d6 1805 btrfs_node_blockptr(btrfs_buffer_node(next), 0));
d97e63b6
CM
1806 }
1807 return 0;
1808}