]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/btrfs/ctree.c
Btrfs: switch to early splits
[mirror_ubuntu-jammy-kernel.git] / fs / btrfs / ctree.c
CommitLineData
be0e5c09
CM
1#include <stdio.h>
2#include <stdlib.h>
3#include "kerncompat.h"
eb60ceac
CM
4#include "radix-tree.h"
5#include "ctree.h"
6#include "disk-io.h"
be0e5c09 7
5c680ed6
CM
8#define SEARCH_READ 0
9#define SEARCH_WRITE 1
10
d97e63b6 11static int refill_alloc_extent(struct ctree_root *root);
5c680ed6
CM
12int split_node(struct ctree_root *root, struct ctree_path *path, int level);
13int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size);
d97e63b6 14
be0e5c09
CM
15static inline void init_path(struct ctree_path *p)
16{
17 memset(p, 0, sizeof(*p));
18}
19
eb60ceac
CM
20static void release_path(struct ctree_root *root, struct ctree_path *p)
21{
22 int i;
23 for (i = 0; i < MAX_LEVEL; i++) {
24 if (!p->nodes[i])
25 break;
26 tree_block_release(root, p->nodes[i]);
27 }
28}
29
74123bd7
CM
30/*
31 * The leaf data grows from end-to-front in the node.
32 * this returns the address of the start of the last item,
33 * which is the stop of the leaf data stack
34 */
be0e5c09
CM
35static inline unsigned int leaf_data_end(struct leaf *leaf)
36{
37 unsigned int nr = leaf->header.nritems;
38 if (nr == 0)
d97e63b6 39 return sizeof(leaf->data);
be0e5c09
CM
40 return leaf->items[nr-1].offset;
41}
42
74123bd7
CM
43/*
44 * The space between the end of the leaf items and
45 * the start of the leaf data. IOW, how much room
46 * the leaf has left for both items and data
47 */
be0e5c09
CM
48static inline int leaf_free_space(struct leaf *leaf)
49{
50 int data_end = leaf_data_end(leaf);
51 int nritems = leaf->header.nritems;
52 char *items_end = (char *)(leaf->items + nritems + 1);
53 return (char *)(leaf->data + data_end) - (char *)items_end;
54}
55
74123bd7
CM
56/*
57 * compare two keys in a memcmp fashion
58 */
be0e5c09
CM
59int comp_keys(struct key *k1, struct key *k2)
60{
61 if (k1->objectid > k2->objectid)
62 return 1;
63 if (k1->objectid < k2->objectid)
64 return -1;
65 if (k1->flags > k2->flags)
66 return 1;
67 if (k1->flags < k2->flags)
68 return -1;
69 if (k1->offset > k2->offset)
70 return 1;
71 if (k1->offset < k2->offset)
72 return -1;
73 return 0;
74}
74123bd7
CM
75
/*
 * Binary search for key in the array p.  Entries are item_size bytes
 * apart, each one starts with a struct key, and there are 'max' of them.
 *
 * Returns 0 and sets *slot to the matching index when the key is found.
 * Otherwise returns 1 and sets *slot to the index where the key would
 * have to be inserted; that index equals max when the key is bigger
 * than every key in the array.
 */
int generic_bin_search(char *p, int item_size, struct key *key,
		       int max, int *slot)
{
	int lo = 0;
	int hi = max;

	while (lo < hi) {
		int probe = (lo + hi) / 2;
		int cmp = comp_keys((struct key *)(p + probe * item_size), key);

		if (cmp == 0) {
			*slot = probe;
			return 0;
		}
		if (cmp < 0)
			lo = probe + 1;
		else
			hi = probe;
	}
	*slot = lo;
	return 1;
}
111
112int bin_search(struct node *c, struct key *key, int *slot)
113{
114 if (is_leaf(c->header.flags)) {
115 struct leaf *l = (struct leaf *)c;
116 return generic_bin_search((void *)l->items, sizeof(struct item),
117 key, c->header.nritems, slot);
118 } else {
119 return generic_bin_search((void *)c->keys, sizeof(struct key),
120 key, c->header.nritems, slot);
121 }
122 return -1;
123}
124
74123bd7
CM
125/*
126 * look for key in the tree. path is filled in with nodes along the way
127 * if key is found, we return zero and you can find the item in the leaf
128 * level of the path (level 0)
129 *
130 * If the key isn't found, the path points to the slot where it should
131 * be inserted.
132 */
5c680ed6 133int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len)
be0e5c09 134{
eb60ceac
CM
135 struct tree_buffer *b = root->node;
136 struct node *c;
be0e5c09
CM
137 int slot;
138 int ret;
139 int level;
5c680ed6 140
eb60ceac
CM
141 b->count++;
142 while (b) {
143 c = &b->node;
be0e5c09 144 level = node_level(c->header.flags);
eb60ceac 145 p->nodes[level] = b;
be0e5c09
CM
146 ret = bin_search(c, key, &slot);
147 if (!is_leaf(c->header.flags)) {
148 if (ret && slot > 0)
149 slot -= 1;
150 p->slots[level] = slot;
5c680ed6
CM
151 if (ins_len && c->header.nritems == NODEPTRS_PER_BLOCK) {
152 int sret = split_node(root, p, level);
153 BUG_ON(sret > 0);
154 if (sret)
155 return sret;
156 b = p->nodes[level];
157 c = &b->node;
158 slot = p->slots[level];
159 }
eb60ceac 160 b = read_tree_block(root, c->blockptrs[slot]);
be0e5c09
CM
161 continue;
162 } else {
5c680ed6 163 struct leaf *l = (struct leaf *)c;
be0e5c09 164 p->slots[level] = slot;
5c680ed6
CM
165 if (ins_len && leaf_free_space(l) < sizeof(struct item) + ins_len) {
166 int sret = split_leaf(root, p, ins_len);
167 BUG_ON(sret > 0);
168 if (sret)
169 return sret;
170 }
be0e5c09
CM
171 return ret;
172 }
173 }
174 return -1;
175}
176
74123bd7
CM
177/*
178 * adjust the pointers going up the tree, starting at level
179 * making sure the right key of each node is points to 'key'.
180 * This is used after shifting pointers to the left, so it stops
181 * fixing up pointers when a given leaf/node is not in slot 0 of the
182 * higher levels
183 */
eb60ceac
CM
184static void fixup_low_keys(struct ctree_root *root,
185 struct ctree_path *path, struct key *key,
186 int level)
be0e5c09
CM
187{
188 int i;
be0e5c09 189 for (i = level; i < MAX_LEVEL; i++) {
eb60ceac 190 struct node *t;
be0e5c09 191 int tslot = path->slots[i];
eb60ceac 192 if (!path->nodes[i])
be0e5c09 193 break;
eb60ceac 194 t = &path->nodes[i]->node;
be0e5c09 195 memcpy(t->keys + tslot, key, sizeof(*key));
eb60ceac 196 write_tree_block(root, path->nodes[i]);
be0e5c09
CM
197 if (tslot != 0)
198 break;
199 }
200}
201
74123bd7
CM
202/*
203 * try to push data from one node into the next node left in the
204 * tree. The src node is found at specified level in the path.
205 * If some bytes were pushed, return 0, otherwise return 1.
206 *
207 * Lower nodes/leaves in the path are not touched, higher nodes may
208 * be modified to reflect the push.
209 *
210 * The path is altered to reflect the push.
211 */
be0e5c09
CM
212int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
213{
214 int slot;
215 struct node *left;
216 struct node *right;
217 int push_items = 0;
218 int left_nritems;
219 int right_nritems;
eb60ceac
CM
220 struct tree_buffer *t;
221 struct tree_buffer *right_buf;
be0e5c09
CM
222
223 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
224 return 1;
225 slot = path->slots[level + 1];
226 if (slot == 0)
227 return 1;
228
eb60ceac
CM
229 t = read_tree_block(root,
230 path->nodes[level + 1]->node.blockptrs[slot - 1]);
231 left = &t->node;
232 right_buf = path->nodes[level];
233 right = &right_buf->node;
be0e5c09
CM
234 left_nritems = left->header.nritems;
235 right_nritems = right->header.nritems;
236 push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
eb60ceac
CM
237 if (push_items <= 0) {
238 tree_block_release(root, t);
be0e5c09 239 return 1;
eb60ceac 240 }
be0e5c09
CM
241
242 if (right_nritems < push_items)
243 push_items = right_nritems;
244 memcpy(left->keys + left_nritems, right->keys,
245 push_items * sizeof(struct key));
246 memcpy(left->blockptrs + left_nritems, right->blockptrs,
247 push_items * sizeof(u64));
248 memmove(right->keys, right->keys + push_items,
249 (right_nritems - push_items) * sizeof(struct key));
250 memmove(right->blockptrs, right->blockptrs + push_items,
251 (right_nritems - push_items) * sizeof(u64));
252 right->header.nritems -= push_items;
253 left->header.nritems += push_items;
254
255 /* adjust the pointers going up the tree */
eb60ceac
CM
256 fixup_low_keys(root, path, right->keys, level + 1);
257
258 write_tree_block(root, t);
259 write_tree_block(root, right_buf);
be0e5c09
CM
260
261 /* then fixup the leaf pointer in the path */
262 if (path->slots[level] < push_items) {
263 path->slots[level] += left_nritems;
eb60ceac
CM
264 tree_block_release(root, path->nodes[level]);
265 path->nodes[level] = t;
be0e5c09
CM
266 path->slots[level + 1] -= 1;
267 } else {
268 path->slots[level] -= push_items;
eb60ceac 269 tree_block_release(root, t);
be0e5c09
CM
270 }
271 return 0;
272}
273
74123bd7
CM
274/*
275 * try to push data from one node into the next node right in the
276 * tree. The src node is found at specified level in the path.
277 * If some bytes were pushed, return 0, otherwise return 1.
278 *
279 * Lower nodes/leaves in the path are not touched, higher nodes may
280 * be modified to reflect the push.
281 *
282 * The path is altered to reflect the push.
283 */
be0e5c09
CM
284int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
285{
286 int slot;
eb60ceac
CM
287 struct tree_buffer *t;
288 struct tree_buffer *src_buffer;
be0e5c09
CM
289 struct node *dst;
290 struct node *src;
291 int push_items = 0;
292 int dst_nritems;
293 int src_nritems;
294
74123bd7 295 /* can't push from the root */
be0e5c09
CM
296 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
297 return 1;
74123bd7
CM
298
299 /* only try to push inside the node higher up */
be0e5c09
CM
300 slot = path->slots[level + 1];
301 if (slot == NODEPTRS_PER_BLOCK - 1)
302 return 1;
303
eb60ceac 304 if (slot >= path->nodes[level + 1]->node.header.nritems -1)
be0e5c09
CM
305 return 1;
306
eb60ceac
CM
307 t = read_tree_block(root,
308 path->nodes[level + 1]->node.blockptrs[slot + 1]);
309 dst = &t->node;
310 src_buffer = path->nodes[level];
311 src = &src_buffer->node;
be0e5c09
CM
312 dst_nritems = dst->header.nritems;
313 src_nritems = src->header.nritems;
314 push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
eb60ceac
CM
315 if (push_items <= 0) {
316 tree_block_release(root, t);
be0e5c09 317 return 1;
eb60ceac 318 }
be0e5c09
CM
319
320 if (src_nritems < push_items)
321 push_items = src_nritems;
322 memmove(dst->keys + push_items, dst->keys,
323 dst_nritems * sizeof(struct key));
324 memcpy(dst->keys, src->keys + src_nritems - push_items,
325 push_items * sizeof(struct key));
326
327 memmove(dst->blockptrs + push_items, dst->blockptrs,
328 dst_nritems * sizeof(u64));
329 memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
330 push_items * sizeof(u64));
331
332 src->header.nritems -= push_items;
333 dst->header.nritems += push_items;
334
335 /* adjust the pointers going up the tree */
eb60ceac 336 memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
be0e5c09 337 dst->keys, sizeof(struct key));
eb60ceac
CM
338
339 write_tree_block(root, path->nodes[level + 1]);
340 write_tree_block(root, t);
341 write_tree_block(root, src_buffer);
342
74123bd7 343 /* then fixup the pointers in the path */
be0e5c09
CM
344 if (path->slots[level] >= src->header.nritems) {
345 path->slots[level] -= src->header.nritems;
eb60ceac
CM
346 tree_block_release(root, path->nodes[level]);
347 path->nodes[level] = t;
be0e5c09 348 path->slots[level + 1] += 1;
eb60ceac
CM
349 } else {
350 tree_block_release(root, t);
be0e5c09
CM
351 }
352 return 0;
353}
354
5c680ed6
CM
355static int insert_new_root(struct ctree_root *root, struct ctree_path *path, int level)
356{
357 struct tree_buffer *t;
358 struct node *lower;
359 struct node *c;
360 struct key *lower_key;
361
362 BUG_ON(path->nodes[level]);
363 BUG_ON(path->nodes[level-1] != root->node);
364
365 t = alloc_free_block(root);
366 c = &t->node;
367 memset(c, 0, sizeof(c));
368 c->header.nritems = 1;
369 c->header.flags = node_level(level);
370 c->header.blocknr = t->blocknr;
371 c->header.parentid = root->node->node.header.parentid;
372 lower = &path->nodes[level-1]->node;
373 if (is_leaf(lower->header.flags))
374 lower_key = &((struct leaf *)lower)->items[0].key;
375 else
376 lower_key = lower->keys;
377 memcpy(c->keys, lower_key, sizeof(struct key));
378 c->blockptrs[0] = path->nodes[level-1]->blocknr;
379 /* the super has an extra ref to root->node */
380 tree_block_release(root, root->node);
381 root->node = t;
382 t->count++;
383 write_tree_block(root, t);
384 path->nodes[level] = t;
385 path->slots[level] = 0;
386 return 0;
387}
388
74123bd7
CM
389/*
390 * worker function to insert a single pointer in a node.
391 * the node should have enough room for the pointer already
392 * slot and level indicate where you want the key to go, and
393 * blocknr is the block the key points to.
394 */
5c680ed6 395int insert_ptr(struct ctree_root *root,
74123bd7
CM
396 struct ctree_path *path, struct key *key,
397 u64 blocknr, int slot, int level)
398{
74123bd7 399 struct node *lower;
74123bd7 400 int nritems;
5c680ed6
CM
401
402 BUG_ON(!path->nodes[level]);
74123bd7
CM
403 lower = &path->nodes[level]->node;
404 nritems = lower->header.nritems;
405 if (slot > nritems)
406 BUG();
407 if (nritems == NODEPTRS_PER_BLOCK)
408 BUG();
409 if (slot != nritems) {
410 memmove(lower->keys + slot + 1, lower->keys + slot,
411 (nritems - slot) * sizeof(struct key));
412 memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
413 (nritems - slot) * sizeof(u64));
414 }
415 memcpy(lower->keys + slot, key, sizeof(struct key));
416 lower->blockptrs[slot] = blocknr;
417 lower->header.nritems++;
418 if (lower->keys[1].objectid == 0)
419 BUG();
420 write_tree_block(root, path->nodes[level]);
421 return 0;
422}
423
5c680ed6 424int split_node(struct ctree_root *root, struct ctree_path *path, int level)
be0e5c09 425{
5c680ed6
CM
426 struct tree_buffer *t;
427 struct node *c;
428 struct tree_buffer *split_buffer;
429 struct node *split;
be0e5c09 430 int mid;
5c680ed6 431 int ret;
eb60ceac 432
5c680ed6
CM
433 ret = push_node_left(root, path, level);
434 if (!ret)
435 return 0;
436 ret = push_node_right(root, path, level);
437 if (!ret)
438 return 0;
439 t = path->nodes[level];
440 c = &t->node;
441 if (t == root->node) {
442 /* trying to split the root, lets make a new one */
443 ret = insert_new_root(root, path, level + 1);
444 if (ret)
445 return ret;
be0e5c09 446 }
5c680ed6
CM
447 split_buffer = alloc_free_block(root);
448 split = &split_buffer->node;
449 split->header.flags = c->header.flags;
450 split->header.blocknr = split_buffer->blocknr;
451 split->header.parentid = root->node->node.header.parentid;
452 mid = (c->header.nritems + 1) / 2;
453 memcpy(split->keys, c->keys + mid,
454 (c->header.nritems - mid) * sizeof(struct key));
455 memcpy(split->blockptrs, c->blockptrs + mid,
456 (c->header.nritems - mid) * sizeof(u64));
457 split->header.nritems = c->header.nritems - mid;
458 c->header.nritems = mid;
459 write_tree_block(root, t);
460 write_tree_block(root, split_buffer);
461 insert_ptr(root, path, split->keys, split_buffer->blocknr,
462 path->slots[level + 1] + 1, level + 1);
463 if (path->slots[level] > mid) {
464 path->slots[level] -= mid;
465 tree_block_release(root, t);
466 path->nodes[level] = split_buffer;
467 path->slots[level + 1] += 1;
468 } else {
469 tree_block_release(root, split_buffer);
be0e5c09 470 }
5c680ed6 471 return 0;
be0e5c09
CM
472}
473
74123bd7
CM
474/*
475 * how many bytes are required to store the items in a leaf. start
476 * and nr indicate which items in the leaf to check. This totals up the
477 * space used both by the item structs and the item data
478 */
be0e5c09
CM
479int leaf_space_used(struct leaf *l, int start, int nr)
480{
481 int data_len;
482 int end = start + nr - 1;
483
484 if (!nr)
485 return 0;
486 data_len = l->items[start].offset + l->items[start].size;
487 data_len = data_len - l->items[end].offset;
488 data_len += sizeof(struct item) * nr;
489 return data_len;
490}
491
74123bd7
CM
492/*
493 * push some data in the path leaf to the left, trying to free up at
494 * least data_size bytes. returns zero if the push worked, nonzero otherwise
495 */
be0e5c09
CM
496int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
497 int data_size)
498{
eb60ceac
CM
499 struct tree_buffer *right_buf = path->nodes[0];
500 struct leaf *right = &right_buf->leaf;
501 struct tree_buffer *t;
be0e5c09
CM
502 struct leaf *left;
503 int slot;
504 int i;
505 int free_space;
506 int push_space = 0;
507 int push_items = 0;
508 struct item *item;
509 int old_left_nritems;
510
511 slot = path->slots[1];
512 if (slot == 0) {
513 return 1;
514 }
515 if (!path->nodes[1]) {
516 return 1;
517 }
eb60ceac
CM
518 t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
519 left = &t->leaf;
be0e5c09
CM
520 free_space = leaf_free_space(left);
521 if (free_space < data_size + sizeof(struct item)) {
eb60ceac 522 tree_block_release(root, t);
be0e5c09
CM
523 return 1;
524 }
525 for (i = 0; i < right->header.nritems; i++) {
526 item = right->items + i;
527 if (path->slots[0] == i)
528 push_space += data_size + sizeof(*item);
529 if (item->size + sizeof(*item) + push_space > free_space)
530 break;
531 push_items++;
532 push_space += item->size + sizeof(*item);
533 }
534 if (push_items == 0) {
eb60ceac 535 tree_block_release(root, t);
be0e5c09
CM
536 return 1;
537 }
538 /* push data from right to left */
539 memcpy(left->items + left->header.nritems,
540 right->items, push_items * sizeof(struct item));
541 push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
542 memcpy(left->data + leaf_data_end(left) - push_space,
543 right->data + right->items[push_items - 1].offset,
544 push_space);
545 old_left_nritems = left->header.nritems;
eb60ceac
CM
546 BUG_ON(old_left_nritems < 0);
547
be0e5c09
CM
548 for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
549 left->items[i].offset -= LEAF_DATA_SIZE -
550 left->items[old_left_nritems -1].offset;
551 }
552 left->header.nritems += push_items;
553
554 /* fixup right node */
555 push_space = right->items[push_items-1].offset - leaf_data_end(right);
556 memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
557 leaf_data_end(right), push_space);
558 memmove(right->items, right->items + push_items,
559 (right->header.nritems - push_items) * sizeof(struct item));
560 right->header.nritems -= push_items;
561 push_space = LEAF_DATA_SIZE;
eb60ceac 562
be0e5c09
CM
563 for (i = 0; i < right->header.nritems; i++) {
564 right->items[i].offset = push_space - right->items[i].size;
565 push_space = right->items[i].offset;
566 }
eb60ceac
CM
567
568 write_tree_block(root, t);
569 write_tree_block(root, right_buf);
570
571 fixup_low_keys(root, path, &right->items[0].key, 1);
be0e5c09
CM
572
573 /* then fixup the leaf pointer in the path */
574 if (path->slots[0] < push_items) {
575 path->slots[0] += old_left_nritems;
eb60ceac
CM
576 tree_block_release(root, path->nodes[0]);
577 path->nodes[0] = t;
be0e5c09
CM
578 path->slots[1] -= 1;
579 } else {
eb60ceac 580 tree_block_release(root, t);
be0e5c09
CM
581 path->slots[0] -= push_items;
582 }
eb60ceac 583 BUG_ON(path->slots[0] < 0);
be0e5c09
CM
584 return 0;
585}
586
74123bd7
CM
587/*
588 * split the path's leaf in two, making sure there is at least data_size
589 * available for the resulting leaf level of the path.
590 */
be0e5c09
CM
591int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
592{
eb60ceac
CM
593 struct tree_buffer *l_buf = path->nodes[0];
594 struct leaf *l = &l_buf->leaf;
595 int nritems;
596 int mid;
597 int slot;
be0e5c09 598 struct leaf *right;
eb60ceac 599 struct tree_buffer *right_buffer;
be0e5c09
CM
600 int space_needed = data_size + sizeof(struct item);
601 int data_copy_size;
602 int rt_data_off;
603 int i;
604 int ret;
605
606 if (push_leaf_left(root, path, data_size) == 0) {
eb60ceac
CM
607 l_buf = path->nodes[0];
608 l = &l_buf->leaf;
609 if (leaf_free_space(l) >= sizeof(struct item) + data_size)
610 return 0;
be0e5c09 611 }
5c680ed6
CM
612 if (!path->nodes[1]) {
613 ret = insert_new_root(root, path, 1);
614 if (ret)
615 return ret;
616 }
eb60ceac
CM
617 slot = path->slots[0];
618 nritems = l->header.nritems;
619 mid = (nritems + 1)/ 2;
620
621 right_buffer = alloc_free_block(root);
622 BUG_ON(!right_buffer);
623 BUG_ON(mid == nritems);
624 right = &right_buffer->leaf;
be0e5c09
CM
625 memset(right, 0, sizeof(*right));
626 if (mid <= slot) {
627 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
628 LEAF_DATA_SIZE)
629 BUG();
630 } else {
631 if (leaf_space_used(l, 0, mid + 1) + space_needed >
632 LEAF_DATA_SIZE)
633 BUG();
634 }
635 right->header.nritems = nritems - mid;
eb60ceac
CM
636 right->header.blocknr = right_buffer->blocknr;
637 right->header.flags = node_level(0);
cfaa7295 638 right->header.parentid = root->node->node.header.parentid;
be0e5c09
CM
639 data_copy_size = l->items[mid].offset + l->items[mid].size -
640 leaf_data_end(l);
641 memcpy(right->items, l->items + mid,
642 (nritems - mid) * sizeof(struct item));
643 memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
644 l->data + leaf_data_end(l), data_copy_size);
645 rt_data_off = LEAF_DATA_SIZE -
646 (l->items[mid].offset + l->items[mid].size);
74123bd7
CM
647
648 for (i = 0; i < right->header.nritems; i++)
be0e5c09 649 right->items[i].offset += rt_data_off;
74123bd7 650
be0e5c09
CM
651 l->header.nritems = mid;
652 ret = insert_ptr(root, path, &right->items[0].key,
5c680ed6 653 right_buffer->blocknr, path->slots[1] + 1, 1);
eb60ceac
CM
654 write_tree_block(root, right_buffer);
655 write_tree_block(root, l_buf);
656
657 BUG_ON(path->slots[0] != slot);
be0e5c09 658 if (mid <= slot) {
eb60ceac
CM
659 tree_block_release(root, path->nodes[0]);
660 path->nodes[0] = right_buffer;
be0e5c09
CM
661 path->slots[0] -= mid;
662 path->slots[1] += 1;
eb60ceac
CM
663 } else
664 tree_block_release(root, right_buffer);
665 BUG_ON(path->slots[0] < 0);
be0e5c09
CM
666 return ret;
667}
668
74123bd7
CM
669/*
670 * Given a key and some data, insert an item into the tree.
671 * This does all the path init required, making room in the tree if needed.
672 */
be0e5c09
CM
673int insert_item(struct ctree_root *root, struct key *key,
674 void *data, int data_size)
675{
676 int ret;
677 int slot;
eb60ceac 678 int slot_orig;
be0e5c09 679 struct leaf *leaf;
eb60ceac 680 struct tree_buffer *leaf_buf;
be0e5c09
CM
681 unsigned int nritems;
682 unsigned int data_end;
683 struct ctree_path path;
684
cfaa7295
CM
685 refill_alloc_extent(root);
686
74123bd7 687 /* create a root if there isn't one */
5c680ed6 688 if (!root->node)
cfaa7295 689 BUG();
be0e5c09 690 init_path(&path);
5c680ed6 691 ret = search_slot(root, key, &path, data_size);
eb60ceac
CM
692 if (ret == 0) {
693 release_path(root, &path);
be0e5c09 694 return -EEXIST;
eb60ceac 695 }
be0e5c09 696
eb60ceac
CM
697 slot_orig = path.slots[0];
698 leaf_buf = path.nodes[0];
699 leaf = &leaf_buf->leaf;
74123bd7 700
be0e5c09
CM
701 nritems = leaf->header.nritems;
702 data_end = leaf_data_end(leaf);
eb60ceac 703
be0e5c09
CM
704 if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
705 BUG();
706
707 slot = path.slots[0];
eb60ceac 708 BUG_ON(slot < 0);
be0e5c09 709 if (slot == 0)
eb60ceac 710 fixup_low_keys(root, &path, key, 1);
be0e5c09
CM
711 if (slot != nritems) {
712 int i;
713 unsigned int old_data = leaf->items[slot].offset +
714 leaf->items[slot].size;
715
716 /*
717 * item0..itemN ... dataN.offset..dataN.size .. data0.size
718 */
719 /* first correct the data pointers */
720 for (i = slot; i < nritems; i++)
721 leaf->items[i].offset -= data_size;
722
723 /* shift the items */
724 memmove(leaf->items + slot + 1, leaf->items + slot,
725 (nritems - slot) * sizeof(struct item));
726
727 /* shift the data */
728 memmove(leaf->data + data_end - data_size, leaf->data +
729 data_end, old_data - data_end);
730 data_end = old_data;
731 }
74123bd7 732 /* copy the new data in */
be0e5c09
CM
733 memcpy(&leaf->items[slot].key, key, sizeof(struct key));
734 leaf->items[slot].offset = data_end - data_size;
735 leaf->items[slot].size = data_size;
736 memcpy(leaf->data + data_end - data_size, data, data_size);
737 leaf->header.nritems += 1;
eb60ceac 738 write_tree_block(root, leaf_buf);
be0e5c09
CM
739 if (leaf_free_space(leaf) < 0)
740 BUG();
eb60ceac 741 release_path(root, &path);
be0e5c09
CM
742 return 0;
743}
744
74123bd7
CM
745/*
746 * delete the pointer from a given level in the path. The path is not
747 * fixed up, so after calling this it is not valid at that level.
748 *
749 * If the delete empties a node, the node is removed from the tree,
750 * continuing all the way the root if required. The root is converted into
751 * a leaf if all the nodes are emptied.
752 */
be0e5c09
CM
753int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
754{
755 int slot;
eb60ceac 756 struct tree_buffer *t;
be0e5c09
CM
757 struct node *node;
758 int nritems;
759
760 while(1) {
eb60ceac
CM
761 t = path->nodes[level];
762 if (!t)
be0e5c09 763 break;
eb60ceac 764 node = &t->node;
be0e5c09
CM
765 slot = path->slots[level];
766 nritems = node->header.nritems;
767
768 if (slot != nritems -1) {
769 memmove(node->keys + slot, node->keys + slot + 1,
770 sizeof(struct key) * (nritems - slot - 1));
771 memmove(node->blockptrs + slot,
772 node->blockptrs + slot + 1,
773 sizeof(u64) * (nritems - slot - 1));
774 }
775 node->header.nritems--;
eb60ceac 776 write_tree_block(root, t);
be0e5c09
CM
777 if (node->header.nritems != 0) {
778 int tslot;
779 if (slot == 0)
eb60ceac
CM
780 fixup_low_keys(root, path, node->keys,
781 level + 1);
be0e5c09 782 tslot = path->slots[level+1];
eb60ceac 783 t->count++;
be0e5c09
CM
784 push_node_left(root, path, level);
785 if (node->header.nritems) {
786 push_node_right(root, path, level);
787 }
eb60ceac
CM
788 if (node->header.nritems) {
789 tree_block_release(root, t);
be0e5c09 790 break;
eb60ceac
CM
791 }
792 tree_block_release(root, t);
4920c9ac 793 path->slots[level+1] = tslot;
be0e5c09 794 }
eb60ceac
CM
795 if (t == root->node) {
796 /* just turn the root into a leaf and break */
797 root->node->node.header.flags = node_level(0);
798 write_tree_block(root, t);
be0e5c09
CM
799 break;
800 }
801 level++;
802 if (!path->nodes[level])
803 BUG();
be0e5c09
CM
804 }
805 return 0;
806}
807
74123bd7
CM
808/*
809 * delete the item at the leaf level in path. If that empties
810 * the leaf, remove it from the tree
811 */
4920c9ac 812int del_item(struct ctree_root *root, struct ctree_path *path)
be0e5c09 813{
be0e5c09
CM
814 int slot;
815 struct leaf *leaf;
eb60ceac 816 struct tree_buffer *leaf_buf;
be0e5c09
CM
817 int doff;
818 int dsize;
819
eb60ceac
CM
820 leaf_buf = path->nodes[0];
821 leaf = &leaf_buf->leaf;
4920c9ac 822 slot = path->slots[0];
be0e5c09
CM
823 doff = leaf->items[slot].offset;
824 dsize = leaf->items[slot].size;
825
826 if (slot != leaf->header.nritems - 1) {
827 int i;
828 int data_end = leaf_data_end(leaf);
829 memmove(leaf->data + data_end + dsize,
830 leaf->data + data_end,
831 doff - data_end);
832 for (i = slot + 1; i < leaf->header.nritems; i++)
833 leaf->items[i].offset += dsize;
834 memmove(leaf->items + slot, leaf->items + slot + 1,
835 sizeof(struct item) *
836 (leaf->header.nritems - slot - 1));
837 }
838 leaf->header.nritems -= 1;
74123bd7 839 /* delete the leaf if we've emptied it */
be0e5c09 840 if (leaf->header.nritems == 0) {
eb60ceac
CM
841 if (leaf_buf == root->node) {
842 leaf->header.flags = node_level(0);
843 write_tree_block(root, leaf_buf);
844 } else
4920c9ac 845 del_ptr(root, path, 1);
be0e5c09
CM
846 } else {
847 if (slot == 0)
eb60ceac
CM
848 fixup_low_keys(root, path, &leaf->items[0].key, 1);
849 write_tree_block(root, leaf_buf);
74123bd7 850 /* delete the leaf if it is mostly empty */
be0e5c09
CM
851 if (leaf_space_used(leaf, 0, leaf->header.nritems) <
852 LEAF_DATA_SIZE / 4) {
853 /* push_leaf_left fixes the path.
854 * make sure the path still points to our leaf
855 * for possible call to del_ptr below
856 */
4920c9ac 857 slot = path->slots[1];
eb60ceac 858 leaf_buf->count++;
4920c9ac 859 push_leaf_left(root, path, 1);
be0e5c09 860 if (leaf->header.nritems == 0) {
4920c9ac
CM
861 path->slots[1] = slot;
862 del_ptr(root, path, 1);
be0e5c09 863 }
eb60ceac 864 tree_block_release(root, leaf_buf);
be0e5c09
CM
865 }
866 }
867 return 0;
868}
869
d97e63b6
CM
870int next_leaf(struct ctree_root *root, struct ctree_path *path)
871{
872 int slot;
873 int level = 1;
874 u64 blocknr;
875 struct tree_buffer *c;
cfaa7295 876 struct tree_buffer *next = NULL;
d97e63b6
CM
877
878 while(level < MAX_LEVEL) {
879 if (!path->nodes[level])
880 return -1;
881 slot = path->slots[level] + 1;
882 c = path->nodes[level];
883 if (slot >= c->node.header.nritems) {
884 level++;
885 continue;
886 }
887 blocknr = c->node.blockptrs[slot];
cfaa7295
CM
888 if (next)
889 tree_block_release(root, next);
d97e63b6
CM
890 next = read_tree_block(root, blocknr);
891 break;
892 }
893 path->slots[level] = slot;
894 while(1) {
895 level--;
896 c = path->nodes[level];
897 tree_block_release(root, c);
898 path->nodes[level] = next;
899 path->slots[level] = 0;
900 if (!level)
901 break;
902 next = read_tree_block(root, next->node.blockptrs[0]);
903 }
904 return 0;
905}
906
cfaa7295 907int alloc_extent(struct ctree_root *orig_root, u64 num_blocks, u64 search_start,
d97e63b6
CM
908 u64 search_end, u64 owner, struct key *ins)
909{
910 struct ctree_path path;
911 struct key *key;
912 int ret;
913 u64 hole_size = 0;
914 int slot = 0;
915 u64 last_block;
916 int start_found = 0;
917 struct leaf *l;
918 struct extent_item extent_item;
cfaa7295 919 struct ctree_root * root = orig_root->extent_root;
d97e63b6
CM
920
921 init_path(&path);
922 ins->objectid = search_start;
923 ins->offset = 0;
924 ins->flags = 0;
925
5c680ed6 926 ret = search_slot(root, ins, &path, sizeof(struct extent_item));
d97e63b6
CM
927 while (1) {
928 l = &path.nodes[0]->leaf;
929 slot = path.slots[0];
930 if (!l) {
931 // FIXME allocate root
932 }
933 if (slot >= l->header.nritems) {
934 ret = next_leaf(root, &path);
935 if (ret == 0)
936 continue;
937 if (!start_found) {
938 ins->objectid = search_start;
939 ins->offset = num_blocks;
940 hole_size = search_end - search_start;
941 goto insert;
942 }
943 ins->objectid = last_block;
944 ins->offset = num_blocks;
945 hole_size = search_end - last_block;
946 goto insert;
947 }
948 key = &l->items[slot].key;
949 if (start_found) {
950 hole_size = key->objectid - last_block;
951 if (hole_size > num_blocks) {
952 ins->objectid = last_block;
953 ins->offset = num_blocks;
954 goto insert;
955 }
956 } else
957 start_found = 1;
958 last_block = key->objectid + key->offset;
959 path.slots[0]++;
d97e63b6
CM
960 }
961 // FIXME -ENOSPC
962insert:
cfaa7295 963 release_path(root, &path);
d97e63b6
CM
964 extent_item.refs = 1;
965 extent_item.owner = owner;
cfaa7295
CM
966 if (root == orig_root && root->reserve_extent->num_blocks == 0) {
967 root->reserve_extent->blocknr = ins->objectid;
968 root->reserve_extent->num_blocks = ins->offset;
969 root->reserve_extent->num_used = 0;
970 }
971 ret = insert_item(root->extent_root, ins, &extent_item, sizeof(extent_item));
d97e63b6
CM
972 return ret;
973}
974
975static int refill_alloc_extent(struct ctree_root *root)
976{
977 struct alloc_extent *ae = root->alloc_extent;
978 struct key key;
979 int ret;
980 int min_blocks = MAX_LEVEL * 2;
981
d97e63b6
CM
982 if (ae->num_blocks > ae->num_used && ae->num_blocks - ae->num_used >
983 min_blocks)
984 return 0;
985 ae = root->reserve_extent;
986 if (ae->num_blocks > ae->num_used) {
987 if (root->alloc_extent->num_blocks == 0) {
988 /* we should swap reserve/alloc_extent when alloc
989 * fills up
990 */
991 BUG();
992 }
993 if (ae->num_blocks - ae->num_used < min_blocks)
994 BUG();
995 return 0;
996 }
cfaa7295
CM
997 ret = alloc_extent(root,
998 min_blocks * 2, 0, (unsigned long)-1,
999 root->node->node.header.parentid, &key);
d97e63b6
CM
1000 ae->blocknr = key.objectid;
1001 ae->num_blocks = key.offset;
1002 ae->num_used = 0;
1003 return ret;
1004}
1005
be0e5c09
CM
1006void print_leaf(struct leaf *l)
1007{
1008 int i;
1009 int nr = l->header.nritems;
1010 struct item *item;
cfaa7295 1011 struct extent_item *ei;
eb60ceac 1012 printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr,
be0e5c09
CM
1013 leaf_free_space(l));
1014 fflush(stdout);
1015 for (i = 0 ; i < nr ; i++) {
1016 item = l->items + i;
1017 printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n",
1018 i,
1019 item->key.objectid, item->key.flags, item->key.offset,
1020 item->offset, item->size);
1021 fflush(stdout);
1022 printf("\t\titem data %.*s\n", item->size, l->data+item->offset);
cfaa7295
CM
1023 ei = (struct extent_item *)(l->data + item->offset);
1024 printf("\t\textent data %u %lu\n", ei->refs, ei->owner);
be0e5c09
CM
1025 fflush(stdout);
1026 }
1027}
eb60ceac 1028void print_tree(struct ctree_root *root, struct tree_buffer *t)
be0e5c09
CM
1029{
1030 int i;
1031 int nr;
eb60ceac 1032 struct node *c;
be0e5c09 1033
eb60ceac 1034 if (!t)
be0e5c09 1035 return;
eb60ceac 1036 c = &t->node;
be0e5c09 1037 nr = c->header.nritems;
eb60ceac
CM
1038 if (c->header.blocknr != t->blocknr)
1039 BUG();
be0e5c09
CM
1040 if (is_leaf(c->header.flags)) {
1041 print_leaf((struct leaf *)c);
1042 return;
1043 }
eb60ceac 1044 printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr,
be0e5c09
CM
1045 node_level(c->header.flags), c->header.nritems,
1046 NODEPTRS_PER_BLOCK - c->header.nritems);
1047 fflush(stdout);
1048 for (i = 0; i < nr; i++) {
eb60ceac 1049 printf("\tkey %d (%lu %u %lu) block %lu\n",
be0e5c09
CM
1050 i,
1051 c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset,
1052 c->blockptrs[i]);
1053 fflush(stdout);
1054 }
1055 for (i = 0; i < nr; i++) {
eb60ceac
CM
1056 struct tree_buffer *next_buf = read_tree_block(root,
1057 c->blockptrs[i]);
1058 struct node *next = &next_buf->node;
be0e5c09
CM
1059 if (is_leaf(next->header.flags) &&
1060 node_level(c->header.flags) != 1)
1061 BUG();
1062 if (node_level(next->header.flags) !=
1063 node_level(c->header.flags) - 1)
1064 BUG();
eb60ceac
CM
1065 print_tree(root, next_buf);
1066 tree_block_release(root, next_buf);
be0e5c09
CM
1067 }
1068
1069}
1070
/*
 * for testing only
 *
 * Picks the key used for test iteration i.  Keys are handed out
 * sequentially, so the result is simply i and max_key is ignored; a
 * randomized run could return rand() % max_key instead.
 */
int next_key(int i, int max_key)
{
	(void)max_key;
	return i;
}
1076
1077int main() {
eb60ceac 1078 struct ctree_root *root;
be0e5c09 1079 struct key ins;
4920c9ac 1080 struct key last = { (u64)-1, 0, 0};
be0e5c09
CM
1081 char *buf;
1082 int i;
1083 int num;
1084 int ret;
cfaa7295 1085 int run_size = 10000;
be0e5c09
CM
1086 int max_key = 100000000;
1087 int tree_size = 0;
1088 struct ctree_path path;
cfaa7295 1089 struct ctree_super_block super;
be0e5c09 1090
eb60ceac
CM
1091 radix_tree_init();
1092
1093
cfaa7295
CM
1094 root = open_ctree("dbfile", &super);
1095 printf("root tree\n");
1096 print_tree(root, root->node);
1097 printf("map tree\n");
1098 print_tree(root->extent_root, root->extent_root->node);
be0e5c09
CM
1099
1100 srand(55);
be0e5c09
CM
1101 for (i = 0; i < run_size; i++) {
1102 buf = malloc(64);
1103 num = next_key(i, max_key);
1104 // num = i;
1105 sprintf(buf, "string-%d", num);
1106 // printf("insert %d\n", num);
1107 ins.objectid = num;
1108 ins.offset = 0;
1109 ins.flags = 0;
eb60ceac 1110 ret = insert_item(root, &ins, buf, strlen(buf));
be0e5c09
CM
1111 if (!ret)
1112 tree_size++;
1113 }
d97e63b6
CM
1114 printf("root used: %lu\n", root->alloc_extent->num_used);
1115 printf("root tree\n");
cfaa7295 1116 // print_tree(root, root->node);
d97e63b6
CM
1117 printf("map tree\n");
1118 printf("map used: %lu\n", root->extent_root->alloc_extent->num_used);
cfaa7295
CM
1119 // print_tree(root->extent_root, root->extent_root->node);
1120 write_ctree_super(root, &super);
eb60ceac 1121 close_ctree(root);
cfaa7295
CM
1122
1123 root = open_ctree("dbfile", &super);
eb60ceac 1124 printf("starting search\n");
be0e5c09
CM
1125 srand(55);
1126 for (i = 0; i < run_size; i++) {
1127 num = next_key(i, max_key);
1128 ins.objectid = num;
be0e5c09 1129 init_path(&path);
5c680ed6 1130 ret = search_slot(root, &ins, &path, 0);
be0e5c09 1131 if (ret) {
eb60ceac 1132 print_tree(root, root->node);
be0e5c09
CM
1133 printf("unable to find %d\n", num);
1134 exit(1);
1135 }
eb60ceac
CM
1136 release_path(root, &path);
1137 }
cfaa7295 1138 write_ctree_super(root, &super);
eb60ceac 1139 close_ctree(root);
cfaa7295 1140 root = open_ctree("dbfile", &super);
eb60ceac
CM
1141 printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
1142 node_level(root->node->node.header.flags),
1143 root->node->node.header.nritems,
1144 NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
1145 printf("all searches good, deleting some items\n");
be0e5c09
CM
1146 i = 0;
1147 srand(55);
4920c9ac
CM
1148 for (i = 0 ; i < run_size/4; i++) {
1149 num = next_key(i, max_key);
1150 ins.objectid = num;
1151 init_path(&path);
5c680ed6 1152 ret = search_slot(root, &ins, &path, 0);
4920c9ac
CM
1153 if (ret)
1154 continue;
eb60ceac 1155 ret = del_item(root, &path);
4920c9ac
CM
1156 if (ret != 0)
1157 BUG();
eb60ceac 1158 release_path(root, &path);
4920c9ac
CM
1159 tree_size--;
1160 }
1161 srand(128);
be0e5c09 1162 for (i = 0; i < run_size; i++) {
4920c9ac 1163 buf = malloc(64);
be0e5c09 1164 num = next_key(i, max_key);
4920c9ac 1165 sprintf(buf, "string-%d", num);
be0e5c09 1166 ins.objectid = num;
eb60ceac 1167 ret = insert_item(root, &ins, buf, strlen(buf));
4920c9ac
CM
1168 if (!ret)
1169 tree_size++;
1170 }
cfaa7295 1171 write_ctree_super(root, &super);
eb60ceac 1172 close_ctree(root);
cfaa7295 1173 root = open_ctree("dbfile", &super);
eb60ceac
CM
1174 printf("starting search2\n");
1175 srand(128);
1176 for (i = 0; i < run_size; i++) {
1177 num = next_key(i, max_key);
1178 ins.objectid = num;
1179 init_path(&path);
5c680ed6 1180 ret = search_slot(root, &ins, &path, 0);
eb60ceac
CM
1181 if (ret) {
1182 print_tree(root, root->node);
1183 printf("unable to find %d\n", num);
1184 exit(1);
1185 }
1186 release_path(root, &path);
1187 }
1188 printf("starting big long delete run\n");
1189 while(root->node && root->node->node.header.nritems > 0) {
4920c9ac
CM
1190 struct leaf *leaf;
1191 int slot;
1192 ins.objectid = (u64)-1;
1193 init_path(&path);
5c680ed6 1194 ret = search_slot(root, &ins, &path, 0);
4920c9ac
CM
1195 if (ret == 0)
1196 BUG();
1197
eb60ceac 1198 leaf = &path.nodes[0]->leaf;
4920c9ac
CM
1199 slot = path.slots[0];
1200 if (slot != leaf->header.nritems)
1201 BUG();
1202 while(path.slots[0] > 0) {
1203 path.slots[0] -= 1;
1204 slot = path.slots[0];
eb60ceac 1205 leaf = &path.nodes[0]->leaf;
4920c9ac
CM
1206
1207 if (comp_keys(&last, &leaf->items[slot].key) <= 0)
1208 BUG();
1209 memcpy(&last, &leaf->items[slot].key, sizeof(last));
eb60ceac
CM
1210 ret = del_item(root, &path);
1211 if (ret != 0) {
1212 printf("del_item returned %d\n", ret);
4920c9ac 1213 BUG();
eb60ceac 1214 }
4920c9ac
CM
1215 tree_size--;
1216 }
eb60ceac 1217 release_path(root, &path);
be0e5c09 1218 }
cfaa7295 1219 write_ctree_super(root, &super);
eb60ceac 1220 close_ctree(root);
4920c9ac 1221 printf("tree size is now %d\n", tree_size);
be0e5c09
CM
1222 return 0;
1223}