]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - fs/btrfs/extent-tree.c
Btrfs: Add checks for last byte in disk to allocator grouping
[mirror_ubuntu-artful-kernel.git] / fs / btrfs / extent-tree.c
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17 */
18
19 #include <linux/sched.h>
20 #include <linux/crc32c.h>
21 #include <linux/pagemap.h>
22 #include "hash.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27
28 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK
29 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
30 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
31
32 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
33 btrfs_root *extent_root);
34 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
35 btrfs_root *extent_root);
36 static int find_previous_extent(struct btrfs_root *root,
37 struct btrfs_path *path)
38 {
39 struct btrfs_key found_key;
40 struct extent_buffer *leaf;
41 int ret;
42
43 while(1) {
44 if (path->slots[0] == 0) {
45 ret = btrfs_prev_leaf(root, path);
46 if (ret != 0)
47 return ret;
48 } else {
49 path->slots[0]--;
50 }
51 leaf = path->nodes[0];
52 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
53 if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
54 return 0;
55 }
56 return 1;
57 }
58
59 static int cache_block_group(struct btrfs_root *root,
60 struct btrfs_block_group_cache *block_group)
61 {
62 struct btrfs_path *path;
63 int ret;
64 struct btrfs_key key;
65 struct extent_buffer *leaf;
66 struct extent_io_tree *free_space_cache;
67 int slot;
68 u64 last = 0;
69 u64 hole_size;
70 u64 first_free;
71 int found = 0;
72
73 if (!block_group)
74 return 0;
75
76 root = root->fs_info->extent_root;
77 free_space_cache = &root->fs_info->free_space_cache;
78
79 if (block_group->cached)
80 return 0;
81
82 path = btrfs_alloc_path();
83 if (!path)
84 return -ENOMEM;
85
86 path->reada = 2;
87 first_free = block_group->key.objectid;
88 key.objectid = block_group->key.objectid;
89 key.offset = 0;
90 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
91 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
92 if (ret < 0)
93 return ret;
94 ret = find_previous_extent(root, path);
95 if (ret < 0)
96 return ret;
97 if (ret == 0) {
98 leaf = path->nodes[0];
99 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
100 if (key.objectid + key.offset > first_free)
101 first_free = key.objectid + key.offset;
102 }
103 while(1) {
104 leaf = path->nodes[0];
105 slot = path->slots[0];
106 if (slot >= btrfs_header_nritems(leaf)) {
107 ret = btrfs_next_leaf(root, path);
108 if (ret < 0)
109 goto err;
110 if (ret == 0) {
111 continue;
112 } else {
113 break;
114 }
115 }
116 btrfs_item_key_to_cpu(leaf, &key, slot);
117 if (key.objectid < block_group->key.objectid) {
118 goto next;
119 }
120 if (key.objectid >= block_group->key.objectid +
121 block_group->key.offset) {
122 break;
123 }
124
125 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
126 if (!found) {
127 last = first_free;
128 found = 1;
129 }
130 if (key.objectid > last) {
131 hole_size = key.objectid - last;
132 set_extent_dirty(free_space_cache, last,
133 last + hole_size - 1,
134 GFP_NOFS);
135 }
136 last = key.objectid + key.offset;
137 }
138 next:
139 path->slots[0]++;
140 }
141
142 if (!found)
143 last = first_free;
144 if (block_group->key.objectid +
145 block_group->key.offset > last) {
146 hole_size = block_group->key.objectid +
147 block_group->key.offset - last;
148 set_extent_dirty(free_space_cache, last,
149 last + hole_size - 1, GFP_NOFS);
150 }
151 block_group->cached = 1;
152 err:
153 btrfs_free_path(path);
154 return 0;
155 }
156
157 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
158 btrfs_fs_info *info,
159 u64 bytenr)
160 {
161 struct extent_io_tree *block_group_cache;
162 struct btrfs_block_group_cache *block_group = NULL;
163 u64 ptr;
164 u64 start;
165 u64 end;
166 int ret;
167
168 block_group_cache = &info->block_group_cache;
169 ret = find_first_extent_bit(block_group_cache,
170 bytenr, &start, &end,
171 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
172 if (ret) {
173 return NULL;
174 }
175 ret = get_state_private(block_group_cache, start, &ptr);
176 if (ret)
177 return NULL;
178
179 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
180 if (block_group->key.objectid <= bytenr && bytenr <
181 block_group->key.objectid + block_group->key.offset)
182 return block_group;
183 return NULL;
184 }
/*
 * Find a free span of at least @num bytes at or after @search_start,
 * starting inside *@cache_ret and consulting the free_space_cache.
 * Falls over to later block groups when the current one has no suitable
 * hole, and retries once from @search_start in MIXED mode before giving
 * up.
 *
 * Returns the start of a usable range; when no block group can be
 * found at all it returns @search_start (best effort, with a WARN).
 * On success *@cache_ret is updated to the group that was searched.
 */
static u64 noinline find_search_start(struct btrfs_root *root,
				      struct btrfs_block_group_cache **cache_ret,
				      u64 search_start, int num, int data)
{
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
	u64 last;
	u64 start = 0;
	u64 end = 0;
	u64 cache_miss = 0;
	u64 total_fs_bytes;
	int wrapped = 0;

	if (!cache) {
		goto out;
	}
	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
again:
	/* make sure the free extents of this group are cached */
	ret = cache_block_group(root, cache);
	if (ret)
		goto out;

	last = max(search_start, cache->key.objectid);

	while(1) {
		ret = find_first_extent_bit(&root->fs_info->free_space_cache,
					    last, &start, &end, EXTENT_DIRTY);
		if (ret) {
			/* no free space past @last anywhere */
			if (!cache_miss)
				cache_miss = last;
			goto new_group;
		}

		start = max(last, start);
		last = end + 1;
		if (last - start < num) {
			/* hole too small; note where this group's space ends */
			if (last == cache->key.objectid + cache->key.offset)
				cache_miss = start;
			continue;
		}
		/* outside MIXED mode, the range must not cross the group */
		if (data != BTRFS_BLOCK_GROUP_MIXED &&
		    start + num > cache->key.objectid + cache->key.offset)
			goto new_group;
		/* never hand out bytes past the last byte of the disk */
		if (start + num > total_fs_bytes)
			goto new_group;
		return start;
	}
out:
	cache = btrfs_lookup_block_group(root->fs_info, search_start);
	if (!cache) {
		printk("Unable to find block group for %Lu\n",
		       search_start);
		WARN_ON(1);
		return search_start;
	}
	return search_start;

new_group:
	last = cache->key.objectid + cache->key.offset;
wrapped:
	cache = btrfs_lookup_block_group(root->fs_info, last);
	if (!cache || cache->key.objectid >= total_fs_bytes) {
no_cache:
		/* ran out of groups: retry once from the top in MIXED mode */
		if (!wrapped) {
			wrapped = 1;
			last = search_start;
			data = BTRFS_BLOCK_GROUP_MIXED;
			goto wrapped;
		}
		goto out;
	}
	if (cache_miss && !cache->cached) {
		/* the group that produced the miss was never cached; cache
		 * it and resume from the remembered position */
		cache_block_group(root, cache);
		last = cache_miss;
		cache = btrfs_lookup_block_group(root->fs_info, last);
	}
	cache = btrfs_find_block_group(root, cache, last, data, 0);
	if (!cache)
		goto no_cache;
	*cache_ret = cache;
	cache_miss = 0;
	goto again;
}
268
269 static u64 div_factor(u64 num, int factor)
270 {
271 if (factor == 10)
272 return num;
273 num *= factor;
274 do_div(num, 10);
275 return num;
276 }
277
/*
 * Pick a block group to allocate from.
 *
 * Preference order:
 *   1) the group containing @search_start, if its type matches @data
 *      and it is less than factor/10 full,
 *   2) the caller supplied @hint under the same conditions,
 *   3) a scan of all groups of the matching type starting after the
 *      hints,
 *   4) the same scan with the fullness check disabled (full_search),
 *   5) a final scan accepting either data or metadata groups.
 *
 * Groups that start past the end of the device (total_fs_bytes) are
 * never returned.  May return NULL when nothing qualifies.
 */
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						       struct btrfs_block_group_cache
						       *hint, u64 search_start,
						       int data, int owner)
{
	struct btrfs_block_group_cache *cache;
	struct extent_io_tree *block_group_cache;
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
	u64 last = 0;
	u64 hint_last;
	u64 start;
	u64 end;
	u64 free_check;
	u64 ptr;
	u64 total_fs_bytes;
	int bit;
	int ret;
	int full_search = 0;
	int factor = 8;
	int data_swap = 0;

	block_group_cache = &info->block_group_cache;
	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);

	if (!owner)
		factor = 8;

	if (data == BTRFS_BLOCK_GROUP_MIXED) {
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
		/* mixed mode accepts a completely full group */
		factor = 10;
	} else if (data)
		bit = BLOCK_GROUP_DATA;
	else
		bit = BLOCK_GROUP_METADATA;

	/* 1) the group containing the hinted start byte */
	if (search_start && search_start < total_fs_bytes) {
		struct btrfs_block_group_cache *shint;
		shint = btrfs_lookup_block_group(info, search_start);
		if (shint && (shint->data == data ||
			      shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
				return shint;
			}
		}
	}
	/* 2) the caller supplied hint group */
	if (hint && hint->key.objectid < total_fs_bytes &&
	    (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
		used = btrfs_block_group_used(&hint->item);
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
			return hint;
		}
		last = hint->key.objectid + hint->key.offset;
		hint_last = last;
	} else {
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		if (hint_last >= total_fs_bytes)
			hint_last = search_start;
		last = hint_last;
	}
again:
	/* 3..5) scan the groups matching @bit starting at @last */
	while(1) {
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, bit);
		if (ret)
			break;

		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		/* never pick a group starting past the end of the disk */
		if (cache->key.objectid > total_fs_bytes)
			break;

		if (full_search)
			free_check = cache->key.offset;
		else
			free_check = div_factor(cache->key.offset, factor);
		if (used + cache->pinned < free_check) {
			found_group = cache;
			goto found;
		}
		cond_resched();
	}
	if (!full_search) {
		/* nothing under the fill factor: accept any free room */
		last = search_start;
		full_search = 1;
		goto again;
	}
	if (!data_swap) {
		/* still nothing: accept groups of either type */
		data_swap = 1;
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
		last = search_start;
		goto again;
	}
found:
	return found_group;
}
388
389 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
390 u64 owner, u64 owner_offset)
391 {
392 u32 high_crc = ~(u32)0;
393 u32 low_crc = ~(u32)0;
394 __le64 lenum;
395
396 lenum = cpu_to_le64(root_objectid);
397 high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
398 lenum = cpu_to_le64(ref_generation);
399 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
400 if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
401 lenum = cpu_to_le64(owner);
402 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
403 lenum = cpu_to_le64(owner_offset);
404 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
405 }
406 return ((u64)high_crc << 32) | (u64)low_crc;
407 }
408
409 static int match_extent_ref(struct extent_buffer *leaf,
410 struct btrfs_extent_ref *disk_ref,
411 struct btrfs_extent_ref *cpu_ref)
412 {
413 int ret;
414 int len;
415
416 if (cpu_ref->objectid)
417 len = sizeof(*cpu_ref);
418 else
419 len = 2 * sizeof(u64);
420 ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
421 len);
422 return ret == 0;
423 }
424
/*
 * Find the backref item on extent @bytenr matching (@root_objectid,
 * @ref_generation, @owner, @owner_offset).
 *
 * The key offset holds a hash of the ref fields (see hash_extent_ref());
 * collisions are resolved by scanning forward through consecutive
 * BTRFS_EXTENT_REF_KEY items with the same objectid until
 * match_extent_ref() succeeds.
 *
 * @del: non-zero to search cow-style (ins_len == -1) so the caller may
 * delete the item; the search is then restarted from the found offset
 * because the tree can shift after the path is released.
 *
 * Returns 0 with @path at the matching item, > 0 when no match exists,
 * or a negative errno.
 */
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, u64 bytenr,
					  u64 root_objectid,
					  u64 ref_generation, u64 owner,
					  u64 owner_offset, int del)
{
	u64 hash;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_extent_ref ref;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *disk_ref;
	int ret;
	int ret2;

	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, ref_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path,
					del ? -1 : 0, del);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		if (ret != 0) {
			/* exact key missing: the next item may still be a
			 * colliding ref with a larger offset */
			u32 nritems = btrfs_header_nritems(leaf);
			if (path->slots[0] >= nritems) {
				ret2 = btrfs_next_leaf(root, path);
				if (ret2)
					goto out;
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
			if (found_key.objectid != bytenr ||
			    found_key.type != BTRFS_EXTENT_REF_KEY)
				goto out;
			key.offset = found_key.offset;
			if (del) {
				/* re-search with cow before deleting */
				btrfs_release_path(root, path);
				continue;
			}
		}
		disk_ref = btrfs_item_ptr(path->nodes[0],
					  path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
			ret = 0;
			goto out;
		}
		/* not our ref: advance past this colliding item and retry */
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		key.offset = found_key.offset + 1;
		btrfs_release_path(root, path);
	}
out:
	return ret;
}
490
491 /*
492 * Back reference rules. Back refs have three main goals:
493 *
494 * 1) differentiate between all holders of references to an extent so that
495 * when a reference is dropped we can make sure it was a valid reference
496 * before freeing the extent.
497 *
498 * 2) Provide enough information to quickly find the holders of an extent
499 * if we notice a given block is corrupted or bad.
500 *
501 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
502 * maintenance. This is actually the same as #2, but with a slightly
503 * different use case.
504 *
505 * File extents can be referenced by:
506 *
507 * - multiple snapshots, subvolumes, or different generations in one subvol
508 * - different files inside a single subvolume (in theory, not implemented yet)
509 * - different offsets inside a file (bookend extents in file.c)
510 *
511 * The extent ref structure has fields for:
512 *
513 * - Objectid of the subvolume root
514 * - Generation number of the tree holding the reference
515 * - objectid of the file holding the reference
516 * - offset in the file corresponding to the key holding the reference
517 *
518 * When a file extent is allocated the fields are filled in:
519 * (root_key.objectid, trans->transid, inode objectid, offset in file)
520 *
521 * When a leaf is cow'd new references are added for every file extent found
522 * in the leaf. It looks the same as the create case, but trans->transid
523 * will be different when the block is cow'd.
524 *
525 * (root_key.objectid, trans->transid, inode objectid, offset in file)
526 *
527 * When a file extent is removed either during snapshot deletion or file
528 * truncation, the corresponding back reference is found
529 * by searching for:
530 *
531 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
532 * inode objectid, offset in file)
533 *
534 * Btree extents can be referenced by:
535 *
536 * - Different subvolumes
537 * - Different generations of the same subvolume
538 *
539 * Storing sufficient information for a full reverse mapping of a btree
540 * block would require storing the lowest key of the block in the backref,
541 * and it would require updating that lowest key either before write out or
542 * every time it changed. Instead, the objectid of the lowest key is stored
543 * along with the level of the tree block. This provides a hint
544 * about where in the btree the block can be found. Searches through the
545 * btree only need to look for a pointer to that block, so they stop one
546 * level higher than the level recorded in the backref.
547 *
548 * Some btrees do not do reference counting on their extents. These
549 * include the extent tree and the tree of tree roots. Backrefs for these
550 * trees always have a generation of zero.
551 *
552 * When a tree block is created, back references are inserted:
553 *
554 * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
555 *
556 * When a tree block is cow'd in a reference counted root,
557 * new back references are added for all the blocks it points to.
558 * These are of the form (trans->transid will have increased since creation):
559 *
560 * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
561 *
562 * Because the lowest_key_objectid and the level are just hints
563 * they are not used when backrefs are deleted. When a backref is deleted:
564 *
565 * if backref was for a tree root:
566 * root_objectid = root->root_key.objectid
567 * else
568 * root_objectid = btrfs_header_owner(parent)
569 *
570 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
571 *
572 * Back Reference Key hashing:
573 *
574 * Back references have four fields, each 64 bits long. Unfortunately,
575  * this is hashed into a single 64 bit number and placed into the key offset.
576 * The key objectid corresponds to the first byte in the extent, and the
577 * key type is set to BTRFS_EXTENT_REF_KEY
578 */
/*
 * Insert a backref item for the extent at @bytenr recording
 * (@root_objectid, @ref_generation, @owner, @owner_offset).
 *
 * The key offset is a hash of the ref fields; on -EEXIST the offset is
 * incremented until a free slot is found or an identical ref turns out
 * to already exist.  Returns 0 in both of those cases, otherwise the
 * insert error.
 */
int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path, u64 bytenr,
				u64 root_objectid, u64 ref_generation,
				u64 owner, u64 owner_offset)
{
	u64 hash;
	struct btrfs_key key;
	struct btrfs_extent_ref ref;
	struct btrfs_extent_ref *disk_ref;
	int ret;

	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, ref_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	/* key.offset is a hash of the ref fields; see hash_extent_ref() */
	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
	while (ret == -EEXIST) {
		/* hash collision: stop if the existing item already records
		 * exactly this ref, otherwise probe the next offset */
		disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref))
			goto out;
		key.offset++;
		btrfs_release_path(root, path);
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      sizeof(ref));
	}
	if (ret)
		goto out;
	disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				  struct btrfs_extent_ref);
	write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
			    sizeof(ref));
	btrfs_mark_buffer_dirty(path->nodes[0]);
out:
	btrfs_release_path(root, path);
	return ret;
}
624
625 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
626 struct btrfs_root *root,
627 u64 bytenr, u64 num_bytes,
628 u64 root_objectid, u64 ref_generation,
629 u64 owner, u64 owner_offset)
630 {
631 struct btrfs_path *path;
632 int ret;
633 struct btrfs_key key;
634 struct extent_buffer *l;
635 struct btrfs_extent_item *item;
636 u32 refs;
637
638 WARN_ON(num_bytes < root->sectorsize);
639 path = btrfs_alloc_path();
640 if (!path)
641 return -ENOMEM;
642
643 path->reada = 0;
644 key.objectid = bytenr;
645 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
646 key.offset = num_bytes;
647 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
648 0, 1);
649 if (ret < 0)
650 return ret;
651 if (ret != 0) {
652 BUG();
653 }
654 BUG_ON(ret != 0);
655 l = path->nodes[0];
656 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
657 refs = btrfs_extent_refs(l, item);
658 btrfs_set_extent_refs(l, item, refs + 1);
659 btrfs_mark_buffer_dirty(path->nodes[0]);
660
661 btrfs_release_path(root->fs_info->extent_root, path);
662
663 path->reada = 0;
664 ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
665 path, bytenr, root_objectid,
666 ref_generation, owner, owner_offset);
667 BUG_ON(ret);
668 finish_current_insert(trans, root->fs_info->extent_root);
669 del_pending_extents(trans, root->fs_info->extent_root);
670
671 btrfs_free_path(path);
672 return 0;
673 }
674
675 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
676 struct btrfs_root *root)
677 {
678 finish_current_insert(trans, root->fs_info->extent_root);
679 del_pending_extents(trans, root->fs_info->extent_root);
680 return 0;
681 }
682
683 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
684 struct btrfs_root *root, u64 bytenr,
685 u64 num_bytes, u32 *refs)
686 {
687 struct btrfs_path *path;
688 int ret;
689 struct btrfs_key key;
690 struct extent_buffer *l;
691 struct btrfs_extent_item *item;
692
693 WARN_ON(num_bytes < root->sectorsize);
694 path = btrfs_alloc_path();
695 path->reada = 0;
696 key.objectid = bytenr;
697 key.offset = num_bytes;
698 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
699 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
700 0, 0);
701 if (ret < 0)
702 goto out;
703 if (ret != 0) {
704 btrfs_print_leaf(root, path->nodes[0]);
705 printk("failed to find block number %Lu\n", bytenr);
706 BUG();
707 }
708 l = path->nodes[0];
709 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
710 *refs = btrfs_extent_refs(l, item);
711 out:
712 btrfs_free_path(path);
713 return 0;
714 }
715
/*
 * Count how many different tree roots reference the chain of blocks
 * from @first_extent up through the nodes recorded in @count_path.
 *
 * Returns 0 if some extent on the way has no references at all, 1 while
 * every backref seen belongs to this @root, and 2 as soon as a foreign
 * root is found.  Callers only need to know whether the path is shared,
 * so the count saturates at 2.
 */
u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
				  struct btrfs_path *count_path,
				  u64 first_extent)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_path *path;
	u64 bytenr;
	u64 found_objectid;
	u64 root_objectid = root->root_key.objectid;
	u32 total_count = 0;
	u32 cur_count;
	u32 nritems;
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	struct btrfs_extent_ref *ref_item;
	int level = -1;

	/* NOTE(review): btrfs_alloc_path() result is not checked here */
	path = btrfs_alloc_path();
again:
	/* level -1 means the data extent itself; >= 0 walks up the nodes */
	if (level == -1)
		bytenr = first_extent;
	else
		bytenr = count_path->nodes[level]->start;

	cur_count = 0;
	key.objectid = bytenr;
	key.offset = 0;

	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	/* offset 0 sorts before any real extent item, so an exact hit
	 * would mean a corrupted tree */
	BUG_ON(ret == 0);

	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);

	if (found_key.objectid != bytenr ||
	    found_key.type != BTRFS_EXTENT_ITEM_KEY) {
		goto out;
	}

	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	/* scan the backref items that follow the extent item */
	while (1) {
		l = path->nodes[0];
		nritems = btrfs_header_nritems(l);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret == 0)
				continue;
			break;
		}
		btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
		if (found_key.objectid != bytenr)
			break;

		if (found_key.type != BTRFS_EXTENT_REF_KEY) {
			path->slots[0]++;
			continue;
		}

		cur_count++;
		ref_item = btrfs_item_ptr(l, path->slots[0],
					  struct btrfs_extent_ref);
		found_objectid = btrfs_ref_root(l, ref_item);

		if (found_objectid != root_objectid) {
			/* another root references this extent: shared */
			total_count = 2;
			goto out;
		}
		total_count = 1;
		path->slots[0]++;
	}
	if (cur_count == 0) {
		total_count = 0;
		goto out;
	}
	/* stop once the root block of @root has been processed */
	if (level >= 0 && root->node == count_path->nodes[level])
		goto out;
	level++;
	btrfs_release_path(root, path);
	goto again;

out:
	btrfs_free_path(path);
	return total_count;
}
806 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
807 struct btrfs_root *root, u64 owner_objectid)
808 {
809 u64 generation;
810 u64 key_objectid;
811 u64 level;
812 u32 nritems;
813 struct btrfs_disk_key disk_key;
814
815 level = btrfs_header_level(root->node);
816 generation = trans->transid;
817 nritems = btrfs_header_nritems(root->node);
818 if (nritems > 0) {
819 if (level == 0)
820 btrfs_item_key(root->node, &disk_key, 0);
821 else
822 btrfs_node_key(root->node, &disk_key, 0);
823 key_objectid = btrfs_disk_key_objectid(&disk_key);
824 } else {
825 key_objectid = 0;
826 }
827 return btrfs_inc_extent_ref(trans, root, root->node->start,
828 root->node->len, owner_objectid,
829 generation, level, key_objectid);
830 }
831
832 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
833 struct extent_buffer *buf)
834 {
835 u64 bytenr;
836 u32 nritems;
837 struct btrfs_key key;
838 struct btrfs_file_extent_item *fi;
839 int i;
840 int level;
841 int ret;
842 int faili;
843
844 if (!root->ref_cows)
845 return 0;
846
847 level = btrfs_header_level(buf);
848 nritems = btrfs_header_nritems(buf);
849 for (i = 0; i < nritems; i++) {
850 if (level == 0) {
851 u64 disk_bytenr;
852 btrfs_item_key_to_cpu(buf, &key, i);
853 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
854 continue;
855 fi = btrfs_item_ptr(buf, i,
856 struct btrfs_file_extent_item);
857 if (btrfs_file_extent_type(buf, fi) ==
858 BTRFS_FILE_EXTENT_INLINE)
859 continue;
860 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
861 if (disk_bytenr == 0)
862 continue;
863 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
864 btrfs_file_extent_disk_num_bytes(buf, fi),
865 root->root_key.objectid, trans->transid,
866 key.objectid, key.offset);
867 if (ret) {
868 faili = i;
869 goto fail;
870 }
871 } else {
872 bytenr = btrfs_node_blockptr(buf, i);
873 btrfs_node_key_to_cpu(buf, &key, i);
874 ret = btrfs_inc_extent_ref(trans, root, bytenr,
875 btrfs_level_size(root, level - 1),
876 root->root_key.objectid,
877 trans->transid,
878 level - 1, key.objectid);
879 if (ret) {
880 faili = i;
881 goto fail;
882 }
883 }
884 }
885 return 0;
886 fail:
887 WARN_ON(1);
888 #if 0
889 for (i =0; i < faili; i++) {
890 if (level == 0) {
891 u64 disk_bytenr;
892 btrfs_item_key_to_cpu(buf, &key, i);
893 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
894 continue;
895 fi = btrfs_item_ptr(buf, i,
896 struct btrfs_file_extent_item);
897 if (btrfs_file_extent_type(buf, fi) ==
898 BTRFS_FILE_EXTENT_INLINE)
899 continue;
900 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
901 if (disk_bytenr == 0)
902 continue;
903 err = btrfs_free_extent(trans, root, disk_bytenr,
904 btrfs_file_extent_disk_num_bytes(buf,
905 fi), 0);
906 BUG_ON(err);
907 } else {
908 bytenr = btrfs_node_blockptr(buf, i);
909 err = btrfs_free_extent(trans, root, bytenr,
910 btrfs_level_size(root, level - 1), 0);
911 BUG_ON(err);
912 }
913 }
914 #endif
915 return ret;
916 }
917
/*
 * Write the in-memory block group item of @cache back into the extent
 * tree.  The item must already exist (BUG otherwise).
 *
 * Pending extent inserts/deletes are flushed afterwards even when the
 * search fails; the search error takes precedence over a pending
 * delete error in the return value.
 */
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	unsigned long bi;
	struct extent_buffer *leaf;

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	if (ret < 0)
		goto fail;
	BUG_ON(ret);

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(extent_root, path);
fail:
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;

}
949
950 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
951 struct btrfs_root *root)
952 {
953 struct extent_io_tree *block_group_cache;
954 struct btrfs_block_group_cache *cache;
955 int ret;
956 int err = 0;
957 int werr = 0;
958 struct btrfs_path *path;
959 u64 last = 0;
960 u64 start;
961 u64 end;
962 u64 ptr;
963
964 block_group_cache = &root->fs_info->block_group_cache;
965 path = btrfs_alloc_path();
966 if (!path)
967 return -ENOMEM;
968
969 while(1) {
970 ret = find_first_extent_bit(block_group_cache, last,
971 &start, &end, BLOCK_GROUP_DIRTY);
972 if (ret)
973 break;
974
975 last = end + 1;
976 ret = get_state_private(block_group_cache, start, &ptr);
977 if (ret)
978 break;
979
980 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
981 err = write_one_cache_group(trans, root,
982 path, cache);
983 /*
984 * if we fail to write the cache group, we want
985 * to keep it marked dirty in hopes that a later
986 * write will work
987 */
988 if (err) {
989 werr = err;
990 continue;
991 }
992 clear_extent_bits(block_group_cache, start, end,
993 BLOCK_GROUP_DIRTY, GFP_NOFS);
994 }
995 btrfs_free_path(path);
996 return werr;
997 }
998
/*
 * Adjust the used-bytes accounting of the block group(s) covering
 * [@bytenr, @bytenr + @num_bytes) and mark those groups dirty.
 *
 * @alloc:     non-zero when bytes are being allocated, zero when freed.
 * @mark_free: when freeing, also return the range to the free space
 *             cache.
 * @data:      type of the allocation; a mostly-empty group of the
 *             wrong type is retyped to match, a fuller mismatched
 *             group becomes MIXED.
 *
 * Returns 0 on success, -1 when some byte is not covered by any group.
 */
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free, int data)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;
	u64 start;
	u64 end;

	while(total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache) {
			return -1;
		}
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);
		start = cache->key.objectid;
		end = start + cache->key.offset - 1;
		/* the group item will need to be written out at commit */
		set_extent_bits(&info->block_group_cache, start, end,
				BLOCK_GROUP_DIRTY, GFP_NOFS);

		old_val = btrfs_block_group_used(&cache->item);
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			if (cache->data != data &&
			    old_val < (cache->key.offset >> 1)) {
				/* group is less than half full: retype it
				 * to match this allocation */
				int bit_to_clear;
				int bit_to_set;
				cache->data = data;
				if (data) {
					bit_to_clear = BLOCK_GROUP_METADATA;
					bit_to_set = BLOCK_GROUP_DATA;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
					cache->item.flags |=
						BTRFS_BLOCK_GROUP_DATA;
				} else {
					bit_to_clear = BLOCK_GROUP_DATA;
					bit_to_set = BLOCK_GROUP_METADATA;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_DATA;
				}
				clear_extent_bits(&info->block_group_cache,
						  start, end, bit_to_clear,
						  GFP_NOFS);
				set_extent_bits(&info->block_group_cache,
						start, end, bit_to_set,
						GFP_NOFS);
			} else if (cache->data != data &&
				   cache->data != BTRFS_BLOCK_GROUP_MIXED) {
				/* type mismatch on a fuller group: mark it
				 * mixed so both kinds can allocate here */
				cache->data = BTRFS_BLOCK_GROUP_MIXED;
				set_extent_bits(&info->block_group_cache,
						start, end,
						BLOCK_GROUP_DATA |
						BLOCK_GROUP_METADATA,
						GFP_NOFS);
			}
			old_val += num_bytes;
		} else {
			old_val -= num_bytes;
			if (mark_free) {
				set_extent_dirty(&info->free_space_cache,
						 bytenr, bytenr + num_bytes - 1,
						 GFP_NOFS);
			}
		}
		btrfs_set_block_group_used(&cache->item, old_val);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}
1077 static int update_pinned_extents(struct btrfs_root *root,
1078 u64 bytenr, u64 num, int pin)
1079 {
1080 u64 len;
1081 struct btrfs_block_group_cache *cache;
1082 struct btrfs_fs_info *fs_info = root->fs_info;
1083
1084 if (pin) {
1085 set_extent_dirty(&fs_info->pinned_extents,
1086 bytenr, bytenr + num - 1, GFP_NOFS);
1087 } else {
1088 clear_extent_dirty(&fs_info->pinned_extents,
1089 bytenr, bytenr + num - 1, GFP_NOFS);
1090 }
1091 while (num > 0) {
1092 cache = btrfs_lookup_block_group(fs_info, bytenr);
1093 WARN_ON(!cache);
1094 len = min(num, cache->key.offset -
1095 (bytenr - cache->key.objectid));
1096 if (pin) {
1097 cache->pinned += len;
1098 fs_info->total_pinned += len;
1099 } else {
1100 cache->pinned -= len;
1101 fs_info->total_pinned -= len;
1102 }
1103 bytenr += len;
1104 num -= len;
1105 }
1106 return 0;
1107 }
1108
1109 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1110 {
1111 u64 last = 0;
1112 u64 start;
1113 u64 end;
1114 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1115 int ret;
1116
1117 while(1) {
1118 ret = find_first_extent_bit(pinned_extents, last,
1119 &start, &end, EXTENT_DIRTY);
1120 if (ret)
1121 break;
1122 set_extent_dirty(copy, start, end, GFP_NOFS);
1123 last = end + 1;
1124 }
1125 return 0;
1126 }
1127
1128 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1129 struct btrfs_root *root,
1130 struct extent_io_tree *unpin)
1131 {
1132 u64 start;
1133 u64 end;
1134 int ret;
1135 struct extent_io_tree *free_space_cache;
1136 free_space_cache = &root->fs_info->free_space_cache;
1137
1138 while(1) {
1139 ret = find_first_extent_bit(unpin, 0, &start, &end,
1140 EXTENT_DIRTY);
1141 if (ret)
1142 break;
1143 update_pinned_extents(root, start, end + 1 - start, 0);
1144 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1145 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1146 }
1147 return 0;
1148 }
1149
1150 static int finish_current_insert(struct btrfs_trans_handle *trans,
1151 struct btrfs_root *extent_root)
1152 {
1153 u64 start;
1154 u64 end;
1155 struct btrfs_fs_info *info = extent_root->fs_info;
1156 struct extent_buffer *eb;
1157 struct btrfs_path *path;
1158 struct btrfs_key ins;
1159 struct btrfs_disk_key first;
1160 struct btrfs_extent_item extent_item;
1161 int ret;
1162 int level;
1163 int err = 0;
1164
1165 btrfs_set_stack_extent_refs(&extent_item, 1);
1166 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1167 path = btrfs_alloc_path();
1168
1169 while(1) {
1170 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1171 &end, EXTENT_LOCKED);
1172 if (ret)
1173 break;
1174
1175 ins.objectid = start;
1176 ins.offset = end + 1 - start;
1177 err = btrfs_insert_item(trans, extent_root, &ins,
1178 &extent_item, sizeof(extent_item));
1179 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1180 GFP_NOFS);
1181 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1182 level = btrfs_header_level(eb);
1183 if (level == 0) {
1184 btrfs_item_key(eb, &first, 0);
1185 } else {
1186 btrfs_node_key(eb, &first, 0);
1187 }
1188 err = btrfs_insert_extent_backref(trans, extent_root, path,
1189 start, extent_root->root_key.objectid,
1190 0, level,
1191 btrfs_disk_key_objectid(&first));
1192 BUG_ON(err);
1193 free_extent_buffer(eb);
1194 }
1195 btrfs_free_path(path);
1196 return 0;
1197 }
1198
1199 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1200 int pending)
1201 {
1202 int err = 0;
1203 struct extent_buffer *buf;
1204
1205 if (!pending) {
1206 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1207 if (buf) {
1208 if (btrfs_buffer_uptodate(buf)) {
1209 u64 transid =
1210 root->fs_info->running_transaction->transid;
1211 u64 header_transid =
1212 btrfs_header_generation(buf);
1213 if (header_transid == transid) {
1214 clean_tree_block(NULL, root, buf);
1215 free_extent_buffer(buf);
1216 return 1;
1217 }
1218 }
1219 free_extent_buffer(buf);
1220 }
1221 update_pinned_extents(root, bytenr, num_bytes, 1);
1222 } else {
1223 set_extent_bits(&root->fs_info->pending_del,
1224 bytenr, bytenr + num_bytes - 1,
1225 EXTENT_LOCKED, GFP_NOFS);
1226 }
1227 BUG_ON(err < 0);
1228 return 0;
1229 }
1230
1231 /*
1232 * remove an extent from the root, returns 0 on success
1233 */
1234 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1235 *root, u64 bytenr, u64 num_bytes,
1236 u64 root_objectid, u64 ref_generation,
1237 u64 owner_objectid, u64 owner_offset, int pin,
1238 int mark_free)
1239 {
1240 struct btrfs_path *path;
1241 struct btrfs_key key;
1242 struct btrfs_fs_info *info = root->fs_info;
1243 struct btrfs_root *extent_root = info->extent_root;
1244 struct extent_buffer *leaf;
1245 int ret;
1246 struct btrfs_extent_item *ei;
1247 u32 refs;
1248
1249 key.objectid = bytenr;
1250 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1251 key.offset = num_bytes;
1252 path = btrfs_alloc_path();
1253 if (!path)
1254 return -ENOMEM;
1255
1256 path->reada = 0;
1257 ret = lookup_extent_backref(trans, extent_root, path,
1258 bytenr, root_objectid,
1259 ref_generation,
1260 owner_objectid, owner_offset, 1);
1261 if (ret == 0) {
1262 ret = btrfs_del_item(trans, extent_root, path);
1263 } else {
1264 btrfs_print_leaf(extent_root, path->nodes[0]);
1265 WARN_ON(1);
1266 printk("Unable to find ref byte nr %Lu root %Lu "
1267 " gen %Lu owner %Lu offset %Lu\n", bytenr,
1268 root_objectid, ref_generation, owner_objectid,
1269 owner_offset);
1270 }
1271 btrfs_release_path(extent_root, path);
1272 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1273 if (ret < 0)
1274 return ret;
1275 BUG_ON(ret);
1276
1277 leaf = path->nodes[0];
1278 ei = btrfs_item_ptr(leaf, path->slots[0],
1279 struct btrfs_extent_item);
1280 refs = btrfs_extent_refs(leaf, ei);
1281 BUG_ON(refs == 0);
1282 refs -= 1;
1283 btrfs_set_extent_refs(leaf, ei, refs);
1284 btrfs_mark_buffer_dirty(leaf);
1285
1286 if (refs == 0) {
1287 u64 super_used;
1288 u64 root_used;
1289
1290 if (pin) {
1291 ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1292 if (ret > 0)
1293 mark_free = 1;
1294 BUG_ON(ret < 0);
1295 }
1296
1297 /* block accounting for super block */
1298 super_used = btrfs_super_bytes_used(&info->super_copy);
1299 btrfs_set_super_bytes_used(&info->super_copy,
1300 super_used - num_bytes);
1301
1302 /* block accounting for root item */
1303 root_used = btrfs_root_used(&root->root_item);
1304 btrfs_set_root_used(&root->root_item,
1305 root_used - num_bytes);
1306
1307 ret = btrfs_del_item(trans, extent_root, path);
1308 if (ret) {
1309 return ret;
1310 }
1311 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1312 mark_free, 0);
1313 BUG_ON(ret);
1314 }
1315 btrfs_free_path(path);
1316 finish_current_insert(trans, extent_root);
1317 return ret;
1318 }
1319
1320 /*
1321 * find all the blocks marked as pending in the radix tree and remove
1322 * them from the extent map
1323 */
1324 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1325 btrfs_root *extent_root)
1326 {
1327 int ret;
1328 int err = 0;
1329 u64 start;
1330 u64 end;
1331 struct extent_io_tree *pending_del;
1332 struct extent_io_tree *pinned_extents;
1333
1334 pending_del = &extent_root->fs_info->pending_del;
1335 pinned_extents = &extent_root->fs_info->pinned_extents;
1336
1337 while(1) {
1338 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1339 EXTENT_LOCKED);
1340 if (ret)
1341 break;
1342 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1343 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1344 GFP_NOFS);
1345 ret = __free_extent(trans, extent_root,
1346 start, end + 1 - start,
1347 extent_root->root_key.objectid,
1348 0, 0, 0, 0, 0);
1349 if (ret)
1350 err = ret;
1351 }
1352 return err;
1353 }
1354
1355 /*
1356 * remove an extent from the root, returns 0 on success
1357 */
1358 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1359 *root, u64 bytenr, u64 num_bytes,
1360 u64 root_objectid, u64 ref_generation,
1361 u64 owner_objectid, u64 owner_offset, int pin)
1362 {
1363 struct btrfs_root *extent_root = root->fs_info->extent_root;
1364 int pending_ret;
1365 int ret;
1366
1367 WARN_ON(num_bytes < root->sectorsize);
1368 if (!root->ref_cows)
1369 ref_generation = 0;
1370
1371 if (root == extent_root) {
1372 pin_down_bytes(root, bytenr, num_bytes, 1);
1373 return 0;
1374 }
1375 ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1376 ref_generation, owner_objectid, owner_offset,
1377 pin, pin == 0);
1378 pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1379 return ret ? ret : pending_ret;
1380 }
1381
1382 static u64 stripe_align(struct btrfs_root *root, u64 val)
1383 {
1384 u64 mask = ((u64)root->stripesize - 1);
1385 u64 ret = (val + mask) & ~mask;
1386 return ret;
1387 }
1388
/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 *
 * On failure returns a negative errno (-ENOSPC when no hole of
 * num_bytes fits below search_end even after a full scan).
 */
static int noinline find_free_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *orig_root,
				     u64 num_bytes, u64 empty_size,
				     u64 search_start, u64 search_end,
				     u64 hint_byte, struct btrfs_key *ins,
				     u64 exclude_start, u64 exclude_nr,
				     int data)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	u64 hole_size = 0;
	u64 aligned;
	int ret;
	int slot = 0;
	u64 last_byte = 0;
	u64 *last_ptr = NULL;
	u64 orig_search_start = search_start;
	int start_found;
	struct extent_buffer *l;
	struct btrfs_root * root = orig_root->fs_info->extent_root;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total_needed = num_bytes;
	int level;
	struct btrfs_block_group_cache *block_group;
	int full_scan = 0;
	int wrapped = 0;
	u64 cached_start;

	WARN_ON(num_bytes < root->sectorsize);
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

	level = btrfs_header_level(root->node);

	/* very large allocations with a hint fall back to mixed groups */
	if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
		data = BTRFS_BLOCK_GROUP_MIXED;
	}

	/* metadata allocations (and data allocs on SSD) remember the last
	 * allocation point to keep writes contiguous */
	if (!data)
		last_ptr = &root->fs_info->last_alloc;

	if (data && btrfs_test_opt(root, SSD))
		last_ptr = &root->fs_info->last_data_alloc;

	if (last_ptr) {
		if (*last_ptr)
			hint_byte = *last_ptr;
		else {
			/* no cached position yet: align the hint down to a
			 * block group boundary and over-reserve a little */
			hint_byte = hint_byte &
				~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
			empty_size += 2 * 1024 * 1024;
		}
	}

	/* never search past the last byte on disk */
	search_end = min(search_end,
			 btrfs_super_total_bytes(&info->super_copy));
	if (hint_byte) {
		block_group = btrfs_lookup_block_group(info, hint_byte);
		if (!block_group)
			hint_byte = search_start;
		block_group = btrfs_find_block_group(root, block_group,
						     hint_byte, data, 1);
	} else {
		block_group = btrfs_find_block_group(root,
						     trans->block_group,
						     search_start, data, 1);
	}

	total_needed += empty_size;
	path = btrfs_alloc_path();
check_failed:
	if (!block_group) {
		block_group = btrfs_lookup_block_group(info, search_start);
		if (!block_group)
			block_group = btrfs_lookup_block_group(info,
						       orig_search_start);
	}
	search_start = find_search_start(root, &block_group, search_start,
					 total_needed, data);

	/* the cached position didn't pan out: forget it and retry with a
	 * larger cushion so we land in a roomier spot */
	if (last_ptr && *last_ptr && search_start != *last_ptr) {
		*last_ptr = 0;
		if (!empty_size) {
			empty_size += 16 * 1024 * 1024;
			total_needed += empty_size;
		}
		search_start = find_search_start(root, &block_group,
						 search_start, total_needed,
						 data);
	}

	search_start = stripe_align(root, search_start);
	cached_start = search_start;
	btrfs_init_path(path);
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
	path->reada = 2;

	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
	if (ret < 0)
		goto error;
	ret = find_previous_extent(root, path);
	if (ret < 0)
		goto error;
	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
	/* scan forward through the extent tree looking for a gap between
	 * allocated extents that is big enough */
	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;

			/* ran off the end of the tree: everything from here
			 * to search_end is free */
			search_start = max(search_start,
					   block_group->key.objectid);
			if (!start_found) {
				aligned = stripe_align(root, search_start);
				ins->objectid = aligned;
				if (aligned >= search_end) {
					ret = -ENOSPC;
					goto error;
				}
				ins->offset = search_end - aligned;
				start_found = 1;
				goto check_pending;
			}
			ins->objectid = stripe_align(root,
						     last_byte > search_start ?
						     last_byte : search_start);
			if (search_end <= ins->objectid) {
				ret = -ENOSPC;
				goto error;
			}
			ins->offset = search_end - ins->objectid;
			BUG_ON(ins->objectid >= search_end);
			goto check_pending;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		/* is the gap between the previous extent and this key
		 * large enough? */
		if (key.objectid >= search_start && key.objectid > last_byte &&
		    start_found) {
			if (last_byte < search_start)
				last_byte = search_start;
			aligned = stripe_align(root, last_byte);
			hole_size = key.objectid - aligned;
			if (key.objectid > aligned && hole_size >= num_bytes) {
				ins->objectid = aligned;
				ins->offset = hole_size;
				goto check_pending;
			}
		}
		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
			if (!start_found && btrfs_key_type(&key) ==
			    BTRFS_BLOCK_GROUP_ITEM_KEY) {
				last_byte = key.objectid;
				start_found = 1;
			}
			goto next;
		}


		start_found = 1;
		last_byte = key.objectid + key.offset;

		/* walked past the end of the preferred block group: move on
		 * to another group (unless scanning everything) */
		if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
		    last_byte >= block_group->key.objectid +
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
				block_group->key.offset;
			goto new_group;
		}
next:
		path->slots[0]++;
		cond_resched();
	}
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
	btrfs_release_path(root, path);
	BUG_ON(ins->objectid < search_start);

	if (ins->objectid + num_bytes >= search_end)
		goto enospc;
	if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
	    ins->objectid + num_bytes > block_group->
	    key.objectid + block_group->key.offset) {
		search_start = block_group->key.objectid +
			block_group->key.offset;
		goto new_group;
	}
	/* reject ranges already claimed by a pending insert ... */
	if (test_range_bit(&info->extent_ins, ins->objectid,
			   ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
		search_start = ins->objectid + num_bytes;
		goto new_group;
	}
	/* ... or still pinned until the transaction commits */
	if (test_range_bit(&info->pinned_extents, ins->objectid,
			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
		search_start = ins->objectid + num_bytes;
		goto new_group;
	}
	/* ... or overlapping the caller's exclusion window */
	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
	    ins->objectid < exclude_start + exclude_nr)) {
		search_start = exclude_start + exclude_nr;
		goto new_group;
	}
	if (!data) {
		block_group = btrfs_lookup_block_group(info, ins->objectid);
		if (block_group)
			trans->block_group = block_group;
	}
	ins->offset = num_bytes;
	btrfs_free_path(path);
	/* remember where this allocation ended for next time; wrap the
	 * cache when it hits the end of the disk */
	if (last_ptr) {
		*last_ptr = ins->objectid + ins->offset;
		if (*last_ptr ==
		    btrfs_super_total_bytes(&root->fs_info->super_copy)) {
			*last_ptr = 0;
		}
	}
	return 0;

new_group:
	if (search_start + num_bytes >= search_end) {
enospc:
		search_start = orig_search_start;
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
		/* first wrap: retry from the start; second: give up the
		 * empty_size cushion and scan every group as mixed */
		if (wrapped) {
			if (!full_scan)
				total_needed -= empty_size;
			full_scan = 1;
			data = BTRFS_BLOCK_GROUP_MIXED;
		} else
			wrapped = 1;
	}
	block_group = btrfs_lookup_block_group(info, search_start);
	cond_resched();
	block_group = btrfs_find_block_group(root, block_group,
					     search_start, data, 0);
	goto check_failed;

error:
	btrfs_release_path(root, path);
	btrfs_free_path(path);
	return ret;
}
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * Inserts the extent item plus its first backref (owner, owner_offset)
 * in a single leaf insertion, and updates the super/root byte counts
 * and the block group accounting.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root,
		       u64 num_bytes, u64 root_objectid, u64 ref_generation,
		       u64 owner, u64 owner_offset,
		       u64 empty_size, u64 hint_byte,
		       u64 search_end, struct btrfs_key *ins, int data)
{
	int ret;
	int pending_ret;
	u64 super_used;
	u64 root_used;
	u64 search_start = 0;
	u64 new_hint;
	u32 sizes[2];
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct btrfs_extent_item *extent_item;
	struct btrfs_extent_ref *ref;
	struct btrfs_path *path;
	struct btrfs_key keys[2];

	/* honor the configured allocation start, but only if it is still
	 * inside the device */
	new_hint = max(hint_byte, root->fs_info->alloc_start);
	if (new_hint < btrfs_super_total_bytes(&info->super_copy))
		hint_byte = new_hint;

	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(trans, root, num_bytes, empty_size,
			       search_start, search_end, hint_byte, ins,
			       trans->alloc_exclude_start,
			       trans->alloc_exclude_nr, data);
	BUG_ON(ret);
	if (ret)
		return ret;

	/* block accounting for super block */
	super_used = btrfs_super_bytes_used(&info->super_copy);
	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);

	/* block accounting for root item */
	root_used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, root_used + num_bytes);

	clear_extent_dirty(&root->fs_info->free_space_cache,
			   ins->objectid, ins->objectid + ins->offset - 1,
			   GFP_NOFS);

	/* allocations for the extent tree itself cannot insert into the
	 * extent tree right now; queue them for finish_current_insert */
	if (root == extent_root) {
		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
				ins->objectid + ins->offset - 1,
				EXTENT_LOCKED, GFP_NOFS);
		WARN_ON(data == 1);
		goto update_block;
	}

	/* keep find_free_extent from handing out this range again while
	 * the items below are being inserted */
	WARN_ON(trans->alloc_exclude_nr);
	trans->alloc_exclude_start = ins->objectid;
	trans->alloc_exclude_nr = ins->offset;

	/* insert the extent item and its backref side by side */
	memcpy(&keys[0], ins, sizeof(*ins));
	keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
					 owner, owner_offset);
	keys[1].objectid = ins->objectid;
	keys[1].type = BTRFS_EXTENT_REF_KEY;
	sizes[0] = sizeof(*extent_item);
	sizes[1] = sizeof(*ref);

	path = btrfs_alloc_path();
	BUG_ON(!path);

	ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
				       sizes, 2);

	BUG_ON(ret);
	extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
	ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
			     struct btrfs_extent_ref);

	btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
	btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
	btrfs_set_ref_objectid(path->nodes[0], ref, owner);
	btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);

	btrfs_mark_buffer_dirty(path->nodes[0]);

	trans->alloc_exclude_start = 0;
	trans->alloc_exclude_nr = 0;
	btrfs_free_path(path);
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);

	if (ret) {
		return ret;
	}
	if (pending_ret) {
		return pending_ret;
	}

update_block:
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
				 data);
	if (ret) {
		printk("update block group failed for %Lu %Lu\n",
		       ins->objectid, ins->offset);
		BUG();
	}
	return 0;
}
1766
1767 /*
1768 * helper function to allocate a block for a given tree
1769 * returns the tree buffer or NULL.
1770 */
1771 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1772 struct btrfs_root *root,
1773 u32 blocksize,
1774 u64 root_objectid, u64 hint,
1775 u64 empty_size)
1776 {
1777 u64 ref_generation;
1778
1779 if (root->ref_cows)
1780 ref_generation = trans->transid;
1781 else
1782 ref_generation = 0;
1783
1784
1785 return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1786 ref_generation, 0, 0, hint, empty_size);
1787 }
1788
/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 *
 * Allocates one extent of 'blocksize' bytes, wraps it in an extent
 * buffer, marks it dirty in the current transaction and returns it.
 * On failure returns an ERR_PTR.  NOTE(review): 'level' and
 * 'first_objectid' are passed to btrfs_alloc_extent in the owner /
 * owner_offset slots — presumably the backref encoding for tree
 * blocks; confirm against btrfs_alloc_extent's callers.
 */
struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u32 blocksize,
					     u64 root_objectid,
					     u64 ref_generation,
					     u64 first_objectid,
					     int level,
					     u64 hint,
					     u64 empty_size)
{
	struct btrfs_key ins;
	int ret;
	struct extent_buffer *buf;

	ret = btrfs_alloc_extent(trans, root, blocksize,
				 root_objectid, ref_generation,
				 level, first_objectid, empty_size, hint,
				 (u64)-1, &ins, 0);
	if (ret) {
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
	}
	buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
	if (!buf) {
		/* give the extent back if we can't get a buffer for it */
		btrfs_free_extent(trans, root, ins.objectid, blocksize,
				  root->root_key.objectid, ref_generation,
				  0, 0, 0);
		return ERR_PTR(-ENOMEM);
	}
	btrfs_set_header_generation(buf, trans->transid);
	clean_tree_block(trans, root, buf);
	wait_on_tree_block_writeback(root, buf);
	btrfs_set_buffer_uptodate(buf);

	/* a freshly allocated block should never be dirty already */
	if (PageDirty(buf->first_page)) {
		printk("page %lu dirty\n", buf->first_page->index);
		WARN_ON(1);
	}

	/* track it as dirty in this transaction and flag it for csums */
	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
	set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree,
			buf->start, buf->start + buf->len - 1,
			EXTENT_CSUM, GFP_NOFS);
	buf->flags |= EXTENT_CSUM;
	btrfs_set_buffer_defrag(buf);
	trans->blocks_used++;
	return buf;
}
1842
1843 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1844 struct btrfs_root *root,
1845 struct extent_buffer *leaf)
1846 {
1847 u64 leaf_owner;
1848 u64 leaf_generation;
1849 struct btrfs_key key;
1850 struct btrfs_file_extent_item *fi;
1851 int i;
1852 int nritems;
1853 int ret;
1854
1855 BUG_ON(!btrfs_is_leaf(leaf));
1856 nritems = btrfs_header_nritems(leaf);
1857 leaf_owner = btrfs_header_owner(leaf);
1858 leaf_generation = btrfs_header_generation(leaf);
1859
1860 for (i = 0; i < nritems; i++) {
1861 u64 disk_bytenr;
1862
1863 btrfs_item_key_to_cpu(leaf, &key, i);
1864 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1865 continue;
1866 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1867 if (btrfs_file_extent_type(leaf, fi) ==
1868 BTRFS_FILE_EXTENT_INLINE)
1869 continue;
1870 /*
1871 * FIXME make sure to insert a trans record that
1872 * repeats the snapshot del on crash
1873 */
1874 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1875 if (disk_bytenr == 0)
1876 continue;
1877 ret = btrfs_free_extent(trans, root, disk_bytenr,
1878 btrfs_file_extent_disk_num_bytes(leaf, fi),
1879 leaf_owner, leaf_generation,
1880 key.objectid, key.offset, 0);
1881 BUG_ON(ret);
1882 }
1883 return 0;
1884 }
1885
/*
 * Opportunistic readahead of up to a window of child blocks of 'node'
 * starting at 'slot', used by walk_down_tree before it drops the fs
 * mutex to read a block.  Blocks far from the last one read, or shared
 * (refs != 1), are skipped; after 32 skips the scan gives up.
 *
 * NOTE(review): the early return when btrfs_header_level(node) != 0
 * looks inverted — node_blockptr() is only meaningful on internal
 * nodes (level >= 1), and btrfs_level_size(root, level - 1) is then
 * called with level == 0.  Confirm against the btrfs history whether
 * this check should be on the children's level instead.
 */
static void noinline reada_walk_down(struct btrfs_root *root,
				     struct extent_buffer *node,
				     int slot)
{
	u64 bytenr;
	u64 last = 0;
	u32 nritems;
	u32 refs;
	u32 blocksize;
	int ret;
	int i;
	int level;
	int skipped = 0;

	nritems = btrfs_header_nritems(node);
	level = btrfs_header_level(node);
	if (level)
		return;

	for (i = slot; i < nritems && skipped < 32; i++) {
		bytenr = btrfs_node_blockptr(node, i);
		/* skip blocks that would seek more than 32k away */
		if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
			     (last > bytenr && last - bytenr > 32 * 1024))) {
			skipped++;
			continue;
		}
		blocksize = btrfs_level_size(root, level - 1);
		/* for blocks other than the one we were asked about, only
		 * read ahead if they are not shared */
		if (i != slot) {
			ret = lookup_extent_ref(NULL, root, bytenr,
						blocksize, &refs);
			BUG_ON(ret);
			if (refs != 1) {
				skipped++;
				continue;
			}
		}
		/* drop the fs mutex while waiting on I/O */
		mutex_unlock(&root->fs_info->fs_mutex);
		ret = readahead_tree_block(root, bytenr, blocksize);
		last = bytenr + blocksize;
		cond_resched();
		mutex_lock(&root->fs_info->fs_mutex);
		if (ret)
			break;
	}
}
1931
/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 *
 * Descends from path->nodes[*level] along exclusively-owned (refcount 1)
 * blocks, freeing leaves via drop_leaf_ref and dropping one ref on any
 * shared child instead of descending into it.  When it can descend no
 * further it frees the current block and pops up one level.  Always
 * returns 0; *level and the path are updated in place.
 */
static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path, int *level)
{
	u64 root_owner;
	u64 root_gen;
	u64 bytenr;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	struct extent_buffer *parent;
	u32 blocksize;
	int ret;
	u32 refs;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
	/* if the starting block itself is shared there is nothing to free */
	ret = lookup_extent_ref(trans, root,
				path->nodes[*level]->start,
				path->nodes[*level]->len, &refs);
	BUG_ON(ret);
	if (refs > 1)
		goto out;

	/*
	 * walk down to the last node level and free all the leaves
	 */
	while(*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		/* exhausted this node: let the caller walk back up */
		if (path->slots[*level] >=
		    btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);
		ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
		BUG_ON(ret);
		/* shared child: just drop our ref and move to the next slot
		 * rather than descending */
		if (refs != 1) {
			parent = path->nodes[*level];
			root_owner = btrfs_header_owner(parent);
			root_gen = btrfs_header_generation(parent);
			path->slots[*level]++;
			ret = btrfs_free_extent(trans, root, bytenr,
						blocksize, root_owner,
						root_gen, 0, 0, 1);
			BUG_ON(ret);
			continue;
		}
		next = btrfs_find_tree_block(root, bytenr, blocksize);
		if (!next || !btrfs_buffer_uptodate(next)) {
			free_extent_buffer(next);
			reada_walk_down(root, cur, path->slots[*level]);
			/* reading may block: drop the fs mutex for the I/O */
			mutex_unlock(&root->fs_info->fs_mutex);
			next = read_tree_block(root, bytenr, blocksize);
			mutex_lock(&root->fs_info->fs_mutex);

			/* we dropped the lock, check one more time */
			ret = lookup_extent_ref(trans, root, bytenr,
						blocksize, &refs);
			BUG_ON(ret);
			if (refs != 1) {
				parent = path->nodes[*level];
				root_owner = btrfs_header_owner(parent);
				root_gen = btrfs_header_generation(parent);

				path->slots[*level]++;
				free_extent_buffer(next);
				ret = btrfs_free_extent(trans, root, bytenr,
							blocksize,
							root_owner,
							root_gen, 0, 0, 1);
				BUG_ON(ret);
				continue;
			}
		}
		/* descend: make the child the new current node */
		WARN_ON(*level <= 0);
		if (path->nodes[*level-1])
			free_extent_buffer(path->nodes[*level-1]);
		path->nodes[*level-1] = next;
		*level = btrfs_header_level(next);
		path->slots[*level] = 0;
	}
out:
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	/* free the block we stopped on and pop up a level */
	if (path->nodes[*level] == root->node) {
		root_owner = root->root_key.objectid;
		parent = path->nodes[*level];
	} else {
		parent = path->nodes[*level + 1];
		root_owner = btrfs_header_owner(parent);
	}

	root_gen = btrfs_header_generation(parent);
	ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
				path->nodes[*level]->len,
				root_owner, root_gen, 0, 0, 1);
	free_extent_buffer(path->nodes[*level]);
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}
2049
/*
 * helper for dropping snapshots. This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 *
 * Returns 0 when it found such a node (recording the resume position in
 * the root item's drop_progress / drop_level so an interrupted drop can
 * continue later), or 1 when the whole tree has been consumed.
 */
static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, int *level)
{
	u64 root_owner;
	u64 root_gen;
	struct btrfs_root_item *root_item = &root->root_item;
	int i;
	int slot;
	int ret;

	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		slot = path->slots[i];
		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
			struct extent_buffer *node;
			struct btrfs_disk_key disk_key;
			/* more slots left at this level: advance and record
			 * where to resume after a restart */
			node = path->nodes[i];
			path->slots[i]++;
			*level = i;
			WARN_ON(*level == 0);
			btrfs_node_key(node, &disk_key, path->slots[i]);
			memcpy(&root_item->drop_progress,
			       &disk_key, sizeof(disk_key));
			root_item->drop_level = i;
			return 0;
		} else {
			/* this node is fully processed: free it and keep
			 * climbing */
			if (path->nodes[*level] == root->node) {
				root_owner = root->root_key.objectid;
				root_gen =
				   btrfs_header_generation(path->nodes[*level]);
			} else {
				struct extent_buffer *node;
				node = path->nodes[*level + 1];
				root_owner = btrfs_header_owner(node);
				root_gen = btrfs_header_generation(node);
			}
			ret = btrfs_free_extent(trans, root,
						path->nodes[*level]->start,
						path->nodes[*level]->len,
						root_owner, root_gen, 0, 0, 1);
			BUG_ON(ret);
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;
			*level = i + 1;
		}
	}
	return 1;
}
2103
/*
 * drop the reference count on the tree rooted at 'snap'. This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 *
 * Performs at most one down/up pass per call and then returns -EAGAIN,
 * recording the resume point in the root item's drop_progress, so the
 * caller can throttle the deletion across transactions.  Returns 0 only
 * once walk_down/walk_up report the tree is fully consumed.
 */
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
			*root)
{
	int ret = 0;
	int wret;
	int level;
	struct btrfs_path *path;
	int i;
	int orig_level;
	struct btrfs_root_item *root_item = &root->root_item;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	level = btrfs_header_level(root->node);
	orig_level = level;
	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		/* fresh drop: start from the root block */
		path->nodes[level] = root->node;
		extent_buffer_get(root->node);
		path->slots[level] = 0;
	} else {
		/* resuming: reposition the path at the saved drop key */
		struct btrfs_key key;
		struct btrfs_disk_key found_key;
		struct extent_buffer *node;

		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path->lowest_level = level;
		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (wret < 0) {
			ret = wret;
			goto out;
		}
		node = path->nodes[level];
		btrfs_node_key(node, &found_key, path->slots[level]);
		/* the saved key should still be where we left it */
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
	}
	while(1) {
		wret = walk_down_tree(trans, root, path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;

		wret = walk_up_tree(trans, root, path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;
		/* deliberate: stop after one pass so the caller can
		 * commit and call us again */
		ret = -EAGAIN;
		break;
	}
	for (i = 0; i <= orig_level; i++) {
		if (path->nodes[i]) {
			free_extent_buffer(path->nodes[i]);
			path->nodes[i] = NULL;
		}
	}
out:
	btrfs_free_path(path);
	return ret;
}
2172
2173 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2174 {
2175 u64 start;
2176 u64 end;
2177 u64 ptr;
2178 int ret;
2179 while(1) {
2180 ret = find_first_extent_bit(&info->block_group_cache, 0,
2181 &start, &end, (unsigned int)-1);
2182 if (ret)
2183 break;
2184 ret = get_state_private(&info->block_group_cache, start, &ptr);
2185 if (!ret)
2186 kfree((void *)(unsigned long)ptr);
2187 clear_extent_bits(&info->block_group_cache, start,
2188 end, (unsigned int)-1, GFP_NOFS);
2189 }
2190 while(1) {
2191 ret = find_first_extent_bit(&info->free_space_cache, 0,
2192 &start, &end, EXTENT_DIRTY);
2193 if (ret)
2194 break;
2195 clear_extent_dirty(&info->free_space_cache, start,
2196 end, GFP_NOFS);
2197 }
2198 return 0;
2199 }
2200
2201 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2202 u64 len)
2203 {
2204 u64 page_start;
2205 u64 page_end;
2206 u64 delalloc_start;
2207 u64 existing_delalloc;
2208 unsigned long last_index;
2209 unsigned long i;
2210 struct page *page;
2211 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2212 struct file_ra_state *ra;
2213
2214 ra = kzalloc(sizeof(*ra), GFP_NOFS);
2215
2216 mutex_lock(&inode->i_mutex);
2217 i = start >> PAGE_CACHE_SHIFT;
2218 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2219
2220 file_ra_state_init(ra, inode->i_mapping);
2221 btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index);
2222 kfree(ra);
2223
2224 for (; i <= last_index; i++) {
2225 page = grab_cache_page(inode->i_mapping, i);
2226 if (!page)
2227 goto out_unlock;
2228 if (!PageUptodate(page)) {
2229 btrfs_readpage(NULL, page);
2230 lock_page(page);
2231 if (!PageUptodate(page)) {
2232 unlock_page(page);
2233 page_cache_release(page);
2234 goto out_unlock;
2235 }
2236 }
2237 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2238 page_end = page_start + PAGE_CACHE_SIZE - 1;
2239
2240 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2241
2242 delalloc_start = page_start;
2243 existing_delalloc = count_range_bits(io_tree,
2244 &delalloc_start, page_end,
2245 PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2246
2247 set_extent_delalloc(io_tree, page_start,
2248 page_end, GFP_NOFS);
2249
2250 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2251 set_page_dirty(page);
2252 unlock_page(page);
2253 page_cache_release(page);
2254 }
2255
2256 out_unlock:
2257 mutex_unlock(&inode->i_mutex);
2258 return 0;
2259 }
2260
2261 /*
2262 * note, this releases the path
2263 */
2264 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2265 struct btrfs_path *path,
2266 struct btrfs_key *extent_key)
2267 {
2268 struct inode *inode;
2269 struct btrfs_root *found_root;
2270 struct btrfs_key *root_location;
2271 struct btrfs_extent_ref *ref;
2272 u64 ref_root;
2273 u64 ref_gen;
2274 u64 ref_objectid;
2275 u64 ref_offset;
2276 int ret;
2277
2278 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2279 struct btrfs_extent_ref);
2280 ref_root = btrfs_ref_root(path->nodes[0], ref);
2281 ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2282 ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2283 ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2284 btrfs_release_path(extent_root, path);
2285
2286 root_location = kmalloc(sizeof(*root_location), GFP_NOFS);
2287 root_location->objectid = ref_root;
2288 if (ref_gen == 0)
2289 root_location->offset = 0;
2290 else
2291 root_location->offset = (u64)-1;
2292 root_location->type = BTRFS_ROOT_ITEM_KEY;
2293
2294 found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2295 root_location);
2296 BUG_ON(!found_root);
2297 kfree(root_location);
2298
2299 if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2300 mutex_unlock(&extent_root->fs_info->fs_mutex);
2301 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2302 ref_objectid, found_root);
2303 if (inode->i_state & I_NEW) {
2304 /* the inode and parent dir are two different roots */
2305 BTRFS_I(inode)->root = found_root;
2306 BTRFS_I(inode)->location.objectid = ref_objectid;
2307 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2308 BTRFS_I(inode)->location.offset = 0;
2309 btrfs_read_locked_inode(inode);
2310 unlock_new_inode(inode);
2311
2312 }
2313 /* this can happen if the reference is not against
2314 * the latest version of the tree root
2315 */
2316 if (is_bad_inode(inode)) {
2317 mutex_lock(&extent_root->fs_info->fs_mutex);
2318 goto out;
2319 }
2320 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2321 /* FIXME, data=ordered will help get rid of this */
2322 filemap_fdatawrite(inode->i_mapping);
2323 iput(inode);
2324 mutex_lock(&extent_root->fs_info->fs_mutex);
2325 } else {
2326 struct btrfs_trans_handle *trans;
2327 struct btrfs_key found_key;
2328 struct extent_buffer *eb;
2329 int level;
2330 int i;
2331
2332 trans = btrfs_start_transaction(found_root, 1);
2333 eb = read_tree_block(found_root, extent_key->objectid,
2334 extent_key->offset);
2335 level = btrfs_header_level(eb);
2336
2337 if (level == 0)
2338 btrfs_item_key_to_cpu(eb, &found_key, 0);
2339 else
2340 btrfs_node_key_to_cpu(eb, &found_key, 0);
2341
2342 free_extent_buffer(eb);
2343
2344 path->lowest_level = level;
2345 path->reada = 2;
2346 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2347 0, 1);
2348 path->lowest_level = 0;
2349 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2350 if (!path->nodes[i])
2351 break;
2352 free_extent_buffer(path->nodes[i]);
2353 path->nodes[i] = NULL;
2354 }
2355 btrfs_release_path(found_root, path);
2356 btrfs_end_transaction(trans, found_root);
2357 }
2358
2359 out:
2360 return 0;
2361 }
2362
2363 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2364 struct btrfs_path *path,
2365 struct btrfs_key *extent_key)
2366 {
2367 struct btrfs_key key;
2368 struct btrfs_key found_key;
2369 struct extent_buffer *leaf;
2370 u32 nritems;
2371 u32 item_size;
2372 int ret = 0;
2373
2374 key.objectid = extent_key->objectid;
2375 key.type = BTRFS_EXTENT_REF_KEY;
2376 key.offset = 0;
2377
2378 while(1) {
2379 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2380
2381 if (ret < 0)
2382 goto out;
2383
2384 ret = 0;
2385 leaf = path->nodes[0];
2386 nritems = btrfs_header_nritems(leaf);
2387 if (path->slots[0] == nritems)
2388 goto out;
2389
2390 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2391 if (found_key.objectid != extent_key->objectid)
2392 break;
2393
2394 if (found_key.type != BTRFS_EXTENT_REF_KEY)
2395 break;
2396
2397 key.offset = found_key.offset + 1;
2398 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2399
2400 ret = relocate_one_reference(extent_root, path, extent_key);
2401 if (ret)
2402 goto out;
2403 }
2404 ret = 0;
2405 out:
2406 btrfs_release_path(extent_root, path);
2407 return ret;
2408 }
2409
/*
 * Shrink the space used by the filesystem down to new_size bytes.
 *
 * Phase 1: repeatedly scan the extent tree for any extent item that
 * ends past new_size, relocate its data via relocate_one_extent(),
 * commit, and restart (the "again" loop) until a full pass finds
 * nothing left to move.  Phase 2: delete the block group items that
 * now lie beyond new_size and drop their in-memory cache entries.
 *
 * Called with fs_mutex held; it is dropped and retaken around
 * cond_resched() and around btrfs_clean_old_snapshots().
 */
int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_path *path;
	u64 cur_byte;
	u64 total_found;
	struct btrfs_fs_info *info = root->fs_info;
	struct extent_io_tree *block_group_cache;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int progress = 0;

	/* shrink the superblock size first so new allocations stay inside */
	btrfs_set_super_total_bytes(&info->super_copy, new_size);
	clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
			   GFP_NOFS);
	block_group_cache = &info->block_group_cache;
	/* NOTE(review): btrfs_alloc_path result is not checked -- a NULL
	 * here would oops on path->reada below; confirm callers tolerate
	 * -ENOMEM before adding a check */
	path = btrfs_alloc_path();
	root = root->fs_info->extent_root;
	path->reada = 2;

again:
	total_found = 0;
	key.objectid = new_size;
	key.offset = 0;
	key.type = 0;
	cur_byte = key.objectid;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	/* an extent that starts before new_size may still end past it;
	 * back up one item and widen the scan if so */
	ret = find_previous_extent(root, path);
	if (ret < 0)
		goto out;
	if (ret == 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid + found_key.offset > new_size) {
			cur_byte = found_key.objectid;
			key.objectid = cur_byte;
		}
	}
	btrfs_release_path(root, path);

	/* phase 1: relocate every extent ending beyond cur_byte */
	while(1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
next:
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret == 1) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* drop fs_mutex to reschedule, then re-search since the
		 * tree may have changed while it was unlocked */
		if (progress && need_resched()) {
			memcpy(&key, &found_key, sizeof(key));
			mutex_unlock(&root->fs_info->fs_mutex);
			cond_resched();
			mutex_lock(&root->fs_info->fs_mutex);
			btrfs_release_path(root, path);
			btrfs_search_slot(NULL, root, &key, path, 0, 0);
			progress = 0;
			goto next;
		}
		progress = 1;

		if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
		    found_key.objectid + found_key.offset <= cur_byte) {
			path->slots[0]++;
			goto next;
		}

		total_found++;
		cur_byte = found_key.objectid + found_key.offset;
		key.objectid = cur_byte;
		btrfs_release_path(root, path);
		/* NOTE(review): the return value is overwritten on the next
		 * loop iteration, so a relocation failure is silently
		 * retried/ignored -- confirm this is intentional */
		ret = relocate_one_extent(root, path, &found_key);
	}

	btrfs_release_path(root, path);

	/* anything moved?  commit, reap old snapshots (they may pin the
	 * extents we just relocated), and rescan from the top */
	if (total_found > 0) {
		trans = btrfs_start_transaction(tree_root, 1);
		btrfs_commit_transaction(trans, tree_root);

		mutex_unlock(&root->fs_info->fs_mutex);
		btrfs_clean_old_snapshots(tree_root);
		mutex_lock(&root->fs_info->fs_mutex);

		trans = btrfs_start_transaction(tree_root, 1);
		btrfs_commit_transaction(trans, tree_root);
		goto again;
	}

	/* phase 2: delete the block group items past new_size */
	trans = btrfs_start_transaction(root, 1);
	key.objectid = new_size;
	key.offset = 0;
	key.type = 0;
	while(1) {
		u64 ptr;

		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
bg_next:
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				break;
			if (ret == 1) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

			/*
			 * btrfs_next_leaf doesn't cow buffers, we have to
			 * do the search again
			 */
			memcpy(&key, &found_key, sizeof(key));
			btrfs_release_path(root, path);
			goto resched_check;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
			printk("shrinker found key %Lu %u %Lu\n",
			       found_key.objectid, found_key.type,
			       found_key.offset);
			path->slots[0]++;
			goto bg_next;
		}
		/* free the in-memory cache struct for this block group */
		ret = get_state_private(&info->block_group_cache,
					found_key.objectid, &ptr);
		if (!ret)
			kfree((void *)(unsigned long)ptr);

		clear_extent_bits(&info->block_group_cache, found_key.objectid,
				  found_key.objectid + found_key.offset - 1,
				  (unsigned int)-1, GFP_NOFS);

		key.objectid = found_key.objectid + 1;
		btrfs_del_item(trans, root, path);
		btrfs_release_path(root, path);
resched_check:
		if (need_resched()) {
			mutex_unlock(&root->fs_info->fs_mutex);
			cond_resched();
			mutex_lock(&root->fs_info->fs_mutex);
		}
	}
	clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
			   GFP_NOFS);
	btrfs_commit_transaction(trans, root);
out:
	btrfs_free_path(path);
	return ret;
}
2588
/*
 * Grow the filesystem to new_size bytes by appending block group
 * items after the last existing one until new_size is covered, then
 * bump the superblock total.  Runs inside the caller's transaction.
 *
 * New groups alternate flags via nr % 3: two of every three get
 * BTRFS_BLOCK_GROUP_DATA, the third is metadata (flags 0).
 */
int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, u64 new_size)
{
	struct btrfs_path *path;
	u64 nr = 0;		/* count of groups created, drives data/meta mix */
	u64 cur_byte;
	u64 old_size;
	unsigned long rem;
	struct btrfs_block_group_cache *cache;
	struct btrfs_block_group_item *item;
	struct btrfs_fs_info *info = root->fs_info;
	struct extent_io_tree *block_group_cache;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int ret;
	int bit;

	old_size = btrfs_super_total_bytes(&info->super_copy);
	block_group_cache = &info->block_group_cache;

	root = info->extent_root;

	/* find the block group containing the current last byte.
	 * NOTE(review): the result is dereferenced unchecked -- presumably
	 * a group covering old_size - 1 always exists on a mounted fs;
	 * confirm before relying on this with a corrupted cache */
	cache = btrfs_lookup_block_group(root->fs_info, old_size - 1);

	cur_byte = cache->key.objectid + cache->key.offset;
	if (cur_byte >= new_size)
		goto set_size;

	key.offset = BTRFS_BLOCK_GROUP_SIZE;
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(cur_byte < new_size) {
		key.objectid = cur_byte;
		ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_block_group_item));
		BUG_ON(ret);
		leaf = path->nodes[0];
		item = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_block_group_item);

		btrfs_set_disk_block_group_used(leaf, item, 0);
		/* nr % 3 == 0 -> metadata group, otherwise data group */
		div_long_long_rem(nr, 3, &rem);
		if (rem) {
			btrfs_set_disk_block_group_flags(leaf, item,
						 BTRFS_BLOCK_GROUP_DATA);
		} else {
			btrfs_set_disk_block_group_flags(leaf, item, 0);
		}
		nr++;

		/* build the in-memory cache entry mirroring the item */
		cache = kmalloc(sizeof(*cache), GFP_NOFS);
		BUG_ON(!cache);

		read_extent_buffer(leaf, &cache->item, (unsigned long)item,
				   sizeof(cache->item));

		memcpy(&cache->key, &key, sizeof(key));
		cache->cached = 0;
		cache->pinned = 0;
		cur_byte = key.objectid + key.offset;
		btrfs_release_path(root, path);

		if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
			bit = BLOCK_GROUP_DATA;
			cache->data = BTRFS_BLOCK_GROUP_DATA;
		} else {
			bit = BLOCK_GROUP_METADATA;
			cache->data = 0;
		}

		/* use EXTENT_LOCKED to prevent merging */
		set_extent_bits(block_group_cache, key.objectid,
				key.objectid + key.offset - 1,
				bit | EXTENT_LOCKED, GFP_NOFS);
		set_state_private(block_group_cache, key.objectid,
				  (unsigned long)cache);
	}
	btrfs_free_path(path);
set_size:
	btrfs_set_super_total_bytes(&info->super_copy, new_size);
	return 0;
}
2675
2676 int btrfs_read_block_groups(struct btrfs_root *root)
2677 {
2678 struct btrfs_path *path;
2679 int ret;
2680 int err = 0;
2681 int bit;
2682 struct btrfs_block_group_cache *cache;
2683 struct btrfs_fs_info *info = root->fs_info;
2684 struct extent_io_tree *block_group_cache;
2685 struct btrfs_key key;
2686 struct btrfs_key found_key;
2687 struct extent_buffer *leaf;
2688
2689 block_group_cache = &info->block_group_cache;
2690
2691 root = info->extent_root;
2692 key.objectid = 0;
2693 key.offset = BTRFS_BLOCK_GROUP_SIZE;
2694 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2695
2696 path = btrfs_alloc_path();
2697 if (!path)
2698 return -ENOMEM;
2699
2700 while(1) {
2701 ret = btrfs_search_slot(NULL, info->extent_root,
2702 &key, path, 0, 0);
2703 if (ret != 0) {
2704 err = ret;
2705 break;
2706 }
2707 leaf = path->nodes[0];
2708 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2709 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2710 if (!cache) {
2711 err = -1;
2712 break;
2713 }
2714
2715 read_extent_buffer(leaf, &cache->item,
2716 btrfs_item_ptr_offset(leaf, path->slots[0]),
2717 sizeof(cache->item));
2718 memcpy(&cache->key, &found_key, sizeof(found_key));
2719 cache->cached = 0;
2720 cache->pinned = 0;
2721 key.objectid = found_key.objectid + found_key.offset;
2722 btrfs_release_path(root, path);
2723
2724 if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
2725 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
2726 cache->data = BTRFS_BLOCK_GROUP_MIXED;
2727 } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2728 bit = BLOCK_GROUP_DATA;
2729 cache->data = BTRFS_BLOCK_GROUP_DATA;
2730 } else {
2731 bit = BLOCK_GROUP_METADATA;
2732 cache->data = 0;
2733 }
2734
2735 /* use EXTENT_LOCKED to prevent merging */
2736 set_extent_bits(block_group_cache, found_key.objectid,
2737 found_key.objectid + found_key.offset - 1,
2738 bit | EXTENT_LOCKED, GFP_NOFS);
2739 set_state_private(block_group_cache, found_key.objectid,
2740 (unsigned long)cache);
2741
2742 if (key.objectid >=
2743 btrfs_super_total_bytes(&info->super_copy))
2744 break;
2745 }
2746
2747 btrfs_free_path(path);
2748 return 0;
2749 }