]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - fs/btrfs/relocation.c
btrfs: backref: rename and move backref_cache_init()
[mirror_ubuntu-hirsute-kernel.git] / fs / btrfs / relocation.c
CommitLineData
c1d7c514 1// SPDX-License-Identifier: GPL-2.0
5d4f98a2
YZ
2/*
3 * Copyright (C) 2009 Oracle. All rights reserved.
5d4f98a2
YZ
4 */
5
6#include <linux/sched.h>
7#include <linux/pagemap.h>
8#include <linux/writeback.h>
9#include <linux/blkdev.h>
10#include <linux/rbtree.h>
5a0e3ad6 11#include <linux/slab.h>
726a3421 12#include <linux/error-injection.h>
5d4f98a2
YZ
13#include "ctree.h"
14#include "disk-io.h"
15#include "transaction.h"
16#include "volumes.h"
17#include "locking.h"
18#include "btrfs_inode.h"
19#include "async-thread.h"
0af3d00b 20#include "free-space-cache.h"
581bb050 21#include "inode-map.h"
62b99540 22#include "qgroup.h"
cdccee99 23#include "print-tree.h"
86736342 24#include "delalloc-space.h"
aac0023c 25#include "block-group.h"
19b546d7 26#include "backref.h"
e9a28dc5 27#include "misc.h"
5d4f98a2 28
0c891389
QW
29/*
30 * Relocation overview
31 *
32 * [What does relocation do]
33 *
34 * The objective of relocation is to relocate all extents of the target block
35 * group to other block groups.
36 * This is utilized by resize (shrink only), profile converting, compacting
37 * space, or balance routine to spread chunks over devices.
38 *
39 * Before | After
40 * ------------------------------------------------------------------
41 * BG A: 10 data extents | BG A: deleted
42 * BG B: 2 data extents | BG B: 10 data extents (2 old + 8 relocated)
43 * BG C: 1 extents | BG C: 3 data extents (1 old + 2 relocated)
44 *
45 * [How does relocation work]
46 *
47 * 1. Mark the target block group read-only
48 * New extents won't be allocated from the target block group.
49 *
50 * 2.1 Record each extent in the target block group
51 * To build a proper map of extents to be relocated.
52 *
53 * 2.2 Build data reloc tree and reloc trees
54 * Data reloc tree will contain an inode, recording all newly relocated
55 * data extents.
56 * There will be only one data reloc tree for one data block group.
57 *
58 * Reloc tree will be a special snapshot of its source tree, containing
59 * relocated tree blocks.
60 * Each tree referring to a tree block in target block group will get its
61 * reloc tree built.
62 *
63 * 2.3 Swap source tree with its corresponding reloc tree
64 * Each involved tree only refers to new extents after swap.
65 *
66 * 3. Cleanup reloc trees and data reloc tree.
67 * As old extents in the target block group are still referenced by reloc
68 * trees, we need to clean them up before really freeing the target block
69 * group.
70 *
71 * The main complexity is in steps 2.2 and 2.3.
72 *
73 * The entry point of relocation is relocate_block_group() function.
74 */
75
2a979612 76#define RELOCATION_RESERVED_NODES 256
5d4f98a2
YZ
/*
 * map address of tree root to tree
 */
struct mapping_node {
	struct {
		struct rb_node rb_node;
		u64 bytenr;	/* commit root bytenr of the tree root */
	}; /* Use rb_simple_node for search/insert */
	/* Opaque payload; find_reloc_root() casts this to struct btrfs_root * */
	void *data;
};

/* rb-tree of mapping_nodes, protected by @lock */
struct mapping_tree {
	struct rb_root rb_root;
	spinlock_t lock;
};
92
/*
 * present a tree block to process
 */
struct tree_block {
	struct {
		struct rb_node rb_node;
		u64 bytenr;	/* bytenr of the tree block */
	}; /* Use rb_simple_node for search/insert */
	struct btrfs_key key;		/* key of the block; valid once read */
	unsigned int level:8;		/* tree level of the block */
	unsigned int key_ready:1;	/* 1 once @key has been filled in */
};
105
/* Max number of extent boundaries one cluster can track */
#define MAX_EXTENTS 128

/*
 * A cluster of contiguous file extents [start, end] to relocate together.
 * boundary[] records the start bytenr of each member extent; nr is the
 * number of boundaries in use.
 */
struct file_extent_cluster {
	u64 start;
	u64 end;
	u64 boundary[MAX_EXTENTS];
	unsigned int nr;
};
114
5d4f98a2
YZ
/* Per-relocation state, one instance per running block group relocation */
struct reloc_control {
	/* block group to relocate */
	struct btrfs_block_group *block_group;
	/* extent tree */
	struct btrfs_root *extent_root;
	/* inode for moving data */
	struct inode *data_inode;

	struct btrfs_block_rsv *block_rsv;

	/* backref cache shared across the whole relocation run */
	struct btrfs_backref_cache backref_cache;

	/* current cluster of data extents being relocated */
	struct file_extent_cluster cluster;
	/* tree blocks have been processed */
	struct extent_io_tree processed_blocks;
	/* map start of tree root to corresponding reloc tree */
	struct mapping_tree reloc_root_tree;
	/* list of reloc trees */
	struct list_head reloc_roots;
	/* list of subvolume trees that get relocated */
	struct list_head dirty_subvol_roots;
	/* size of metadata reservation for merging reloc trees */
	u64 merging_rsv_size;
	/* size of relocated tree nodes */
	u64 nodes_relocated;
	/* reserved size for block group relocation */
	u64 reserved_bytes;

	/* search cursor within the block group's extent range */
	u64 search_start;
	u64 extents_found;

	/* MOVE_DATA_EXTENTS or UPDATE_DATA_PTRS */
	unsigned int stage:8;
	unsigned int create_reloc_tree:1;
	unsigned int merge_reloc_tree:1;
	unsigned int found_file_extent:1;
};
151
152/* stages of data relocation */
153#define MOVE_DATA_EXTENTS 0
154#define UPDATE_DATA_PTRS 1
155
a26195a5
QW
/* Forward declaration; defined below. */
static void remove_backref_node(struct btrfs_backref_cache *cache,
				struct btrfs_backref_node *node);

/*
 * Mark @node processed.  Leaves (level 0) and blocks that live inside the
 * target block group are additionally recorded in rc->processed_blocks so
 * they are not relocated twice.
 */
static void mark_block_processed(struct reloc_control *rc,
				 struct btrfs_backref_node *node)
{
	u32 blocksize;

	if (node->level == 0 ||
	    in_range(node->bytenr, rc->block_group->start,
		     rc->block_group->length)) {
		blocksize = rc->extent_root->fs_info->nodesize;
		set_extent_bits(&rc->processed_blocks, node->bytenr,
				node->bytenr + blocksize - 1, EXTENT_DIRTY);
	}
	node->processed = 1;
}
173
5d4f98a2
YZ
174
/* Initialize an empty mapping tree and its protecting spinlock. */
static void mapping_tree_init(struct mapping_tree *tree)
{
	tree->rb_root = RB_ROOT;
	spin_lock_init(&tree->lock);
}
180
a26195a5 181static void backref_cache_cleanup(struct btrfs_backref_cache *cache)
3fd0a558 182{
a26195a5 183 struct btrfs_backref_node *node;
3fd0a558
YZ
184 int i;
185
186 while (!list_empty(&cache->detached)) {
187 node = list_entry(cache->detached.next,
a26195a5 188 struct btrfs_backref_node, list);
3fd0a558
YZ
189 remove_backref_node(cache, node);
190 }
191
192 while (!list_empty(&cache->leaves)) {
193 node = list_entry(cache->leaves.next,
a26195a5 194 struct btrfs_backref_node, lower);
3fd0a558
YZ
195 remove_backref_node(cache, node);
196 }
197
198 cache->last_trans = 0;
199
200 for (i = 0; i < BTRFS_MAX_LEVEL; i++)
f4907095 201 ASSERT(list_empty(&cache->pending[i]));
84780289
QW
202 ASSERT(list_empty(&cache->pending_edge));
203 ASSERT(list_empty(&cache->useless_node));
f4907095
LB
204 ASSERT(list_empty(&cache->changed));
205 ASSERT(list_empty(&cache->detached));
206 ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
207 ASSERT(!cache->nr_nodes);
208 ASSERT(!cache->nr_edges);
3fd0a558
YZ
209}
210
a26195a5
QW
/*
 * Allocate and initialize a backref node for block @bytenr at @level.
 * Returns NULL on allocation failure.  Bumps cache->nr_nodes for leak
 * accounting (see backref_cache_cleanup()).
 */
static struct btrfs_backref_node *alloc_backref_node(
		struct btrfs_backref_cache *cache, u64 bytenr, int level)
{
	struct btrfs_backref_node *node;

	ASSERT(level >= 0 && level < BTRFS_MAX_LEVEL);
	node = kzalloc(sizeof(*node), GFP_NOFS);
	if (!node)
		return node;

	INIT_LIST_HEAD(&node->list);
	INIT_LIST_HEAD(&node->upper);
	INIT_LIST_HEAD(&node->lower);
	/* Not yet in the cache rb-tree; RB_EMPTY_NODE() must be true */
	RB_CLEAR_NODE(&node->rb_node);
	cache->nr_nodes++;
	node->level = level;
	node->bytenr = bytenr;

	return node;
}
231
a26195a5
QW
/*
 * Free a backref node, dropping its root reference and decrementing the
 * cache node counter.  NULL-safe.
 */
static void free_backref_node(struct btrfs_backref_cache *cache,
			      struct btrfs_backref_node *node)
{
	if (node) {
		cache->nr_nodes--;
		btrfs_put_root(node->root);
		kfree(node);
	}
}
241
a26195a5
QW
/*
 * Allocate a zeroed backref edge; returns NULL on failure.  Bumps
 * cache->nr_edges for leak accounting.
 */
static struct btrfs_backref_edge *alloc_backref_edge(
		struct btrfs_backref_cache *cache)
{
	struct btrfs_backref_edge *edge;

	edge = kzalloc(sizeof(*edge), GFP_NOFS);
	if (edge)
		cache->nr_edges++;
	return edge;
}
252
2a979612
QW
/* Which side(s) of the edge to splice onto the nodes' lists */
#define LINK_LOWER (1 << 0)
#define LINK_UPPER (1 << 1)

/*
 * Connect @edge between @lower and @upper (upper must be exactly one level
 * above lower).  @link_which selects whether the edge is added to the
 * lower node's upper list, the upper node's lower list, or both.
 */
static void link_backref_edge(struct btrfs_backref_edge *edge,
			      struct btrfs_backref_node *lower,
			      struct btrfs_backref_node *upper,
			      int link_which)
{
	ASSERT(upper && lower && upper->level == lower->level + 1);
	edge->node[LOWER] = lower;
	edge->node[UPPER] = upper;
	if (link_which & LINK_LOWER)
		list_add_tail(&edge->list[LOWER], &lower->upper);
	if (link_which & LINK_UPPER)
		list_add_tail(&edge->list[UPPER], &upper->lower);
}
268
a26195a5
QW
/* Free a backref edge and decrement the cache edge counter.  NULL-safe. */
static void free_backref_edge(struct btrfs_backref_cache *cache,
			      struct btrfs_backref_edge *edge)
{
	if (edge) {
		cache->nr_edges--;
		kfree(edge);
	}
}
277
48a3b636 278static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
43c04fb1
JM
279{
280
281 struct btrfs_fs_info *fs_info = NULL;
a26195a5
QW
282 struct btrfs_backref_node *bnode = rb_entry(rb_node,
283 struct btrfs_backref_node, rb_node);
43c04fb1
JM
284 if (bnode->root)
285 fs_info = bnode->root->fs_info;
5d163e0e
JM
286 btrfs_panic(fs_info, errno,
287 "Inconsistency in backref cache found at offset %llu",
288 bytenr);
43c04fb1
JM
289}
290
5d4f98a2
YZ
/*
 * walk up backref nodes until reach node presents tree root
 *
 * The edges taken on the way up are appended to @edges starting at *index;
 * *index is updated to point past the last recorded edge, so that
 * walk_down_backref() can later backtrack along the same path.
 */
static struct btrfs_backref_node *walk_up_backref(
		struct btrfs_backref_node *node,
		struct btrfs_backref_edge *edges[], int *index)
{
	struct btrfs_backref_edge *edge;
	int idx = *index;

	while (!list_empty(&node->upper)) {
		/* Always follow the first parent edge */
		edge = list_entry(node->upper.next,
				  struct btrfs_backref_edge, list[LOWER]);
		edges[idx++] = edge;
		node = edge->node[UPPER];
	}
	BUG_ON(node->detached);
	*index = idx;
	return node;
}
311
/*
 * walk down backref nodes to find start of next reference path
 *
 * Backtracks through the @edges stack recorded by walk_up_backref(): at
 * each level, advance to the lower node's next parent edge if one exists;
 * otherwise pop a level.  Returns the upper node where the next path
 * diverges, or NULL (with *index reset to 0) when all paths are exhausted.
 */
static struct btrfs_backref_node *walk_down_backref(
		struct btrfs_backref_edge *edges[], int *index)
{
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_node *lower;
	int idx = *index;

	while (idx > 0) {
		edge = edges[idx - 1];
		lower = edge->node[LOWER];
		if (list_is_last(&edge->list[LOWER], &lower->upper)) {
			/* No more siblings at this level, pop one level */
			idx--;
			continue;
		}
		edge = list_entry(edge->list[LOWER].next,
				  struct btrfs_backref_edge, list[LOWER]);
		edges[idx - 1] = edge;
		*index = idx;
		return edge->node[UPPER];
	}
	*index = 0;
	return NULL;
}
338
a26195a5 339static void unlock_node_buffer(struct btrfs_backref_node *node)
3fd0a558
YZ
340{
341 if (node->locked) {
342 btrfs_tree_unlock(node->eb);
343 node->locked = 0;
344 }
345}
346
a26195a5 347static void drop_node_buffer(struct btrfs_backref_node *node)
5d4f98a2
YZ
348{
349 if (node->eb) {
3fd0a558 350 unlock_node_buffer(node);
5d4f98a2
YZ
351 free_extent_buffer(node->eb);
352 node->eb = NULL;
353 }
354}
355
a26195a5
QW
/*
 * Unlink @node from every cache structure (lists and rb-tree) and free it.
 * The node must have no remaining parent edges.
 */
static void drop_backref_node(struct btrfs_backref_cache *tree,
			      struct btrfs_backref_node *node)
{
	BUG_ON(!list_empty(&node->upper));

	drop_node_buffer(node);
	list_del(&node->list);
	list_del(&node->lower);
	if (!RB_EMPTY_NODE(&node->rb_node))
		rb_erase(&node->rb_node, &tree->rb_root);
	free_backref_node(tree, node);
}
368
/*
 * remove a backref node from the backref cache
 *
 * All parent edges are detached and freed.  A parent that is not in the
 * rb-tree (i.e. an uncached upper node) takes over as the node to drop,
 * cascading the removal upwards.  Parents left with no children are moved
 * to the cache leaf list.
 */
static void remove_backref_node(struct btrfs_backref_cache *cache,
				struct btrfs_backref_node *node)
{
	struct btrfs_backref_node *upper;
	struct btrfs_backref_edge *edge;

	if (!node)
		return;

	/* Only lowest or detached nodes may be removed directly */
	BUG_ON(!node->lowest && !node->detached);
	while (!list_empty(&node->upper)) {
		edge = list_entry(node->upper.next, struct btrfs_backref_edge,
				  list[LOWER]);
		upper = edge->node[UPPER];
		list_del(&edge->list[LOWER]);
		list_del(&edge->list[UPPER]);
		free_backref_edge(cache, edge);

		if (RB_EMPTY_NODE(&upper->rb_node)) {
			/* Uncached parent: drop current node and recurse up */
			BUG_ON(!list_empty(&node->upper));
			drop_backref_node(cache, node);
			node = upper;
			node->lowest = 1;
			continue;
		}
		/*
		 * add the node to leaf node list if no other
		 * child block cached.
		 */
		if (list_empty(&upper->lower)) {
			list_add_tail(&upper->lower, &cache->leaves);
			upper->lowest = 1;
		}
	}

	drop_backref_node(cache, node);
}
409
a26195a5
QW
/*
 * Re-key @node to @bytenr: remove it from the cache rb-tree and re-insert
 * it under the new bytenr.  Panics on a key collision.
 */
static void update_backref_node(struct btrfs_backref_cache *cache,
				struct btrfs_backref_node *node, u64 bytenr)
{
	struct rb_node *rb_node;
	rb_erase(&node->rb_node, &cache->rb_root);
	node->bytenr = bytenr;
	rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
	if (rb_node)
		backref_tree_panic(rb_node, -EEXIST, bytenr);
}
420
/*
 * update backref cache after a transaction commit
 *
 * Returns 1 if the cache was updated for a new transaction (callers should
 * restart their lookup), 0 if the cache is already current.
 */
static int update_backref_cache(struct btrfs_trans_handle *trans,
				struct btrfs_backref_cache *cache)
{
	struct btrfs_backref_node *node;
	int level = 0;

	if (cache->last_trans == 0) {
		cache->last_trans = trans->transid;
		return 0;
	}

	if (cache->last_trans == trans->transid)
		return 0;

	/*
	 * detached nodes are used to avoid unnecessary backref
	 * lookup. transaction commit changes the extent tree.
	 * so the detached nodes are no longer useful.
	 */
	while (!list_empty(&cache->detached)) {
		node = list_entry(cache->detached.next,
				  struct btrfs_backref_node, list);
		remove_backref_node(cache, node);
	}

	/* Re-key nodes whose blocks were COWed to a new bytenr */
	while (!list_empty(&cache->changed)) {
		node = list_entry(cache->changed.next,
				  struct btrfs_backref_node, list);
		list_del_init(&node->list);
		BUG_ON(node->pending);
		update_backref_node(cache, node, node->new_bytenr);
	}

	/*
	 * some nodes can be left in the pending list if there were
	 * errors during processing the pending nodes.
	 */
	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
		list_for_each_entry(node, &cache->pending[level], list) {
			BUG_ON(!node->pending);
			if (node->bytenr == node->new_bytenr)
				continue;
			update_backref_node(cache, node, node->new_bytenr);
		}
	}

	cache->last_trans = 0;
	return 1;
}
473
6282675e
QW
/* Return true if the root's reloc tree has been merged and marked dead. */
static bool reloc_root_is_dead(struct btrfs_root *root)
{
	/*
	 * Pair with set_bit/clear_bit in clean_dirty_subvols and
	 * btrfs_update_reloc_root. We need to see the updated bit before
	 * trying to access reloc_root
	 */
	smp_rmb();
	if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
		return true;
	return false;
}
486
/*
 * Check if this subvolume tree has valid reloc tree.
 *
 * Reloc tree after swap is considered dead, thus not considered as valid.
 * This is enough for most callers, as they don't distinguish dead reloc root
 * from no reloc root. But should_ignore_root() below is a special case.
 */
static bool have_reloc_root(struct btrfs_root *root)
{
	if (reloc_root_is_dead(root))
		return false;
	if (!root->reloc_root)
		return false;
	return true;
}
f2a97a9d 502
3fd0a558
YZ
/*
 * Return 1 if the backref node for this fs tree root is useless for
 * relocation (backref lookup can reach it through its reloc tree instead),
 * 0 if the root must be tracked.
 */
static int should_ignore_root(struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;

	/* COW-only trees never get reloc trees */
	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
		return 0;

	/* This root has been merged with its reloc tree, we can ignore it */
	if (reloc_root_is_dead(root))
		return 1;

	reloc_root = root->reloc_root;
	if (!reloc_root)
		return 0;

	if (btrfs_header_generation(reloc_root->commit_root) ==
	    root->fs_info->running_transaction->transid)
		return 0;
	/*
	 * if there is reloc tree and it was created in previous
	 * transaction backref lookup can find the reloc tree,
	 * so backref node for the fs tree root is useless for
	 * relocation.
	 */
	return 1;
}
5d4f98a2
YZ
/*
 * find reloc tree by address of tree root
 *
 * Looks @bytenr up in rc->reloc_root_tree under its spinlock.  Returns the
 * reloc root with a reference taken (btrfs_grab_root), or NULL if no
 * mapping exists.  Caller must btrfs_put_root() the result.
 */
struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct reloc_control *rc = fs_info->reloc_ctl;
	struct rb_node *rb_node;
	struct mapping_node *node;
	struct btrfs_root *root = NULL;

	ASSERT(rc);
	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root, bytenr);
	if (rb_node) {
		node = rb_entry(rb_node, struct mapping_node, rb_node);
		root = (struct btrfs_root *)node->data;
	}
	spin_unlock(&rc->reloc_root_tree.lock);
	return btrfs_grab_root(root);
}
549
5d4f98a2
YZ
/*
 * Look up the fs root with the given objectid (highest ROOT_ITEM, offset
 * (u64)-1).  Returns a referenced root or an ERR_PTR.
 */
static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
				       u64 root_objectid)
{
	struct btrfs_key key;

	key.objectid = root_objectid;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	return btrfs_get_fs_root(fs_info, &key, false);
}
561
4007ea87
QW
/*
 * Handle direct tree backref
 *
 * Direct tree backref means, the backref item shows its parent bytenr
 * directly. This is for SHARED_BLOCK_REF backref (keyed or inlined).
 *
 * @ref_key:	The converted backref key.
 *		For keyed backref, it's the item key.
 *		For inlined backref, objectid is the bytenr,
 *		type is btrfs_inline_ref_type, offset is
 *		btrfs_inline_ref_offset.
 *
 * Returns 0 on success, -ENOENT if a reloc root cannot be found for a
 * self-pointing backref, or -ENOMEM on allocation failure.
 */
static int handle_direct_tree_backref(struct btrfs_backref_cache *cache,
				      struct btrfs_key *ref_key,
				      struct btrfs_backref_node *cur)
{
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_node *upper;
	struct rb_node *rb_node;

	ASSERT(ref_key->type == BTRFS_SHARED_BLOCK_REF_KEY);

	/* Only reloc root uses backref pointing to itself */
	if (ref_key->objectid == ref_key->offset) {
		struct btrfs_root *root;

		cur->is_reloc_root = 1;
		/* Only reloc backref cache cares about a specific root */
		if (cache->is_reloc) {
			root = find_reloc_root(cache->fs_info, cur->bytenr);
			if (WARN_ON(!root))
				return -ENOENT;
			cur->root = root;
		} else {
			/*
			 * For generic purpose backref cache, reloc root node
			 * is useless.
			 */
			list_add(&cur->list, &cache->useless_node);
		}
		return 0;
	}

	edge = alloc_backref_edge(cache);
	if (!edge)
		return -ENOMEM;

	rb_node = rb_simple_search(&cache->rb_root, ref_key->offset);
	if (!rb_node) {
		/* Parent node not yet cached */
		upper = alloc_backref_node(cache, ref_key->offset,
					   cur->level + 1);
		if (!upper) {
			free_backref_edge(cache, edge);
			return -ENOMEM;
		}

		/*
		 * Backrefs for the upper level block aren't cached, add the
		 * block to pending list
		 */
		list_add_tail(&edge->list[UPPER], &cache->pending_edge);
	} else {
		/* Parent node already cached */
		upper = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
		ASSERT(upper->checked);
		INIT_LIST_HEAD(&edge->list[UPPER]);
	}
	/* Upper side of the edge is linked later, in finish_upper_links() */
	link_backref_edge(edge, cur, upper, LINK_LOWER);
	return 0;
}
633
4d81ea8b
QW
/*
 * Handle indirect tree backref
 *
 * Indirect tree backref means, we only know which tree the node belongs to.
 * We still need to do a tree search to find out the parents. This is for
 * TREE_BLOCK_REF backref (keyed or inlined).
 *
 * @ref_key:	The same as @ref_key in handle_direct_tree_backref()
 * @tree_key:	The first key of this tree block.
 * @path:	A clean (released) path, to avoid allocating path every time
 *		the function gets called.
 */
static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache,
					struct btrfs_path *path,
					struct btrfs_key *ref_key,
					struct btrfs_key *tree_key,
					struct btrfs_backref_node *cur)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_backref_node *upper;
	struct btrfs_backref_node *lower;
	struct btrfs_backref_edge *edge;
	struct extent_buffer *eb;
	struct btrfs_root *root;
	struct rb_node *rb_node;
	int level;
	bool need_check = true;
	int ret;

	root = read_fs_root(fs_info, ref_key->offset);
	if (IS_ERR(root))
		return PTR_ERR(root);
	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
		cur->cowonly = 1;

	if (btrfs_root_level(&root->root_item) == cur->level) {
		/* Tree root */
		ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr);
		if (should_ignore_root(root)) {
			btrfs_put_root(root);
			list_add(&cur->list, &cache->useless_node);
		} else {
			cur->root = root;
		}
		return 0;
	}

	level = cur->level + 1;

	/* Search the tree to find parent blocks referring to the block */
	path->search_commit_root = 1;
	path->skip_locking = 1;
	path->lowest_level = level;
	ret = btrfs_search_slot(NULL, root, tree_key, path, 0, 0);
	path->lowest_level = 0;
	if (ret < 0) {
		btrfs_put_root(root);
		return ret;
	}
	if (ret > 0 && path->slots[level] > 0)
		path->slots[level]--;

	eb = path->nodes[level];
	if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) {
		btrfs_err(fs_info,
"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
			  cur->bytenr, level - 1, root->root_key.objectid,
			  tree_key->objectid, tree_key->type, tree_key->offset);
		btrfs_put_root(root);
		ret = -ENOENT;
		goto out;
	}
	lower = cur;

	/* Add all nodes and edges in the path */
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level]) {
			/* Reached the tree root */
			ASSERT(btrfs_root_bytenr(&root->root_item) ==
			       lower->bytenr);
			if (should_ignore_root(root)) {
				btrfs_put_root(root);
				list_add(&lower->list, &cache->useless_node);
			} else {
				lower->root = root;
			}
			break;
		}

		edge = alloc_backref_edge(cache);
		if (!edge) {
			btrfs_put_root(root);
			ret = -ENOMEM;
			goto out;
		}

		eb = path->nodes[level];
		rb_node = rb_simple_search(&cache->rb_root, eb->start);
		if (!rb_node) {
			upper = alloc_backref_node(cache, eb->start,
						   lower->level + 1);
			if (!upper) {
				btrfs_put_root(root);
				free_backref_edge(cache, edge);
				ret = -ENOMEM;
				goto out;
			}
			upper->owner = btrfs_header_owner(eb);
			if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
				upper->cowonly = 1;

			/*
			 * If we know the block isn't shared we can avoid
			 * checking its backrefs.
			 */
			if (btrfs_block_can_be_shared(root, eb))
				upper->checked = 0;
			else
				upper->checked = 1;

			/*
			 * Add the block to pending list if we need to check its
			 * backrefs, we only do this once while walking up a
			 * tree as we will catch anything else later on.
			 */
			if (!upper->checked && need_check) {
				need_check = false;
				list_add_tail(&edge->list[UPPER],
					      &cache->pending_edge);
			} else {
				if (upper->checked)
					need_check = true;
				INIT_LIST_HEAD(&edge->list[UPPER]);
			}
		} else {
			upper = rb_entry(rb_node, struct btrfs_backref_node,
					 rb_node);
			ASSERT(upper->checked);
			INIT_LIST_HEAD(&edge->list[UPPER]);
			if (!upper->owner)
				upper->owner = btrfs_header_owner(eb);
		}
		link_backref_edge(edge, lower, upper, LINK_LOWER);

		if (rb_node) {
			/* Parent was cached, the rest of the path is too */
			btrfs_put_root(root);
			break;
		}
		lower = upper;
		upper = NULL;
	}
out:
	btrfs_release_path(path);
	return ret;
}
788
a26195a5 789static int handle_one_tree_block(struct btrfs_backref_cache *cache,
e7d571c7
QW
790 struct btrfs_path *path,
791 struct btrfs_backref_iter *iter,
792 struct btrfs_key *node_key,
a26195a5 793 struct btrfs_backref_node *cur)
5d4f98a2 794{
e7d571c7 795 struct btrfs_fs_info *fs_info = cache->fs_info;
a26195a5
QW
796 struct btrfs_backref_edge *edge;
797 struct btrfs_backref_node *exist;
5d4f98a2 798 int ret;
5d4f98a2 799
71f572a9 800 ret = btrfs_backref_iter_start(iter, cur->bytenr);
e7d571c7
QW
801 if (ret < 0)
802 return ret;
71f572a9
QW
803 /*
804 * We skip the first btrfs_tree_block_info, as we don't use the key
805 * stored in it, but fetch it from the tree block
806 */
807 if (btrfs_backref_has_tree_block_info(iter)) {
808 ret = btrfs_backref_iter_next(iter);
e7d571c7 809 if (ret < 0)
71f572a9 810 goto out;
71f572a9
QW
811 /* No extra backref? This means the tree block is corrupted */
812 if (ret > 0) {
e7d571c7 813 ret = -EUCLEAN;
71f572a9
QW
814 goto out;
815 }
816 }
5d4f98a2
YZ
817 WARN_ON(cur->checked);
818 if (!list_empty(&cur->upper)) {
819 /*
70f23fd6 820 * the backref was added previously when processing
5d4f98a2
YZ
821 * backref of type BTRFS_TREE_BLOCK_REF_KEY
822 */
75bfb9af 823 ASSERT(list_is_singular(&cur->upper));
a26195a5 824 edge = list_entry(cur->upper.next, struct btrfs_backref_edge,
5d4f98a2 825 list[LOWER]);
75bfb9af 826 ASSERT(list_empty(&edge->list[UPPER]));
5d4f98a2
YZ
827 exist = edge->node[UPPER];
828 /*
829 * add the upper level block to pending list if we need
830 * check its backrefs
831 */
832 if (!exist->checked)
84780289 833 list_add_tail(&edge->list[UPPER], &cache->pending_edge);
5d4f98a2
YZ
834 } else {
835 exist = NULL;
836 }
837
71f572a9
QW
838 for (; ret == 0; ret = btrfs_backref_iter_next(iter)) {
839 struct extent_buffer *eb;
840 struct btrfs_key key;
841 int type;
5d4f98a2 842
71f572a9
QW
843 cond_resched();
844 eb = btrfs_backref_get_eb(iter);
5d4f98a2 845
71f572a9
QW
846 key.objectid = iter->bytenr;
847 if (btrfs_backref_iter_is_inline_ref(iter)) {
848 struct btrfs_extent_inline_ref *iref;
5d4f98a2 849
5d4f98a2 850 /* update key for inline back ref */
71f572a9
QW
851 iref = (struct btrfs_extent_inline_ref *)
852 ((unsigned long)iter->cur_ptr);
3de28d57
LB
853 type = btrfs_get_extent_inline_ref_type(eb, iref,
854 BTRFS_REF_TYPE_BLOCK);
855 if (type == BTRFS_REF_TYPE_INVALID) {
e7d571c7 856 ret = -EUCLEAN;
3de28d57
LB
857 goto out;
858 }
859 key.type = type;
5d4f98a2 860 key.offset = btrfs_extent_inline_ref_offset(eb, iref);
71f572a9
QW
861 } else {
862 key.type = iter->cur_key.type;
863 key.offset = iter->cur_key.offset;
5d4f98a2
YZ
864 }
865
fa6ac715
QW
866 /*
867 * Parent node found and matches current inline ref, no need to
868 * rebuild this node for this inline ref.
869 */
5d4f98a2
YZ
870 if (exist &&
871 ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
872 exist->owner == key.offset) ||
873 (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
874 exist->bytenr == key.offset))) {
875 exist = NULL;
71f572a9 876 continue;
5d4f98a2
YZ
877 }
878
fa6ac715 879 /* SHARED_BLOCK_REF means key.offset is the parent bytenr */
5d4f98a2 880 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
4007ea87 881 ret = handle_direct_tree_backref(cache, &key, cur);
e7d571c7 882 if (ret < 0)
5d4f98a2 883 goto out;
71f572a9 884 continue;
6d8ff4e4 885 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
e7d571c7
QW
886 ret = -EINVAL;
887 btrfs_print_v0_err(fs_info);
888 btrfs_handle_fs_error(fs_info, ret, NULL);
ba3c2b19 889 goto out;
5d4f98a2 890 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
71f572a9 891 continue;
5d4f98a2
YZ
892 }
893
fa6ac715
QW
894 /*
895 * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset
896 * means the root objectid. We need to search the tree to get
897 * its parent bytenr.
898 */
4d81ea8b
QW
899 ret = handle_indirect_tree_backref(cache, path, &key, node_key,
900 cur);
e7d571c7 901 if (ret < 0)
5d4f98a2 902 goto out;
5d4f98a2 903 }
71f572a9 904 ret = 0;
5d4f98a2
YZ
905 cur->checked = 1;
906 WARN_ON(exist);
e7d571c7
QW
907out:
908 btrfs_backref_iter_release(iter);
909 return ret;
910}
911
1f872924
QW
/*
 * In handle_one_tree_block(), we have only linked the lower node to the edge,
 * but the upper node hasn't been linked to the edge.
 * This means we can only iterate through btrfs_backref_node::upper to reach
 * parent edges, but not through btrfs_backref_node::lower to reach children
 * edges.
 *
 * This function will finish the btrfs_backref_node::lower to related edges,
 * so that backref cache can be bi-directionally iterated.
 *
 * Also, this will add the nodes to backref cache for the next run.
 *
 * Returns 0 on success, -EUCLEAN on cache inconsistency.
 */
static int finish_upper_links(struct btrfs_backref_cache *cache,
			      struct btrfs_backref_node *start)
{
	struct list_head *useless_node = &cache->useless_node;
	struct btrfs_backref_edge *edge;
	struct rb_node *rb_node;
	LIST_HEAD(pending_edge);

	ASSERT(start->checked);

	/* Insert this node to cache if it's not COW-only */
	if (!start->cowonly) {
		rb_node = rb_simple_insert(&cache->rb_root, start->bytenr,
					   &start->rb_node);
		if (rb_node)
			backref_tree_panic(rb_node, -EEXIST, start->bytenr);
		list_add_tail(&start->lower, &cache->leaves);
	}

	/*
	 * Use breadth first search to iterate all related edges.
	 *
	 * The starting points are all the edges of this node
	 */
	list_for_each_entry(edge, &start->upper, list[LOWER])
		list_add_tail(&edge->list[UPPER], &pending_edge);

	while (!list_empty(&pending_edge)) {
		struct btrfs_backref_node *upper;
		struct btrfs_backref_node *lower;
		struct rb_node *rb_node;

		edge = list_first_entry(&pending_edge,
				struct btrfs_backref_edge, list[UPPER]);
		list_del_init(&edge->list[UPPER]);
		upper = edge->node[UPPER];
		lower = edge->node[LOWER];

		/* Parent is detached, no need to keep any edges */
		if (upper->detached) {
			list_del(&edge->list[LOWER]);
			free_backref_edge(cache, edge);

			/* Lower node is orphan, queue for cleanup */
			if (list_empty(&lower->upper))
				list_add(&lower->list, useless_node);
			continue;
		}

		/*
		 * All new nodes added in current build_backref_tree() haven't
		 * been linked to the cache rb tree.
		 * So if we have upper->rb_node populated, this means a cache
		 * hit. We only need to link the edge, as @upper and all its
		 * parent have already been linked.
		 */
		if (!RB_EMPTY_NODE(&upper->rb_node)) {
			if (upper->lowest) {
				/* Gains a child, no longer a leaf */
				list_del_init(&upper->lower);
				upper->lowest = 0;
			}

			list_add_tail(&edge->list[UPPER], &upper->lower);
			continue;
		}

		/* Sanity check, we shouldn't have any unchecked nodes */
		if (!upper->checked) {
			ASSERT(0);
			return -EUCLEAN;
		}

		/* Sanity check, COW-only node has non-COW-only parent */
		if (start->cowonly != upper->cowonly) {
			ASSERT(0);
			return -EUCLEAN;
		}

		/* Only cache non-COW-only (subvolume trees) tree blocks */
		if (!upper->cowonly) {
			rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
						   &upper->rb_node);
			if (rb_node) {
				backref_tree_panic(rb_node, -EEXIST,
						   upper->bytenr);
				return -EUCLEAN;
			}
		}

		list_add_tail(&edge->list[UPPER], &upper->lower);

		/*
		 * Also queue all the parent edges of this uncached node to
		 * finish the upper linkage
		 */
		list_for_each_entry(edge, &upper->upper, list[LOWER])
			list_add_tail(&edge->list[UPPER], &pending_edge);
	}
	return 0;
}
1024
29db137b
QW
/*
 * For useless nodes, do two major clean ups:
 *
 * - Cleanup the children edges and nodes
 *   If child node is also orphan (no parent) during cleanup, then the child
 *   node will also be cleaned up.
 *
 * - Freeing up leaves (level 0), keeps nodes detached
 *   For nodes, the node is still cached as "detached"
 *
 * Return false if @node is not in the @useless_nodes list.
 * Return true if @node is in the @useless_nodes list.
 */
static bool handle_useless_nodes(struct reloc_control *rc,
				 struct btrfs_backref_node *node)
{
	struct btrfs_backref_cache *cache = &rc->backref_cache;
	struct list_head *useless_node = &cache->useless_node;
	bool ret = false;

	/* Drain the cache's useless_node list; entries may be appended below */
	while (!list_empty(useless_node)) {
		struct btrfs_backref_node *cur;

		cur = list_first_entry(useless_node, struct btrfs_backref_node,
				 list);
		list_del_init(&cur->list);

		/* Only tree root nodes can be added to @useless_nodes */
		ASSERT(list_empty(&cur->upper));

		/* Remember whether the caller's node itself was useless */
		if (cur == node)
			ret = true;

		/* The node is the lowest node */
		if (cur->lowest) {
			list_del_init(&cur->lower);
			cur->lowest = 0;
		}

		/* Cleanup the lower edges */
		while (!list_empty(&cur->lower)) {
			struct btrfs_backref_edge *edge;
			struct btrfs_backref_node *lower;

			edge = list_entry(cur->lower.next,
					struct btrfs_backref_edge, list[UPPER]);
			/* Unlink the edge from both its upper and lower node */
			list_del(&edge->list[UPPER]);
			list_del(&edge->list[LOWER]);
			lower = edge->node[LOWER];
			free_backref_edge(cache, edge);

			/* Child node is also orphan, queue for cleanup */
			if (list_empty(&lower->upper))
				list_add(&lower->list, useless_node);
		}
		/* Mark this block processed for relocation */
		mark_block_processed(rc, cur);

		/*
		 * Backref nodes for tree leaves are deleted from the cache.
		 * Backref nodes for upper level tree blocks are left in the
		 * cache to avoid unnecessary backref lookup.
		 */
		if (cur->level > 0) {
			list_add(&cur->list, &cache->detached);
			cur->detached = 1;
		} else {
			rb_erase(&cur->rb_node, &cache->rb_root);
			free_backref_node(cache, cur);
		}
	}
	return ret;
}
1098
e7d571c7
QW
/*
 * Build backref tree for a given tree block. Root of the backref tree
 * corresponds the tree block, leaves of the backref tree correspond roots of
 * b-trees that reference the tree block.
 *
 * The basic idea of this function is check backrefs of a given block to find
 * upper level blocks that reference the block, and then check backrefs of
 * these upper level blocks recursively. The recursion stops when tree root is
 * reached or backrefs for the block is cached.
 *
 * NOTE: if we find that backrefs for a block are cached, we know backrefs for
 * all upper level blocks that directly/indirectly reference the block are also
 * cached.
 *
 * Returns the backref node for @bytenr, NULL if the whole subtree turned out
 * to be useless, or an ERR_PTR() on failure.
 */
static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
			struct reloc_control *rc, struct btrfs_key *node_key,
			int level, u64 bytenr)
{
	struct btrfs_backref_iter *iter;
	struct btrfs_backref_cache *cache = &rc->backref_cache;
	/* For searching parent of TREE_BLOCK_REF */
	struct btrfs_path *path;
	struct btrfs_backref_node *cur;
	struct btrfs_backref_node *upper;
	struct btrfs_backref_node *lower;
	struct btrfs_backref_node *node = NULL;
	struct btrfs_backref_edge *edge;
	int ret;
	int err = 0;

	iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS);
	if (!iter)
		return ERR_PTR(-ENOMEM);
	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	node = alloc_backref_node(cache, bytenr, level);
	if (!node) {
		err = -ENOMEM;
		goto out;
	}

	node->lowest = 1;
	cur = node;

	/* Breadth-first search to build backref cache */
	do {
		ret = handle_one_tree_block(cache, path, iter, node_key, cur);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		edge = list_first_entry_or_null(&cache->pending_edge,
				struct btrfs_backref_edge, list[UPPER]);
		/*
		 * The pending list isn't empty, take the first block to
		 * process
		 */
		if (edge) {
			list_del_init(&edge->list[UPPER]);
			cur = edge->node[UPPER];
		}
	} while (edge);

	/* Finish the upper linkage of newly added edges/nodes */
	ret = finish_upper_links(cache, node);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/* If @node itself was useless, report NULL to the caller */
	if (handle_useless_nodes(rc, node))
		node = NULL;
out:
	btrfs_backref_iter_free(iter);
	btrfs_free_path(path);
	if (err) {
		/*
		 * Error cleanup: unlink everything left on the cache's
		 * useless_node/pending_edge lists so the cache is consistent
		 * before we return the error.
		 */
		while (!list_empty(&cache->useless_node)) {
			lower = list_first_entry(&cache->useless_node,
					struct btrfs_backref_node, list);
			list_del_init(&lower->list);
		}
		while (!list_empty(&cache->pending_edge)) {
			edge = list_first_entry(&cache->pending_edge,
					struct btrfs_backref_edge, list[UPPER]);
			list_del(&edge->list[UPPER]);
			list_del(&edge->list[LOWER]);
			lower = edge->node[LOWER];
			upper = edge->node[UPPER];
			free_backref_edge(cache, edge);

			/*
			 * Lower is no longer linked to any upper backref nodes
			 * and isn't in the cache, we can free it ourselves.
			 */
			if (list_empty(&lower->upper) &&
			    RB_EMPTY_NODE(&lower->rb_node))
				list_add(&lower->list, &cache->useless_node);

			if (!RB_EMPTY_NODE(&upper->rb_node))
				continue;

			/* Add this guy's upper edges to the list to process */
			list_for_each_entry(edge, &upper->upper, list[LOWER])
				list_add_tail(&edge->list[UPPER],
					      &cache->pending_edge);
			if (list_empty(&upper->upper))
				list_add(&upper->list, &cache->useless_node);
		}

		/* Free all orphaned nodes collected above */
		while (!list_empty(&cache->useless_node)) {
			lower = list_first_entry(&cache->useless_node,
					struct btrfs_backref_node, list);
			list_del_init(&lower->list);
			if (lower == node)
				node = NULL;
			free_backref_node(cache, lower);
		}

		remove_backref_node(cache, node);
		ASSERT(list_empty(&cache->useless_node) &&
		       list_empty(&cache->pending_edge));
		return ERR_PTR(err);
	}
	ASSERT(!node || !node->detached);
	ASSERT(list_empty(&cache->useless_node) &&
	       list_empty(&cache->pending_edge));
	return node;
}
1231
3fd0a558
YZ
1232/*
1233 * helper to add backref node for the newly created snapshot.
1234 * the backref node is created by cloning backref node that
1235 * corresponds to root of source tree
1236 */
1237static int clone_backref_node(struct btrfs_trans_handle *trans,
1238 struct reloc_control *rc,
1239 struct btrfs_root *src,
1240 struct btrfs_root *dest)
1241{
1242 struct btrfs_root *reloc_root = src->reloc_root;
a26195a5
QW
1243 struct btrfs_backref_cache *cache = &rc->backref_cache;
1244 struct btrfs_backref_node *node = NULL;
1245 struct btrfs_backref_node *new_node;
1246 struct btrfs_backref_edge *edge;
1247 struct btrfs_backref_edge *new_edge;
3fd0a558
YZ
1248 struct rb_node *rb_node;
1249
1250 if (cache->last_trans > 0)
1251 update_backref_cache(trans, cache);
1252
e9a28dc5 1253 rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
3fd0a558 1254 if (rb_node) {
a26195a5 1255 node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
3fd0a558
YZ
1256 if (node->detached)
1257 node = NULL;
1258 else
1259 BUG_ON(node->new_bytenr != reloc_root->node->start);
1260 }
1261
1262 if (!node) {
e9a28dc5
QW
1263 rb_node = rb_simple_search(&cache->rb_root,
1264 reloc_root->commit_root->start);
3fd0a558 1265 if (rb_node) {
a26195a5 1266 node = rb_entry(rb_node, struct btrfs_backref_node,
3fd0a558
YZ
1267 rb_node);
1268 BUG_ON(node->detached);
1269 }
1270 }
1271
1272 if (!node)
1273 return 0;
1274
0304f2d8 1275 new_node = alloc_backref_node(cache, dest->node->start, node->level);
3fd0a558
YZ
1276 if (!new_node)
1277 return -ENOMEM;
1278
3fd0a558 1279 new_node->lowest = node->lowest;
6848ad64 1280 new_node->checked = 1;
00246528 1281 new_node->root = btrfs_grab_root(dest);
0b530bc5 1282 ASSERT(new_node->root);
3fd0a558
YZ
1283
1284 if (!node->lowest) {
1285 list_for_each_entry(edge, &node->lower, list[UPPER]) {
1286 new_edge = alloc_backref_edge(cache);
1287 if (!new_edge)
1288 goto fail;
1289
2a979612
QW
1290 link_backref_edge(new_edge, edge->node[LOWER], new_node,
1291 LINK_UPPER);
3fd0a558 1292 }
76b9e23d
MX
1293 } else {
1294 list_add_tail(&new_node->lower, &cache->leaves);
3fd0a558
YZ
1295 }
1296
e9a28dc5
QW
1297 rb_node = rb_simple_insert(&cache->rb_root, new_node->bytenr,
1298 &new_node->rb_node);
43c04fb1
JM
1299 if (rb_node)
1300 backref_tree_panic(rb_node, -EEXIST, new_node->bytenr);
3fd0a558
YZ
1301
1302 if (!new_node->lowest) {
1303 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
1304 list_add_tail(&new_edge->list[LOWER],
1305 &new_edge->node[LOWER]->upper);
1306 }
1307 }
1308 return 0;
1309fail:
1310 while (!list_empty(&new_node->lower)) {
1311 new_edge = list_entry(new_node->lower.next,
a26195a5 1312 struct btrfs_backref_edge, list[UPPER]);
3fd0a558
YZ
1313 list_del(&new_edge->list[UPPER]);
1314 free_backref_edge(cache, new_edge);
1315 }
1316 free_backref_node(cache, new_node);
1317 return -ENOMEM;
1318}
1319
5d4f98a2
YZ
1320/*
1321 * helper to add 'address of tree root -> reloc tree' mapping
1322 */
ffd7b339 1323static int __must_check __add_reloc_root(struct btrfs_root *root)
5d4f98a2 1324{
0b246afa 1325 struct btrfs_fs_info *fs_info = root->fs_info;
5d4f98a2
YZ
1326 struct rb_node *rb_node;
1327 struct mapping_node *node;
0b246afa 1328 struct reloc_control *rc = fs_info->reloc_ctl;
5d4f98a2
YZ
1329
1330 node = kmalloc(sizeof(*node), GFP_NOFS);
ffd7b339
JM
1331 if (!node)
1332 return -ENOMEM;
5d4f98a2 1333
ea287ab1 1334 node->bytenr = root->commit_root->start;
5d4f98a2
YZ
1335 node->data = root;
1336
1337 spin_lock(&rc->reloc_root_tree.lock);
e9a28dc5
QW
1338 rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
1339 node->bytenr, &node->rb_node);
5d4f98a2 1340 spin_unlock(&rc->reloc_root_tree.lock);
ffd7b339 1341 if (rb_node) {
0b246afa 1342 btrfs_panic(fs_info, -EEXIST,
5d163e0e
JM
1343 "Duplicate root found for start=%llu while inserting into relocation tree",
1344 node->bytenr);
ffd7b339 1345 }
5d4f98a2
YZ
1346
1347 list_add_tail(&root->root_list, &rc->reloc_roots);
1348 return 0;
1349}
1350
/*
 * helper to delete the 'address of tree root -> reloc tree'
 * mapping
 */
static void __del_reloc_root(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *rb_node;
	struct mapping_node *node = NULL;
	struct reloc_control *rc = fs_info->reloc_ctl;
	bool put_ref = false;

	if (rc && root->node) {
		/* Remove the commit-root-keyed mapping under the tree lock */
		spin_lock(&rc->reloc_root_tree.lock);
		rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
					   root->commit_root->start);
		if (rb_node) {
			node = rb_entry(rb_node, struct mapping_node, rb_node);
			rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
			RB_CLEAR_NODE(&node->rb_node);
		}
		spin_unlock(&rc->reloc_root_tree.lock);
		/* No mapping found means there is nothing for us to drop */
		if (!node)
			return;
		BUG_ON((struct btrfs_root *)node->data != root);
	}

	/*
	 * We only put the reloc root here if it's on the list. There's a lot
	 * of places where the pattern is to splice the rc->reloc_roots, process
	 * the reloc roots, and then add the reloc root back onto
	 * rc->reloc_roots. If we call __del_reloc_root while it's off of the
	 * list we don't want the reference being dropped, because the guy
	 * messing with the list is in charge of the reference.
	 */
	spin_lock(&fs_info->trans_lock);
	if (!list_empty(&root->root_list)) {
		put_ref = true;
		list_del_init(&root->root_list);
	}
	spin_unlock(&fs_info->trans_lock);
	/* Drop the ref outside of trans_lock */
	if (put_ref)
		btrfs_put_root(root);
	kfree(node);
}
1396
/*
 * helper to update the 'address of tree root -> reloc tree'
 * mapping
 *
 * Re-keys the mapping node from the commit root's start to the current
 * root node's start.  Always returns 0; panics on a duplicate key.
 */
static int __update_reloc_root(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *rb_node;
	struct mapping_node *node = NULL;
	struct reloc_control *rc = fs_info->reloc_ctl;

	/* Phase 1: remove the mapping keyed by the old (commit root) bytenr */
	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
				   root->commit_root->start);
	if (rb_node) {
		node = rb_entry(rb_node, struct mapping_node, rb_node);
		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
	}
	spin_unlock(&rc->reloc_root_tree.lock);

	if (!node)
		return 0;
	BUG_ON((struct btrfs_root *)node->data != root);

	/* Phase 2: re-insert it keyed by the current root node's bytenr */
	spin_lock(&rc->reloc_root_tree.lock);
	node->bytenr = root->node->start;
	rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
				   node->bytenr, &node->rb_node);
	spin_unlock(&rc->reloc_root_tree.lock);
	if (rb_node)
		backref_tree_panic(rb_node, -EEXIST, node->bytenr);
	return 0;
}
1430
3fd0a558
YZ
/*
 * Create a reloc root item for @root with the given @objectid by copying
 * the appropriate root node (commit root or current root, see below) into
 * the reloc tree namespace and inserting the new root item.
 *
 * Returns the freshly read reloc root.  Allocation and tree-op failures
 * are treated as fatal (BUG_ON) in this path.
 */
static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
					struct btrfs_root *root, u64 objectid)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb;
	struct btrfs_root_item *root_item;
	struct btrfs_key root_key;
	int ret;

	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
	BUG_ON(!root_item);

	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = objectid;

	if (root->root_key.objectid == objectid) {
		u64 commit_root_gen;

		/* called by btrfs_init_reloc_root */
		ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
				      BTRFS_TREE_RELOC_OBJECTID);
		BUG_ON(ret);
		/*
		 * Set the last_snapshot field to the generation of the commit
		 * root - like this ctree.c:btrfs_block_can_be_shared() behaves
		 * correctly (returns true) when the relocation root is created
		 * either inside the critical section of a transaction commit
		 * (through transaction.c:qgroup_account_snapshot()) and when
		 * it's created before the transaction commit is started.
		 */
		commit_root_gen = btrfs_header_generation(root->commit_root);
		btrfs_set_root_last_snapshot(&root->root_item, commit_root_gen);
	} else {
		/*
		 * called by btrfs_reloc_post_snapshot_hook.
		 * the source tree is a reloc tree, all tree blocks
		 * modified after it was created have RELOC flag
		 * set in their headers. so it's OK to not update
		 * the 'last_snapshot'.
		 */
		ret = btrfs_copy_root(trans, root, root->node, &eb,
				      BTRFS_TREE_RELOC_OBJECTID);
		BUG_ON(ret);
	}

	/* Base the new root item on the source root, then point it at @eb */
	memcpy(root_item, &root->root_item, sizeof(*root_item));
	btrfs_set_root_bytenr(root_item, eb->start);
	btrfs_set_root_level(root_item, btrfs_header_level(eb));
	btrfs_set_root_generation(root_item, trans->transid);

	if (root->root_key.objectid == objectid) {
		/* A reloc tree for an fs tree starts with zero refs */
		btrfs_set_root_refs(root_item, 0);
		memset(&root_item->drop_progress, 0,
		       sizeof(struct btrfs_disk_key));
		root_item->drop_level = 0;
	}

	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	ret = btrfs_insert_root(trans, fs_info->tree_root,
				&root_key, root_item);
	BUG_ON(ret);
	kfree(root_item);

	reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key);
	BUG_ON(IS_ERR(reloc_root));
	set_bit(BTRFS_ROOT_REF_COWS, &reloc_root->state);
	reloc_root->last_trans = trans->transid;
	return reloc_root;
}
1504
/*
 * create reloc tree for a given fs tree. reloc tree is just a
 * snapshot of the fs tree with special root objectid.
 *
 * The reloc_root comes out of here with two references, one for
 * root->reloc_root, and another for being on the rc->reloc_roots list.
 *
 * Returns 0 (no-reloc-in-progress and already-created cases included).
 */
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct reloc_control *rc = fs_info->reloc_ctl;
	struct btrfs_block_rsv *rsv;
	int clear_rsv = 0;
	int ret;

	/* No relocation is running, nothing to do */
	if (!rc)
		return 0;

	/*
	 * The subvolume has reloc tree but the swap is finished, no need to
	 * create/update the dead reloc tree
	 */
	if (reloc_root_is_dead(root))
		return 0;

	/*
	 * This is subtle but important.  We do not do
	 * record_root_in_transaction for reloc roots, instead we record their
	 * corresponding fs root, and then here we update the last trans for the
	 * reloc root.  This means that we have to do this for the entire life
	 * of the reloc root, regardless of which stage of the relocation we are
	 * in.
	 */
	if (root->reloc_root) {
		reloc_root = root->reloc_root;
		reloc_root->last_trans = trans->transid;
		return 0;
	}

	/*
	 * We are merging reloc roots, we do not need new reloc trees.  Also
	 * reloc trees never need their own reloc tree.
	 */
	if (!rc->create_reloc_tree ||
	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		return 0;

	/* Temporarily switch to the relocation block reserve if needed */
	if (!trans->reloc_reserved) {
		rsv = trans->block_rsv;
		trans->block_rsv = rc->block_rsv;
		clear_rsv = 1;
	}
	reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
	if (clear_rsv)
		trans->block_rsv = rsv;

	ret = __add_reloc_root(reloc_root);
	BUG_ON(ret < 0);
	/* Second reference: the fs root's own pointer to its reloc tree */
	root->reloc_root = btrfs_grab_root(reloc_root);
	return 0;
}
1568
/*
 * update root item of reloc tree
 *
 * Writes the reloc root's current node into its root item and persists it
 * with btrfs_update_root().  When merging and the item's refcount dropped
 * to zero, the reloc root is also unhooked from the reloc_root_tree.
 * Always returns 0.
 */
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct btrfs_root_item *root_item;
	int ret;

	if (!have_reloc_root(root))
		goto out;

	reloc_root = root->reloc_root;
	root_item = &reloc_root->root_item;

	/*
	 * We are probably ok here, but __del_reloc_root() will drop its ref of
	 * the root.  We have the ref for root->reloc_root, but just in case
	 * hold it while we update the reloc root.
	 */
	btrfs_grab_root(reloc_root);

	/* root->reloc_root will stay until current relocation finished */
	if (fs_info->reloc_ctl->merge_reloc_tree &&
	    btrfs_root_refs(root_item) == 0) {
		set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
		/*
		 * Mark the tree as dead before we change reloc_root so
		 * have_reloc_root will not touch it from now on.
		 */
		smp_wmb();
		__del_reloc_root(reloc_root);
	}

	if (reloc_root->commit_root != reloc_root->node) {
		/* Re-key the mapping to the new root node before updating */
		__update_reloc_root(reloc_root);
		btrfs_set_root_node(root_item, reloc_root->node);
		free_extent_buffer(reloc_root->commit_root);
		reloc_root->commit_root = btrfs_root_node(reloc_root);
	}

	ret = btrfs_update_root(trans, fs_info->tree_root,
				&reloc_root->root_key, root_item);
	BUG_ON(ret);
	btrfs_put_root(reloc_root);
out:
	return 0;
}
1619
/*
 * helper to find first cached inode with inode number >= objectid
 * in a subvolume
 *
 * Returns a referenced (igrab'd) inode, or NULL if no cached inode with
 * ino >= @objectid exists.  The caller is responsible for dropping the
 * reference.
 */
static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
{
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;

	spin_lock(&root->inode_lock);
again:
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < btrfs_ino(entry))
			node = node->rb_left;
		else if (objectid > btrfs_ino(entry))
			node = node->rb_right;
		else
			break;
	}
	if (!node) {
		/* No exact match, walk forward from the last visited node */
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= btrfs_ino(entry)) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			spin_unlock(&root->inode_lock);
			return inode;
		}

		/* igrab failed (inode being freed), try the next one */
		objectid = btrfs_ino(entry) + 1;
		/* If we dropped the lock to reschedule, restart the search */
		if (cond_resched_lock(&root->inode_lock))
			goto again;

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
	return NULL;
}
1673
5d4f98a2
YZ
/*
 * get new location of data
 *
 * Looks up the file extent of @reloc_inode covering the relocated copy of
 * the extent at @bytenr and stores its disk bytenr in @new_bytenr.
 * Returns 0 on success, -ENOENT if no matching extent item exists,
 * -EINVAL if the found extent's size does not match @num_bytes, or a
 * negative errno from the search.
 */
static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
			    u64 bytenr, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* index_cnt holds the offset of the block group in the data inode */
	bytenr -= BTRFS_I(reloc_inode)->index_cnt;
	ret = btrfs_lookup_file_extent(NULL, root, path,
			btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);

	/* Relocated data extents are plain, uncompressed and unencoded */
	BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
	       btrfs_file_extent_compression(leaf, fi) ||
	       btrfs_file_extent_encryption(leaf, fi) ||
	       btrfs_file_extent_other_encoding(leaf, fi));

	if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
		ret = -EINVAL;
		goto out;
	}

	*new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
1720
/*
 * update file extent items in the tree leaf to point to
 * the new locations.
 *
 * For each data extent in @leaf that falls inside the block group being
 * relocated: look up its new location via the data reloc inode, rewrite the
 * disk bytenr, and adjust extent refs (add ref to the new location, drop the
 * ref to the old one).  Only runs in the UPDATE_DATA_PTRS stage.
 * Returns 0 on success or a negative errno; ref-update failures abort the
 * transaction.
 */
static noinline_for_stack
int replace_file_extents(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc,
			 struct btrfs_root *root,
			 struct extent_buffer *leaf)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	struct inode *inode = NULL;
	u64 parent;
	u64 bytenr;
	u64 new_bytenr = 0;
	u64 num_bytes;
	u64 end;
	u32 nritems;
	u32 i;
	int ret = 0;
	int first = 1;
	int dirty = 0;

	if (rc->stage != UPDATE_DATA_PTRS)
		return 0;

	/* reloc trees always use full backref */
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		parent = leaf->start;
	else
		parent = 0;

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		struct btrfs_ref ref = { 0 };

		cond_resched();
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		/* Holes have a zero disk bytenr */
		if (bytenr == 0)
			continue;
		/* Only extents inside the relocated block group matter */
		if (!in_range(bytenr, rc->block_group->start,
			      rc->block_group->length))
			continue;

		/*
		 * if we are modifying block in fs tree, wait for readpage
		 * to complete and drop the extent cache
		 */
		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			if (first) {
				inode = find_next_inode(root, key.objectid);
				first = 0;
			} else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
				btrfs_add_delayed_iput(inode);
				inode = find_next_inode(root, key.objectid);
			}
			if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
				end = key.offset +
				      btrfs_file_extent_num_bytes(leaf, fi);
				WARN_ON(!IS_ALIGNED(key.offset,
						    fs_info->sectorsize));
				WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
				end--;
				/* Skip this extent if the range is busy */
				ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
						      key.offset, end);
				if (!ret)
					continue;

				btrfs_drop_extent_cache(BTRFS_I(inode),
						key.offset,	end, 1);
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      key.offset, end);
			}
		}

		ret = get_new_location(rc->data_inode, &new_bytenr,
				       bytenr, num_bytes);
		if (ret) {
			/*
			 * Don't have to abort since we've not changed anything
			 * in the file extent yet.
			 */
			break;
		}

		btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
		dirty = 1;

		/* Convert to the logical file offset of the extent start */
		key.offset -= btrfs_file_extent_offset(leaf, fi);
		btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
				       num_bytes, parent);
		ref.real_root = root->root_key.objectid;
		btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
				    key.objectid, key.offset);
		ret = btrfs_inc_extent_ref(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}

		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
				       num_bytes, parent);
		ref.real_root = root->root_key.objectid;
		btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
				    key.objectid, key.offset);
		ret = btrfs_free_extent(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}
	}
	if (dirty)
		btrfs_mark_buffer_dirty(leaf);
	if (inode)
		btrfs_add_delayed_iput(inode);
	return ret;
}
1848
1849static noinline_for_stack
1850int memcmp_node_keys(struct extent_buffer *eb, int slot,
1851 struct btrfs_path *path, int level)
1852{
1853 struct btrfs_disk_key key1;
1854 struct btrfs_disk_key key2;
1855 btrfs_node_key(eb, &key1, slot);
1856 btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1857 return memcmp(&key1, &key2, sizeof(key1));
1858}
1859
1860/*
1861 * try to replace tree blocks in fs tree with the new blocks
1862 * in reloc tree. tree blocks haven't been modified since the
1863 * reloc tree was create can be replaced.
1864 *
1865 * if a block was replaced, level of the block + 1 is returned.
1866 * if no block got replaced, 0 is returned. if there are other
1867 * errors, a negative error number is returned.
1868 */
3fd0a558 1869static noinline_for_stack
3d0174f7 1870int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
3fd0a558
YZ
1871 struct btrfs_root *dest, struct btrfs_root *src,
1872 struct btrfs_path *path, struct btrfs_key *next_key,
1873 int lowest_level, int max_level)
5d4f98a2 1874{
0b246afa 1875 struct btrfs_fs_info *fs_info = dest->fs_info;
5d4f98a2
YZ
1876 struct extent_buffer *eb;
1877 struct extent_buffer *parent;
82fa113f 1878 struct btrfs_ref ref = { 0 };
5d4f98a2
YZ
1879 struct btrfs_key key;
1880 u64 old_bytenr;
1881 u64 new_bytenr;
1882 u64 old_ptr_gen;
1883 u64 new_ptr_gen;
1884 u64 last_snapshot;
1885 u32 blocksize;
3fd0a558 1886 int cow = 0;
5d4f98a2
YZ
1887 int level;
1888 int ret;
1889 int slot;
1890
1891 BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1892 BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
5d4f98a2
YZ
1893
1894 last_snapshot = btrfs_root_last_snapshot(&src->root_item);
3fd0a558 1895again:
5d4f98a2
YZ
1896 slot = path->slots[lowest_level];
1897 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1898
1899 eb = btrfs_lock_root_node(dest);
8bead258 1900 btrfs_set_lock_blocking_write(eb);
5d4f98a2
YZ
1901 level = btrfs_header_level(eb);
1902
1903 if (level < lowest_level) {
1904 btrfs_tree_unlock(eb);
1905 free_extent_buffer(eb);
1906 return 0;
1907 }
1908
3fd0a558
YZ
1909 if (cow) {
1910 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
1911 BUG_ON(ret);
1912 }
8bead258 1913 btrfs_set_lock_blocking_write(eb);
5d4f98a2
YZ
1914
1915 if (next_key) {
1916 next_key->objectid = (u64)-1;
1917 next_key->type = (u8)-1;
1918 next_key->offset = (u64)-1;
1919 }
1920
1921 parent = eb;
1922 while (1) {
581c1760
QW
1923 struct btrfs_key first_key;
1924
5d4f98a2
YZ
1925 level = btrfs_header_level(parent);
1926 BUG_ON(level < lowest_level);
1927
1928 ret = btrfs_bin_search(parent, &key, level, &slot);
cbca7d59
FM
1929 if (ret < 0)
1930 break;
5d4f98a2
YZ
1931 if (ret && slot > 0)
1932 slot--;
1933
1934 if (next_key && slot + 1 < btrfs_header_nritems(parent))
1935 btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1936
1937 old_bytenr = btrfs_node_blockptr(parent, slot);
0b246afa 1938 blocksize = fs_info->nodesize;
5d4f98a2 1939 old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
17515f1b 1940 btrfs_node_key_to_cpu(parent, &first_key, slot);
5d4f98a2
YZ
1941
1942 if (level <= max_level) {
1943 eb = path->nodes[level];
1944 new_bytenr = btrfs_node_blockptr(eb,
1945 path->slots[level]);
1946 new_ptr_gen = btrfs_node_ptr_generation(eb,
1947 path->slots[level]);
1948 } else {
1949 new_bytenr = 0;
1950 new_ptr_gen = 0;
1951 }
1952
fae7f21c 1953 if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
5d4f98a2
YZ
1954 ret = level;
1955 break;
1956 }
1957
1958 if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1959 memcmp_node_keys(parent, slot, path, level)) {
3fd0a558 1960 if (level <= lowest_level) {
5d4f98a2
YZ
1961 ret = 0;
1962 break;
1963 }
1964
581c1760
QW
1965 eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen,
1966 level - 1, &first_key);
64c043de
LB
1967 if (IS_ERR(eb)) {
1968 ret = PTR_ERR(eb);
264813ac 1969 break;
64c043de
LB
1970 } else if (!extent_buffer_uptodate(eb)) {
1971 ret = -EIO;
416bc658 1972 free_extent_buffer(eb);
379cde74 1973 break;
416bc658 1974 }
5d4f98a2 1975 btrfs_tree_lock(eb);
3fd0a558
YZ
1976 if (cow) {
1977 ret = btrfs_cow_block(trans, dest, eb, parent,
1978 slot, &eb);
1979 BUG_ON(ret);
5d4f98a2 1980 }
8bead258 1981 btrfs_set_lock_blocking_write(eb);
5d4f98a2
YZ
1982
1983 btrfs_tree_unlock(parent);
1984 free_extent_buffer(parent);
1985
1986 parent = eb;
1987 continue;
1988 }
1989
3fd0a558
YZ
1990 if (!cow) {
1991 btrfs_tree_unlock(parent);
1992 free_extent_buffer(parent);
1993 cow = 1;
1994 goto again;
1995 }
1996
5d4f98a2
YZ
1997 btrfs_node_key_to_cpu(path->nodes[level], &key,
1998 path->slots[level]);
b3b4aa74 1999 btrfs_release_path(path);
5d4f98a2
YZ
2000
2001 path->lowest_level = level;
2002 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
2003 path->lowest_level = 0;
2004 BUG_ON(ret);
2005
824d8dff
QW
2006 /*
2007 * Info qgroup to trace both subtrees.
2008 *
2009 * We must trace both trees.
2010 * 1) Tree reloc subtree
2011 * If not traced, we will leak data numbers
2012 * 2) Fs subtree
2013 * If not traced, we will double count old data
f616f5cd
QW
2014 *
2015 * We don't scan the subtree right now, but only record
2016 * the swapped tree blocks.
2017 * The real subtree rescan is delayed until we have new
2018 * CoW on the subtree root node before transaction commit.
824d8dff 2019 */
370a11b8
QW
2020 ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
2021 rc->block_group, parent, slot,
2022 path->nodes[level], path->slots[level],
2023 last_snapshot);
2024 if (ret < 0)
2025 break;
5d4f98a2
YZ
2026 /*
2027 * swap blocks in fs tree and reloc tree.
2028 */
2029 btrfs_set_node_blockptr(parent, slot, new_bytenr);
2030 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
2031 btrfs_mark_buffer_dirty(parent);
2032
2033 btrfs_set_node_blockptr(path->nodes[level],
2034 path->slots[level], old_bytenr);
2035 btrfs_set_node_ptr_generation(path->nodes[level],
2036 path->slots[level], old_ptr_gen);
2037 btrfs_mark_buffer_dirty(path->nodes[level]);
2038
82fa113f
QW
2039 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
2040 blocksize, path->nodes[level]->start);
2041 ref.skip_qgroup = true;
2042 btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
2043 ret = btrfs_inc_extent_ref(trans, &ref);
5d4f98a2 2044 BUG_ON(ret);
82fa113f
QW
2045 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
2046 blocksize, 0);
2047 ref.skip_qgroup = true;
2048 btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
2049 ret = btrfs_inc_extent_ref(trans, &ref);
5d4f98a2
YZ
2050 BUG_ON(ret);
2051
ffd4bb2a
QW
2052 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
2053 blocksize, path->nodes[level]->start);
2054 btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
2055 ref.skip_qgroup = true;
2056 ret = btrfs_free_extent(trans, &ref);
5d4f98a2
YZ
2057 BUG_ON(ret);
2058
ffd4bb2a
QW
2059 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
2060 blocksize, 0);
2061 btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
2062 ref.skip_qgroup = true;
2063 ret = btrfs_free_extent(trans, &ref);
5d4f98a2
YZ
2064 BUG_ON(ret);
2065
2066 btrfs_unlock_up_safe(path, 0);
2067
2068 ret = level;
2069 break;
2070 }
2071 btrfs_tree_unlock(parent);
2072 free_extent_buffer(parent);
2073 return ret;
2074}
2075
2076/*
2077 * helper to find next relocated block in reloc tree
2078 */
2079static noinline_for_stack
2080int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
2081 int *level)
2082{
2083 struct extent_buffer *eb;
2084 int i;
2085 u64 last_snapshot;
2086 u32 nritems;
2087
2088 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
2089
2090 for (i = 0; i < *level; i++) {
2091 free_extent_buffer(path->nodes[i]);
2092 path->nodes[i] = NULL;
2093 }
2094
2095 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
2096 eb = path->nodes[i];
2097 nritems = btrfs_header_nritems(eb);
2098 while (path->slots[i] + 1 < nritems) {
2099 path->slots[i]++;
2100 if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
2101 last_snapshot)
2102 continue;
2103
2104 *level = i;
2105 return 0;
2106 }
2107 free_extent_buffer(path->nodes[i]);
2108 path->nodes[i] = NULL;
2109 }
2110 return 1;
2111}
2112
/*
 * walk down reloc tree to find relocated block of lowest level
 */
static noinline_for_stack
int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
			 int *level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *eb = NULL;
	int i;
	u64 bytenr;
	u64 ptr_gen = 0;
	u64 last_snapshot;
	u32 nritems;

	last_snapshot = btrfs_root_last_snapshot(&root->root_item);

	for (i = *level; i > 0; i--) {
		struct btrfs_key first_key;

		eb = path->nodes[i];
		nritems = btrfs_header_nritems(eb);
		/*
		 * Skip slots whose generation predates the last snapshot:
		 * those blocks were not touched by relocation.
		 */
		while (path->slots[i] < nritems) {
			ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
			if (ptr_gen > last_snapshot)
				break;
			path->slots[i]++;
		}
		if (path->slots[i] >= nritems) {
			/* Level exhausted; resume at the parent level. */
			if (i == *level)
				break;
			*level = i + 1;
			return 0;
		}
		if (i == 1) {
			/* Lowest possible internal level reached. */
			*level = i;
			return 0;
		}

		/* Descend into the relocated child block. */
		bytenr = btrfs_node_blockptr(eb, path->slots[i]);
		btrfs_node_key_to_cpu(eb, &first_key, path->slots[i]);
		eb = read_tree_block(fs_info, bytenr, ptr_gen, i - 1,
				     &first_key);
		if (IS_ERR(eb)) {
			return PTR_ERR(eb);
		} else if (!extent_buffer_uptodate(eb)) {
			free_extent_buffer(eb);
			return -EIO;
		}
		BUG_ON(btrfs_header_level(eb) != i - 1);
		path->nodes[i - 1] = eb;
		path->slots[i - 1] = 0;
	}
	return 1;
}
2168
/*
 * invalidate extent cache for file extents whose key in range of
 * [min_key, max_key)
 */
static int invalidate_extent_cache(struct btrfs_root *root,
				   struct btrfs_key *min_key,
				   struct btrfs_key *max_key)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct inode *inode = NULL;
	u64 objectid;
	u64 start, end;
	u64 ino;

	objectid = min_key->objectid;
	while (1) {
		cond_resched();
		/* Release the previous iteration's inode (iput(NULL) is a no-op). */
		iput(inode);

		if (objectid > max_key->objectid)
			break;

		inode = find_next_inode(root, objectid);
		if (!inode)
			break;
		ino = btrfs_ino(BTRFS_I(inode));

		if (ino > max_key->objectid) {
			iput(inode);
			break;
		}

		objectid = ino + 1;
		if (!S_ISREG(inode->i_mode))
			continue;

		/* Clamp the drop range's start for the first inode in range. */
		if (unlikely(min_key->objectid == ino)) {
			if (min_key->type > BTRFS_EXTENT_DATA_KEY)
				continue;
			if (min_key->type < BTRFS_EXTENT_DATA_KEY)
				start = 0;
			else {
				start = min_key->offset;
				WARN_ON(!IS_ALIGNED(start, fs_info->sectorsize));
			}
		} else {
			start = 0;
		}

		/* Clamp the drop range's end for the last inode in range. */
		if (unlikely(max_key->objectid == ino)) {
			if (max_key->type < BTRFS_EXTENT_DATA_KEY)
				continue;
			if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
				end = (u64)-1;
			} else {
				if (max_key->offset == 0)
					continue;
				end = max_key->offset;
				WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
				end--;
			}
		} else {
			end = (u64)-1;
		}

		/* the lock_extent waits for readpage to complete */
		lock_extent(&BTRFS_I(inode)->io_tree, start, end);
		btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1);
		unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
	}
	return 0;
}
2241
2242static int find_next_key(struct btrfs_path *path, int level,
2243 struct btrfs_key *key)
2244
2245{
2246 while (level < BTRFS_MAX_LEVEL) {
2247 if (!path->nodes[level])
2248 break;
2249 if (path->slots[level] + 1 <
2250 btrfs_header_nritems(path->nodes[level])) {
2251 btrfs_node_key_to_cpu(path->nodes[level], key,
2252 path->slots[level] + 1);
2253 return 0;
2254 }
2255 level++;
2256 }
2257 return 1;
2258}
2259
/*
 * Insert current subvolume into reloc_control::dirty_subvol_roots
 */
static void insert_dirty_subvol(struct btrfs_trans_handle *trans,
				struct reloc_control *rc,
				struct btrfs_root *root)
{
	struct btrfs_root *reloc_root = root->reloc_root;
	struct btrfs_root_item *reloc_root_item;

	/* @root must be a subvolume tree root with a valid reloc tree */
	ASSERT(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
	ASSERT(reloc_root);

	/*
	 * Reset drop_progress so the later btrfs_drop_snapshot() of the
	 * reloc root starts from the beginning, and zero the refs so it
	 * is treated as dead.
	 */
	reloc_root_item = &reloc_root->root_item;
	memset(&reloc_root_item->drop_progress, 0,
	       sizeof(reloc_root_item->drop_progress));
	reloc_root_item->drop_level = 0;
	btrfs_set_root_refs(reloc_root_item, 0);
	btrfs_update_reloc_root(trans, root);

	if (list_empty(&root->reloc_dirty_list)) {
		/* Hold a ref for the dirty_subvol_roots list membership. */
		btrfs_grab_root(root);
		list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
	}
}
2286
/*
 * Clean up everything queued on reloc_control::dirty_subvol_roots: drop
 * the reloc tree of each merged subvolume and drop orphan reloc trees
 * outright.  Returns 0 or the first error encountered (cleanup continues
 * past errors).
 */
static int clean_dirty_subvols(struct reloc_control *rc)
{
	struct btrfs_root *root;
	struct btrfs_root *next;
	int ret = 0;
	int ret2;

	list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
				 reloc_dirty_list) {
		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			/* Merged subvolume, cleanup its reloc root */
			struct btrfs_root *reloc_root = root->reloc_root;

			list_del_init(&root->reloc_dirty_list);
			root->reloc_root = NULL;
			/*
			 * Need barrier to ensure clear_bit() only happens after
			 * root->reloc_root = NULL. Pairs with have_reloc_root.
			 */
			smp_wmb();
			clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
			if (reloc_root) {
				/*
				 * btrfs_drop_snapshot drops our ref we hold for
				 * ->reloc_root. If it fails however we must
				 * drop the ref ourselves.
				 */
				ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
				if (ret2 < 0) {
					btrfs_put_root(reloc_root);
					if (!ret)
						ret = ret2;
				}
			}
			/* Drop the ref taken by insert_dirty_subvol(). */
			btrfs_put_root(root);
		} else {
			/* Orphan reloc tree, just clean it up */
			ret2 = btrfs_drop_snapshot(root, 0, 1);
			if (ret2 < 0) {
				btrfs_put_root(root);
				if (!ret)
					ret = ret2;
			}
		}
	}
	return ret;
}
2334
/*
 * merge the relocated tree blocks in reloc tree with corresponding
 * fs tree.
 *
 * Walks the reloc tree of @root, swapping relocated blocks into the fs
 * tree via replace_path(), restarting the transaction periodically and
 * saving progress in the reloc root's drop_progress so the merge can
 * resume after interruption.  Returns 0 on success or a negative errno.
 */
static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
					       struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_key key;
	struct btrfs_key next_key;
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_root *reloc_root;
	struct btrfs_root_item *root_item;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int level;
	int max_level;
	int replaced = 0;
	int ret;
	int err = 0;
	u32 min_reserved;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_FORWARD;

	reloc_root = root->reloc_root;
	root_item = &reloc_root->root_item;

	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		/* Fresh merge: start from the reloc tree root. */
		level = btrfs_root_level(root_item);
		atomic_inc(&reloc_root->node->refs);
		path->nodes[level] = reloc_root->node;
		path->slots[level] = 0;
	} else {
		/* Resume a previously interrupted merge from drop_progress. */
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);

		level = root_item->drop_level;
		BUG_ON(level == 0);
		path->lowest_level = level;
		ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
		path->lowest_level = 0;
		if (ret < 0) {
			btrfs_free_path(path);
			return ret;
		}

		btrfs_node_key_to_cpu(path->nodes[level], &next_key,
				      path->slots[level]);
		WARN_ON(memcmp(&key, &next_key, sizeof(key)));

		btrfs_unlock_up_safe(path, 0);
	}

	min_reserved = fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
	memset(&next_key, 0, sizeof(next_key));

	while (1) {
		ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
					     BTRFS_RESERVE_FLUSH_ALL);
		if (ret) {
			err = ret;
			goto out;
		}
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			trans = NULL;
			goto out;
		}

		/*
		 * At this point we no longer have a reloc_control, so we can't
		 * depend on btrfs_init_reloc_root to update our last_trans.
		 *
		 * But that's ok, we started the trans handle on our
		 * corresponding fs_root, which means it's been added to the
		 * dirty list. At commit time we'll still call
		 * btrfs_update_reloc_root() and update our root item
		 * appropriately.
		 */
		reloc_root->last_trans = trans->transid;
		trans->block_rsv = rc->block_rsv;

		replaced = 0;
		max_level = level;

		ret = walk_down_reloc_tree(reloc_root, path, &level);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		if (ret > 0)
			break;

		if (!find_next_key(path, level, &key) &&
		    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
			/* Already processed past this key; nothing to swap. */
			ret = 0;
		} else {
			ret = replace_path(trans, rc, root, reloc_root, path,
					   &next_key, level, max_level);
		}
		if (ret < 0) {
			err = ret;
			goto out;
		}

		if (ret > 0) {
			level = ret;
			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			replaced = 1;
		}

		ret = walk_up_reloc_tree(reloc_root, path, &level);
		if (ret > 0)
			break;

		BUG_ON(level == 0);
		/*
		 * save the merging progress in the drop_progress.
		 * this is OK since root refs == 1 in this case.
		 */
		btrfs_node_key(path->nodes[level], &root_item->drop_progress,
			       path->slots[level]);
		root_item->drop_level = level;

		btrfs_end_transaction_throttle(trans);
		trans = NULL;

		btrfs_btree_balance_dirty(fs_info);

		if (replaced && rc->stage == UPDATE_DATA_PTRS)
			invalidate_extent_cache(root, &key, &next_key);
	}

	/*
	 * handle the case only one block in the fs tree need to be
	 * relocated and the block is tree root.
	 */
	leaf = btrfs_lock_root_node(root);
	ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
	btrfs_tree_unlock(leaf);
	free_extent_buffer(leaf);
	if (ret < 0)
		err = ret;
out:
	btrfs_free_path(path);

	if (err == 0)
		insert_dirty_subvol(trans, rc, root);

	if (trans)
		btrfs_end_transaction_throttle(trans);

	btrfs_btree_balance_dirty(fs_info);

	if (replaced && rc->stage == UPDATE_DATA_PTRS)
		invalidate_extent_cache(root, &key, &next_key);

	return err;
}
2498
/*
 * Prepare all reloc trees for merging: reserve metadata space for the
 * merge, set each reloc root's refs to 1 so a crash-recovery pass knows
 * to resume merging, and commit the transaction so the state is durable.
 * @err carries an earlier error; when nonzero, reservation and the refs
 * update are skipped and the transaction is only ended, not committed.
 * Returns @err or a new error.
 */
static noinline_for_stack
int prepare_to_merge(struct reloc_control *rc, int err)
{
	struct btrfs_root *root = rc->extent_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct btrfs_trans_handle *trans;
	LIST_HEAD(reloc_roots);
	u64 num_bytes = 0;
	int ret;

	/* reloc_mutex serializes merging_rsv_size against concurrent updates. */
	mutex_lock(&fs_info->reloc_mutex);
	rc->merging_rsv_size += fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
	rc->merging_rsv_size += rc->nodes_relocated * 2;
	mutex_unlock(&fs_info->reloc_mutex);

again:
	if (!err) {
		num_bytes = rc->merging_rsv_size;
		ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
					  BTRFS_RESERVE_FLUSH_ALL);
		if (ret)
			err = ret;
	}

	trans = btrfs_join_transaction(rc->extent_root);
	if (IS_ERR(trans)) {
		if (!err)
			btrfs_block_rsv_release(fs_info, rc->block_rsv,
						num_bytes, NULL);
		return PTR_ERR(trans);
	}

	if (!err) {
		/*
		 * merging_rsv_size may have grown while we reserved; retry
		 * with the larger amount so the reservation is sufficient.
		 */
		if (num_bytes != rc->merging_rsv_size) {
			btrfs_end_transaction(trans);
			btrfs_block_rsv_release(fs_info, rc->block_rsv,
						num_bytes, NULL);
			goto again;
		}
	}

	rc->merge_reloc_tree = 1;

	while (!list_empty(&rc->reloc_roots)) {
		reloc_root = list_entry(rc->reloc_roots.next,
					struct btrfs_root, root_list);
		list_del_init(&reloc_root->root_list);

		root = read_fs_root(fs_info, reloc_root->root_key.offset);
		BUG_ON(IS_ERR(root));
		BUG_ON(root->reloc_root != reloc_root);

		/*
		 * set reference count to 1, so btrfs_recover_relocation
		 * knows it should resumes merging
		 */
		if (!err)
			btrfs_set_root_refs(&reloc_root->root_item, 1);
		btrfs_update_reloc_root(trans, root);

		list_add(&reloc_root->root_list, &reloc_roots);
		btrfs_put_root(root);
	}

	list_splice(&reloc_roots, &rc->reloc_roots);

	if (!err)
		btrfs_commit_transaction(trans);
	else
		btrfs_end_transaction(trans);
	return err;
}
2572
aca1bba6
LB
2573static noinline_for_stack
2574void free_reloc_roots(struct list_head *list)
2575{
2576 struct btrfs_root *reloc_root;
2577
2578 while (!list_empty(list)) {
2579 reloc_root = list_entry(list->next, struct btrfs_root,
2580 root_list);
bb166d72 2581 __del_reloc_root(reloc_root);
aca1bba6
LB
2582 }
2583}
2584
/*
 * Merge every reloc tree in reloc_control::reloc_roots into its fs tree.
 * Reloc roots with zero refs (from an aborted prepare_to_merge()) are not
 * merged, only queued for cleanup.  On error the remaining reloc roots
 * are freed and the filesystem is flipped read-only via
 * btrfs_handle_fs_error().
 */
static noinline_for_stack
void merge_reloc_roots(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_root *root;
	struct btrfs_root *reloc_root;
	LIST_HEAD(reloc_roots);
	int found = 0;
	int ret = 0;
again:
	root = rc->extent_root;

	/*
	 * this serializes us with btrfs_record_root_in_transaction,
	 * we have to make sure nobody is in the middle of
	 * adding their roots to the list while we are
	 * doing this splice
	 */
	mutex_lock(&fs_info->reloc_mutex);
	list_splice_init(&rc->reloc_roots, &reloc_roots);
	mutex_unlock(&fs_info->reloc_mutex);

	while (!list_empty(&reloc_roots)) {
		found = 1;
		reloc_root = list_entry(reloc_roots.next,
					struct btrfs_root, root_list);

		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
			root = read_fs_root(fs_info,
					    reloc_root->root_key.offset);
			BUG_ON(IS_ERR(root));
			BUG_ON(root->reloc_root != reloc_root);

			ret = merge_reloc_root(rc, root);
			btrfs_put_root(root);
			if (ret) {
				/* Requeue so the error path below frees it. */
				if (list_empty(&reloc_root->root_list))
					list_add_tail(&reloc_root->root_list,
						      &reloc_roots);
				goto out;
			}
		} else {
			list_del_init(&reloc_root->root_list);
			/* Don't forget to queue this reloc root for cleanup */
			list_add_tail(&reloc_root->reloc_dirty_list,
				      &rc->dirty_subvol_roots);
		}
	}

	if (found) {
		found = 0;
		goto again;
	}
out:
	if (ret) {
		btrfs_handle_fs_error(fs_info, ret, NULL);
		if (!list_empty(&reloc_roots))
			free_reloc_roots(&reloc_roots);

		/* new reloc root may be added */
		mutex_lock(&fs_info->reloc_mutex);
		list_splice_init(&rc->reloc_roots, &reloc_roots);
		mutex_unlock(&fs_info->reloc_mutex);
		if (!list_empty(&reloc_roots))
			free_reloc_roots(&reloc_roots);
	}

	/*
	 * We used to have
	 *
	 * BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
	 *
	 * here, but it's wrong. If we fail to start the transaction in
	 * prepare_to_merge() we will have only 0 ref reloc roots, none of which
	 * have actually been removed from the reloc_root_tree rb tree. This is
	 * fine because we're bailing here, and we hold a reference on the root
	 * for the list that holds it, so these roots will be cleaned up when we
	 * do the reloc_dirty_list afterwards. Meanwhile the root->reloc_root
	 * will be cleaned up on unmount.
	 *
	 * The remaining nodes will be cleaned up by free_reloc_control.
	 */
}
2668
2669static void free_block_list(struct rb_root *blocks)
2670{
2671 struct tree_block *block;
2672 struct rb_node *rb_node;
2673 while ((rb_node = rb_first(blocks))) {
2674 block = rb_entry(rb_node, struct tree_block, rb_node);
2675 rb_erase(rb_node, blocks);
2676 kfree(block);
2677 }
2678}
2679
/*
 * Record the fs root that owns @reloc_root in the current transaction,
 * so its reloc tree gets updated at commit time.  A no-op when the reloc
 * root was already recorded in this transaction (last_trans matches).
 * Returns 0 or the error from btrfs_record_root_in_trans().
 */
static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
				      struct btrfs_root *reloc_root)
{
	struct btrfs_fs_info *fs_info = reloc_root->fs_info;
	struct btrfs_root *root;
	int ret;

	if (reloc_root->last_trans == trans->transid)
		return 0;

	/* The reloc root's key offset is the objectid of its fs root. */
	root = read_fs_root(fs_info, reloc_root->root_key.offset);
	BUG_ON(IS_ERR(root));
	BUG_ON(root->reloc_root != reloc_root);
	ret = btrfs_record_root_in_trans(trans, root);
	btrfs_put_root(root);

	return ret;
}
2698
/*
 * Walk up the backref cache from @node and pick the reloc root that the
 * block should be relocated under, recording the root in the current
 * transaction and caching the backref path for btrfs_reloc_cow_block().
 * Returns the chosen root or NULL when no suitable root was found.
 */
static noinline_for_stack
struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
				     struct reloc_control *rc,
				     struct btrfs_backref_node *node,
				     struct btrfs_backref_edge *edges[])
{
	struct btrfs_backref_node *next;
	struct btrfs_root *root;
	int index = 0;

	next = node;
	while (1) {
		cond_resched();
		next = walk_up_backref(next, edges, &index);
		root = next->root;
		BUG_ON(!root);
		BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state));

		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
			/*
			 * NOTE(review): return value of
			 * record_reloc_root_in_trans() is ignored here —
			 * an error would go unnoticed; confirm upstream
			 * handling before relying on this path.
			 */
			record_reloc_root_in_trans(trans, root);
			break;
		}

		btrfs_record_root_in_trans(trans, root);
		root = root->reloc_root;

		if (next->new_bytenr != root->node->start) {
			/* Point the backref node at the reloc tree root. */
			BUG_ON(next->new_bytenr);
			BUG_ON(!list_empty(&next->list));
			next->new_bytenr = root->node->start;
			btrfs_put_root(next->root);
			next->root = btrfs_grab_root(root);
			ASSERT(next->root);
			list_add_tail(&next->list,
				      &rc->backref_cache.changed);
			mark_block_processed(rc, next);
			break;
		}

		WARN_ON(1);
		root = NULL;
		next = walk_down_backref(edges, &index);
		if (!next || next->level <= node->level)
			break;
	}
	if (!root)
		return NULL;

	next = node;
	/* setup backref node path for btrfs_reloc_cow_block */
	while (1) {
		rc->backref_cache.path[next->level] = next;
		if (--index < 0)
			break;
		next = edges[index]->node[UPPER];
	}
	return root;
}
2757
/*
 * select a tree root for relocation. return NULL if the block
 * is reference counted. we should use do_relocation() in this
 * case. return a tree root pointer if the block isn't reference
 * counted. return -ENOENT if the block is root of reloc tree.
 */
static noinline_for_stack
struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
{
	struct btrfs_backref_node *next;
	struct btrfs_root *root;
	struct btrfs_root *fs_root = NULL;
	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	int index = 0;

	next = node;
	while (1) {
		cond_resched();
		next = walk_up_backref(next, edges, &index);
		root = next->root;
		BUG_ON(!root);

		/* no other choice for non-references counted tree */
		if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
			return root;

		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
			fs_root = root;

		/* @node has an upper-level backref: it is shared, use COW. */
		if (next != node)
			return NULL;

		next = walk_down_backref(edges, &index);
		if (!next || next->level <= node->level)
			break;
	}

	/* Only reloc tree roots referenced @node: signal with -ENOENT. */
	if (!fs_root)
		return ERR_PTR(-ENOENT);
	return fs_root;
}
2799
/*
 * Compute the number of bytes of metadata affected by relocating @node:
 * one nodesize per not-yet-processed block reachable upward from @node
 * through the backref cache.  @reserve nonzero means the caller is
 * sizing a reservation (the node itself must then be unprocessed).
 */
static noinline_for_stack
u64 calcu_metadata_size(struct reloc_control *rc,
			struct btrfs_backref_node *node, int reserve)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_backref_node *next = node;
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	u64 num_bytes = 0;
	int index = 0;

	BUG_ON(reserve && node->processed);

	while (next) {
		cond_resched();
		while (1) {
			if (next->processed && (reserve || next != node))
				break;

			num_bytes += fs_info->nodesize;

			if (list_empty(&next->upper))
				break;

			/* Climb the first upper edge; others via walk_down. */
			edge = list_entry(next->upper.next,
					  struct btrfs_backref_edge, list[LOWER]);
			edges[index++] = edge;
			next = edge->node[UPPER];
		}
		next = walk_down_backref(edges, &index);
	}
	return num_bytes;
}
2833
/*
 * Reserve metadata space for relocating @node.  On ENOSPC, grow the
 * block reservation's target size and return -EAGAIN so the caller
 * drops its transaction and retries with full flushing available.
 */
static int reserve_metadata_space(struct btrfs_trans_handle *trans,
				  struct reloc_control *rc,
				  struct btrfs_backref_node *node)
{
	struct btrfs_root *root = rc->extent_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 num_bytes;
	int ret;
	u64 tmp;

	/* Doubled to cover both the reloc tree and the fs tree copies. */
	num_bytes = calcu_metadata_size(rc, node, 1) * 2;

	trans->block_rsv = rc->block_rsv;
	rc->reserved_bytes += num_bytes;

	/*
	 * We are under a transaction here so we can only do limited flushing.
	 * If we get an enospc just kick back -EAGAIN so we know to drop the
	 * transaction and try to refill when we can flush all the things.
	 */
	ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
				     BTRFS_RESERVE_FLUSH_LIMIT);
	if (ret) {
		/* Next power-of-two-ish step above what we already reserved. */
		tmp = fs_info->nodesize * RELOCATION_RESERVED_NODES;
		while (tmp <= rc->reserved_bytes)
			tmp <<= 1;
		/*
		 * only one thread can access block_rsv at this point,
		 * so we don't need hold lock to protect block_rsv.
		 * we expand more reservation size here to allow enough
		 * space for relocation and we will return earlier in
		 * enospc case.
		 */
		rc->block_rsv->size = tmp + fs_info->nodesize *
				      RELOCATION_RESERVED_NODES;
		return -EAGAIN;
	}

	return 0;
}
2874
/*
 * relocate a block tree, and then update pointers in upper level
 * blocks that reference the block to point to the new location.
 *
 * if called by link_to_upper, the block has already been relocated.
 * in that case this function just updates pointers.
 */
static int do_relocation(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc,
			 struct btrfs_backref_node *node,
			 struct btrfs_key *key,
			 struct btrfs_path *path, int lowest)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_backref_node *upper;
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	struct btrfs_root *root;
	struct extent_buffer *eb;
	u32 blocksize;
	u64 bytenr;
	u64 generation;
	int slot;
	int ret;
	int err = 0;

	/* @lowest means the block has not been relocated yet: no eb cached. */
	BUG_ON(lowest && node->eb);

	path->lowest_level = node->level + 1;
	rc->backref_cache.path[node->level] = node;
	/* Process every parent block that references @node. */
	list_for_each_entry(edge, &node->upper, list[LOWER]) {
		struct btrfs_key first_key;
		struct btrfs_ref ref = { 0 };

		cond_resched();

		upper = edge->node[UPPER];
		root = select_reloc_root(trans, rc, upper, edges);
		BUG_ON(!root);

		if (upper->eb && !upper->locked) {
			if (!lowest) {
				/* Skip if the parent already points at us. */
				ret = btrfs_bin_search(upper->eb, key,
						       upper->level, &slot);
				if (ret < 0) {
					err = ret;
					goto next;
				}
				BUG_ON(ret);
				bytenr = btrfs_node_blockptr(upper->eb, slot);
				if (node->eb->start == bytenr)
					goto next;
			}
			drop_node_buffer(upper);
		}

		if (!upper->eb) {
			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
			if (ret) {
				if (ret < 0)
					err = ret;
				else
					err = -ENOENT;

				btrfs_release_path(path);
				break;
			}

			if (!upper->eb) {
				/* Steal the locked buffer from the path. */
				upper->eb = path->nodes[upper->level];
				path->nodes[upper->level] = NULL;
			} else {
				BUG_ON(upper->eb != path->nodes[upper->level]);
			}

			upper->locked = 1;
			path->locks[upper->level] = 0;

			slot = path->slots[upper->level];
			btrfs_release_path(path);
		} else {
			ret = btrfs_bin_search(upper->eb, key, upper->level,
					       &slot);
			if (ret < 0) {
				err = ret;
				goto next;
			}
			BUG_ON(ret);
		}

		bytenr = btrfs_node_blockptr(upper->eb, slot);
		if (lowest) {
			if (bytenr != node->bytenr) {
				btrfs_err(root->fs_info,
	"lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
					  bytenr, node->bytenr, slot,
					  upper->eb->start);
				err = -EIO;
				goto next;
			}
		} else {
			if (node->eb->start == bytenr)
				goto next;
		}

		blocksize = root->fs_info->nodesize;
		generation = btrfs_node_ptr_generation(upper->eb, slot);
		btrfs_node_key_to_cpu(upper->eb, &first_key, slot);
		eb = read_tree_block(fs_info, bytenr, generation,
				     upper->level - 1, &first_key);
		if (IS_ERR(eb)) {
			err = PTR_ERR(eb);
			goto next;
		} else if (!extent_buffer_uptodate(eb)) {
			free_extent_buffer(eb);
			err = -EIO;
			goto next;
		}
		btrfs_tree_lock(eb);
		btrfs_set_lock_blocking_write(eb);

		if (!node->eb) {
			/* First visit: COW relocates the block itself. */
			ret = btrfs_cow_block(trans, root, eb, upper->eb,
					      slot, &eb);
			btrfs_tree_unlock(eb);
			free_extent_buffer(eb);
			if (ret < 0) {
				err = ret;
				goto next;
			}
			BUG_ON(node->eb != eb);
		} else {
			/*
			 * Already relocated: repoint the parent at the new
			 * location, add a ref for the new pointer and drop
			 * the old subtree.
			 */
			btrfs_set_node_blockptr(upper->eb, slot,
						node->eb->start);
			btrfs_set_node_ptr_generation(upper->eb, slot,
						      trans->transid);
			btrfs_mark_buffer_dirty(upper->eb);

			btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
					       node->eb->start, blocksize,
					       upper->eb->start);
			ref.real_root = root->root_key.objectid;
			btrfs_init_tree_ref(&ref, node->level,
					    btrfs_header_owner(upper->eb));
			ret = btrfs_inc_extent_ref(trans, &ref);
			BUG_ON(ret);

			ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
			BUG_ON(ret);
		}
next:
		if (!upper->pending)
			drop_node_buffer(upper);
		else
			unlock_node_buffer(upper);
		if (err)
			break;
	}

	if (!err && node->pending) {
		drop_node_buffer(node);
		list_move_tail(&node->list, &rc->backref_cache.changed);
		node->pending = 0;
	}

	path->lowest_level = 0;
	BUG_ON(err == -ENOSPC);
	return err;
}
3044
3045static int link_to_upper(struct btrfs_trans_handle *trans,
3fd0a558 3046 struct reloc_control *rc,
a26195a5 3047 struct btrfs_backref_node *node,
5d4f98a2
YZ
3048 struct btrfs_path *path)
3049{
3050 struct btrfs_key key;
5d4f98a2
YZ
3051
3052 btrfs_node_key_to_cpu(node->eb, &key, 0);
3fd0a558 3053 return do_relocation(trans, rc, node, &key, path, 0);
5d4f98a2
YZ
3054}
3055
3056static int finish_pending_nodes(struct btrfs_trans_handle *trans,
3fd0a558
YZ
3057 struct reloc_control *rc,
3058 struct btrfs_path *path, int err)
5d4f98a2 3059{
3fd0a558 3060 LIST_HEAD(list);
a26195a5
QW
3061 struct btrfs_backref_cache *cache = &rc->backref_cache;
3062 struct btrfs_backref_node *node;
5d4f98a2
YZ
3063 int level;
3064 int ret;
5d4f98a2
YZ
3065
3066 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
3067 while (!list_empty(&cache->pending[level])) {
3068 node = list_entry(cache->pending[level].next,
a26195a5 3069 struct btrfs_backref_node, list);
3fd0a558
YZ
3070 list_move_tail(&node->list, &list);
3071 BUG_ON(!node->pending);
5d4f98a2 3072
3fd0a558
YZ
3073 if (!err) {
3074 ret = link_to_upper(trans, rc, node, path);
3075 if (ret < 0)
3076 err = ret;
3077 }
5d4f98a2 3078 }
3fd0a558 3079 list_splice_init(&list, &cache->pending[level]);
5d4f98a2 3080 }
5d4f98a2
YZ
3081 return err;
3082}
3083
5d4f98a2
YZ
/*
 * mark a block and all blocks directly/indirectly reference the block
 * as processed.
 *
 * Performs a depth-first walk over the backref graph starting at @node,
 * climbing towards parents (UPPER) and backtracking with
 * walk_down_backref() once a path is exhausted.
 */
static void update_processed_blocks(struct reloc_control *rc,
				    struct btrfs_backref_node *node)
{
	struct btrfs_backref_node *next = node;
	struct btrfs_backref_edge *edge;
	/* Stack of the edges taken on the way up, used for backtracking. */
	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	int index = 0;

	while (next) {
		cond_resched();
		while (1) {
			/* Already marked: everything above it was too. */
			if (next->processed)
				break;

			mark_block_processed(rc, next);

			/* No parents: this upward path is complete. */
			if (list_empty(&next->upper))
				break;

			/* Climb along the first edge to the parent node. */
			edge = list_entry(next->upper.next,
					  struct btrfs_backref_edge, list[LOWER]);
			edges[index++] = edge;
			next = edge->node[UPPER];
		}
		/* Backtrack to the next unvisited sibling branch, if any. */
		next = walk_down_backref(edges, &index);
	}
}
3115
7476dfda 3116static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
3fd0a558 3117{
da17066c 3118 u32 blocksize = rc->extent_root->fs_info->nodesize;
7476dfda 3119
3fd0a558
YZ
3120 if (test_range_bit(&rc->processed_blocks, bytenr,
3121 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
3122 return 1;
3123 return 0;
5d4f98a2
YZ
3124}
3125
2ff7e61e 3126static int get_tree_block_key(struct btrfs_fs_info *fs_info,
5d4f98a2
YZ
3127 struct tree_block *block)
3128{
3129 struct extent_buffer *eb;
3130
581c1760
QW
3131 eb = read_tree_block(fs_info, block->bytenr, block->key.offset,
3132 block->level, NULL);
64c043de
LB
3133 if (IS_ERR(eb)) {
3134 return PTR_ERR(eb);
3135 } else if (!extent_buffer_uptodate(eb)) {
416bc658
JB
3136 free_extent_buffer(eb);
3137 return -EIO;
3138 }
5d4f98a2
YZ
3139 if (block->level == 0)
3140 btrfs_item_key_to_cpu(eb, &block->key, 0);
3141 else
3142 btrfs_node_key_to_cpu(eb, &block->key, 0);
3143 free_extent_buffer(eb);
3144 block->key_ready = 1;
3145 return 0;
3146}
3147
5d4f98a2
YZ
/*
 * helper function to relocate a tree block
 *
 * Depending on what select_one_root() finds for @node:
 *  - ERR_PTR(-ENOENT): every referencing root handles relocation itself
 *    (reloc trees), so just mark the block processed;
 *  - a shareable (REF_COWS) root: COW the root into its reloc tree and
 *    record the node's new location;
 *  - a non-shareable root: COW the path down to the block directly;
 *  - no root: relocate via do_relocation() in lowest mode.
 *
 * On failure, for leaves, or for cow-only nodes the backref node is
 * removed from the cache so it gets rebuilt from scratch next time.
 */
static int relocate_tree_block(struct btrfs_trans_handle *trans,
				struct reloc_control *rc,
				struct btrfs_backref_node *node,
				struct btrfs_key *key,
				struct btrfs_path *path)
{
	struct btrfs_root *root;
	int ret = 0;

	if (!node)
		return 0;

	/*
	 * If we fail here we want to drop our backref_node because we are going
	 * to start over and regenerate the tree for it.
	 */
	ret = reserve_metadata_space(trans, rc, node);
	if (ret)
		goto out;

	BUG_ON(node->processed);
	root = select_one_root(node);
	if (root == ERR_PTR(-ENOENT)) {
		update_processed_blocks(rc, node);
		goto out;
	}

	if (root) {
		if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
			BUG_ON(node->new_bytenr);
			BUG_ON(!list_empty(&node->list));
			btrfs_record_root_in_trans(trans, root);
			/* Work against the root's reloc tree from here on. */
			root = root->reloc_root;
			node->new_bytenr = root->node->start;
			/* Swap the node's root reference to the reloc root. */
			btrfs_put_root(node->root);
			node->root = btrfs_grab_root(root);
			ASSERT(node->root);
			list_add_tail(&node->list, &rc->backref_cache.changed);
		} else {
			/* Non-shareable root: COW down to the block itself. */
			path->lowest_level = node->level;
			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
			btrfs_release_path(path);
			if (ret > 0)
				ret = 0;
		}
		if (!ret)
			update_processed_blocks(rc, node);
	} else {
		ret = do_relocation(trans, rc, node, key, path, 1);
	}
out:
	if (ret || node->level == 0 || node->cowonly)
		remove_backref_node(&rc->backref_cache, node);
	return ret;
}
3206
/*
 * relocate a list of blocks
 *
 * Three passes over the rb-tree of tree_blocks:
 *  1) readahead for blocks whose first key is not known yet,
 *  2) read the missing first keys,
 *  3) build a backref tree for each block and relocate it.
 * Finally any nodes still pending in the backref cache are linked to
 * their parents.  @blocks is always freed before returning.
 */
static noinline_for_stack
int relocate_tree_blocks(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc, struct rb_root *blocks)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_backref_node *node;
	struct btrfs_path *path;
	struct tree_block *block;
	struct tree_block *next;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out_free_blocks;
	}

	/* Kick in readahead for tree blocks with missing keys */
	rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
		if (!block->key_ready)
			readahead_tree_block(fs_info, block->bytenr);
	}

	/* Get first keys */
	rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
		if (!block->key_ready) {
			err = get_tree_block_key(fs_info, block);
			if (err)
				goto out_free_path;
		}
	}

	/* Do tree relocation */
	rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
		node = build_backref_tree(rc, &block->key,
					  block->level, block->bytenr);
		if (IS_ERR(node)) {
			err = PTR_ERR(node);
			goto out;
		}

		ret = relocate_tree_block(trans, rc, node, &block->key,
					  path);
		if (ret < 0) {
			err = ret;
			break;
		}
	}
out:
	/* Link any nodes left pending in the cache, even on error. */
	err = finish_pending_nodes(trans, rc, path, err);

out_free_path:
	btrfs_free_path(path);
out_free_blocks:
	free_block_list(blocks);
	return err;
}
3268
/*
 * Preallocate one file range per cluster boundary in the data reloc
 * inode, covering the whole cluster.
 *
 * Data space for the full cluster is reserved up front; as each range is
 * preallocated, the reservation covering any gap (hole) before it is
 * released again so only the preallocated bytes stay reserved.
 * cluster addresses are translated to file offsets by subtracting
 * index_cnt (the bytenr-to-file-offset delta of the reloc inode).
 */
static noinline_for_stack
int prealloc_file_extent_cluster(struct inode *inode,
				 struct file_extent_cluster *cluster)
{
	u64 alloc_hint = 0;
	u64 start;
	u64 end;
	u64 offset = BTRFS_I(inode)->index_cnt;
	u64 num_bytes;
	int nr = 0;
	int ret = 0;
	u64 prealloc_start = cluster->start - offset;
	u64 prealloc_end = cluster->end - offset;
	u64 cur_offset;
	struct extent_changeset *data_reserved = NULL;

	BUG_ON(cluster->start != cluster->boundary[0]);
	inode_lock(inode);

	/* Reserve data space for the whole cluster range at once. */
	ret = btrfs_check_data_free_space(inode, &data_reserved, prealloc_start,
					  prealloc_end + 1 - prealloc_start);
	if (ret)
		goto out;

	cur_offset = prealloc_start;
	while (nr < cluster->nr) {
		start = cluster->boundary[nr] - offset;
		if (nr + 1 < cluster->nr)
			end = cluster->boundary[nr + 1] - 1 - offset;
		else
			end = cluster->end - offset;

		lock_extent(&BTRFS_I(inode)->io_tree, start, end);
		num_bytes = end + 1 - start;
		/* Give back the reservation covering the hole before @start. */
		if (cur_offset < start)
			btrfs_free_reserved_data_space(inode, data_reserved,
					cur_offset, start - cur_offset);
		ret = btrfs_prealloc_file_range(inode, 0, start,
						num_bytes, num_bytes,
						end + 1, &alloc_hint);
		cur_offset = end + 1;
		unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
		if (ret)
			break;
		nr++;
	}
	/* Release whatever tail of the reservation was not consumed. */
	if (cur_offset < prealloc_end)
		btrfs_free_reserved_data_space(inode, data_reserved,
				cur_offset, prealloc_end + 1 - cur_offset);
out:
	inode_unlock(inode);
	extent_changeset_free(data_reserved);
	return ret;
}
3323
5d4f98a2 3324static noinline_for_stack
0257bb82
YZ
3325int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
3326 u64 block_start)
3327{
0257bb82
YZ
3328 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
3329 struct extent_map *em;
3330 int ret = 0;
3331
172ddd60 3332 em = alloc_extent_map();
0257bb82
YZ
3333 if (!em)
3334 return -ENOMEM;
3335
3336 em->start = start;
3337 em->len = end + 1 - start;
3338 em->block_len = em->len;
3339 em->block_start = block_start;
0257bb82
YZ
3340 set_bit(EXTENT_FLAG_PINNED, &em->flags);
3341
d0082371 3342 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
0257bb82
YZ
3343 while (1) {
3344 write_lock(&em_tree->lock);
09a2a8f9 3345 ret = add_extent_mapping(em_tree, em, 0);
0257bb82
YZ
3346 write_unlock(&em_tree->lock);
3347 if (ret != -EEXIST) {
3348 free_extent_map(em);
3349 break;
3350 }
dcdbc059 3351 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
0257bb82 3352 }
d0082371 3353 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
0257bb82
YZ
3354 return ret;
3355}
3356
/*
 * Allow error injection to test balance cancellation
 *
 * Returns non-zero when cancellation of the running balance has been
 * requested (balance_cancel_req elevated).
 */
int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
{
	return atomic_read(&fs_info->balance_cancel_req);
}
/* Registered with the kernel error-injection framework for testing. */
ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
3365
/*
 * Copy a cluster of data extents into the data reloc inode.
 *
 * After preallocating the destination ranges and installing a pinned
 * extent map, each page of the cluster is read in (if needed), marked
 * delalloc and dirtied, so writeback will write the data out at the
 * preallocated (relocated) location.  Cluster bytenrs are translated to
 * file offsets by subtracting index_cnt.
 *
 * Returns 0 on success, -ECANCELED if balance cancellation was
 * requested, or another negative errno.
 */
static int relocate_file_extent_cluster(struct inode *inode,
					struct file_extent_cluster *cluster)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	u64 page_start;
	u64 page_end;
	u64 offset = BTRFS_I(inode)->index_cnt;
	unsigned long index;
	unsigned long last_index;
	struct page *page;
	struct file_ra_state *ra;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int nr = 0;
	int ret = 0;

	if (!cluster->nr)
		return 0;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return -ENOMEM;

	ret = prealloc_file_extent_cluster(inode, cluster);
	if (ret)
		goto out;

	file_ra_state_init(ra, inode->i_mapping);

	ret = setup_extent_mapping(inode, cluster->start - offset,
				   cluster->end - offset, cluster->start);
	if (ret)
		goto out;

	index = (cluster->start - offset) >> PAGE_SHIFT;
	last_index = (cluster->end - offset) >> PAGE_SHIFT;
	while (index <= last_index) {
		/* Metadata reservation for the delalloc we are about to add. */
		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
				PAGE_SIZE);
		if (ret)
			goto out;

		page = find_lock_page(inode->i_mapping, index);
		if (!page) {
			page_cache_sync_readahead(inode->i_mapping,
						  ra, NULL, index,
						  last_index + 1 - index);
			page = find_or_create_page(inode->i_mapping, index,
						   mask);
			if (!page) {
				btrfs_delalloc_release_metadata(BTRFS_I(inode),
							PAGE_SIZE, true);
				btrfs_delalloc_release_extents(BTRFS_I(inode),
							PAGE_SIZE);
				ret = -ENOMEM;
				goto out;
			}
		}

		if (PageReadahead(page)) {
			page_cache_async_readahead(inode->i_mapping,
						   ra, NULL, page, index,
						   last_index + 1 - index);
		}

		/* Make sure the page content is up to date before dirtying. */
		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				put_page(page);
				btrfs_delalloc_release_metadata(BTRFS_I(inode),
							PAGE_SIZE, true);
				btrfs_delalloc_release_extents(BTRFS_I(inode),
							PAGE_SIZE);
				ret = -EIO;
				goto out;
			}
		}

		page_start = page_offset(page);
		page_end = page_start + PAGE_SIZE - 1;

		lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);

		set_page_extent_mapped(page);

		/* First page of each extent gets an EXTENT_BOUNDARY marker. */
		if (nr < cluster->nr &&
		    page_start + offset == cluster->boundary[nr]) {
			set_extent_bits(&BTRFS_I(inode)->io_tree,
					page_start, page_end,
					EXTENT_BOUNDARY);
			nr++;
		}

		ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
						NULL);
		if (ret) {
			unlock_page(page);
			put_page(page);
			btrfs_delalloc_release_metadata(BTRFS_I(inode),
							 PAGE_SIZE, true);
			btrfs_delalloc_release_extents(BTRFS_I(inode),
						       PAGE_SIZE);

			clear_extent_bits(&BTRFS_I(inode)->io_tree,
					  page_start, page_end,
					  EXTENT_LOCKED | EXTENT_BOUNDARY);
			goto out;

		}
		set_page_dirty(page);

		unlock_extent(&BTRFS_I(inode)->io_tree,
			      page_start, page_end);
		unlock_page(page);
		put_page(page);

		index++;
		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
		balance_dirty_pages_ratelimited(inode->i_mapping);
		btrfs_throttle(fs_info);
		/* Bail out promptly if the balance was cancelled. */
		if (btrfs_should_cancel_balance(fs_info)) {
			ret = -ECANCELED;
			goto out;
		}
	}
	WARN_ON(nr != cluster->nr);
out:
	kfree(ra);
	return ret;
}
3497
3498static noinline_for_stack
0257bb82
YZ
3499int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
3500 struct file_extent_cluster *cluster)
5d4f98a2 3501{
0257bb82 3502 int ret;
5d4f98a2 3503
0257bb82
YZ
3504 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
3505 ret = relocate_file_extent_cluster(inode, cluster);
3506 if (ret)
3507 return ret;
3508 cluster->nr = 0;
5d4f98a2 3509 }
5d4f98a2 3510
0257bb82
YZ
3511 if (!cluster->nr)
3512 cluster->start = extent_key->objectid;
3513 else
3514 BUG_ON(cluster->nr >= MAX_EXTENTS);
3515 cluster->end = extent_key->objectid + extent_key->offset - 1;
3516 cluster->boundary[cluster->nr] = extent_key->objectid;
3517 cluster->nr++;
3518
3519 if (cluster->nr >= MAX_EXTENTS) {
3520 ret = relocate_file_extent_cluster(inode, cluster);
3521 if (ret)
3522 return ret;
3523 cluster->nr = 0;
3524 }
3525 return 0;
5d4f98a2
YZ
3526}
3527
5d4f98a2
YZ
/*
 * helper to add a tree block to the list.
 * the major work is getting the generation and level of the block
 *
 * The level comes either from the tree_block_info trailing an
 * EXTENT_ITEM or, for skinny METADATA_ITEMs, from the key offset.  The
 * generation is stashed in block->key.offset until get_tree_block_key()
 * replaces it with the real first key.
 */
static int add_tree_block(struct reloc_control *rc,
			  struct btrfs_key *extent_key,
			  struct btrfs_path *path,
			  struct rb_root *blocks)
{
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct btrfs_tree_block_info *bi;
	struct tree_block *block;
	struct rb_node *rb_node;
	u32 item_size;
	int level = -1;
	u64 generation;

	eb = path->nodes[0];
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
	    item_size >= sizeof(*ei) + sizeof(*bi)) {
		ei = btrfs_item_ptr(eb, path->slots[0],
				struct btrfs_extent_item);
		if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
			bi = (struct btrfs_tree_block_info *)(ei + 1);
			level = btrfs_tree_block_level(eb, bi);
		} else {
			/* Skinny metadata item: level is the key offset. */
			level = (int)extent_key->offset;
		}
		generation = btrfs_extent_generation(eb, ei);
	} else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
		/* v0 extent items are no longer supported. */
		btrfs_print_v0_err(eb->fs_info);
		btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
		return -EINVAL;
	} else {
		BUG();
	}

	btrfs_release_path(path);

	BUG_ON(level == -1);

	block = kmalloc(sizeof(*block), GFP_NOFS);
	if (!block)
		return -ENOMEM;

	block->bytenr = extent_key->objectid;
	/* key.objectid temporarily holds the block size. */
	block->key.objectid = rc->extent_root->fs_info->nodesize;
	/* key.offset temporarily holds the generation. */
	block->key.offset = generation;
	block->level = level;
	block->key_ready = 0;

	rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node);
	if (rb_node)
		backref_tree_panic(rb_node, -EEXIST, block->bytenr);

	return 0;
}
3588
/*
 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
 *
 * Looks up the extent item for the tree block at @bytenr (preferring the
 * skinny METADATA_ITEM form, falling back to the full EXTENT_ITEM) and
 * queues the block in @blocks via add_tree_block().  Blocks already
 * processed or already queued are skipped.
 */
static int __add_tree_block(struct reloc_control *rc,
			    u64 bytenr, u32 blocksize,
			    struct rb_root *blocks)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;
	bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA);

	if (tree_block_processed(bytenr, rc))
		return 0;

	if (rb_simple_search(blocks, bytenr))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
again:
	key.objectid = bytenr;
	if (skinny) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = (u64)-1;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = blocksize;
	}

	path->search_commit_root = 1;
	path->skip_locking = 1;
	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	if (ret > 0 && skinny) {
		/*
		 * The skinny search keyed (bytenr, METADATA, -1) lands just
		 * after the item; the previous slot may hold either form.
		 */
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    (key.type == BTRFS_METADATA_ITEM_KEY ||
			     (key.type == BTRFS_EXTENT_ITEM_KEY &&
			      key.offset == blocksize)))
				ret = 0;
		}

		/* Not found as skinny: retry with the full extent item. */
		if (ret) {
			skinny = false;
			btrfs_release_path(path);
			goto again;
		}
	}
	if (ret) {
		ASSERT(ret == 1);
		btrfs_print_leaf(path->nodes[0]);
		btrfs_err(fs_info,
	"tree block extent item (%llu) is not found in extent tree",
		     bytenr);
		WARN_ON(1);
		ret = -EINVAL;
		goto out;
	}

	ret = add_tree_block(rc, &key, path, blocks);
out:
	btrfs_free_path(path);
	return ret;
}
3661
/*
 * Truncate the v1 free space cache inode of @block_group.
 *
 * If @inode is NULL it is looked up by @ino in the tree root; either
 * way the inode reference is consumed (iput) before returning, except
 * when the lookup itself fails (-ENOENT).
 */
static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
				    struct btrfs_block_group *block_group,
				    struct inode *inode,
				    u64 ino)
{
	struct btrfs_key key;
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_trans_handle *trans;
	int ret = 0;

	if (inode)
		goto truncate;

	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	inode = btrfs_iget(fs_info->sb, &key, root);
	if (IS_ERR(inode))
		return -ENOENT;

truncate:
	/* Make sure the global reserve can absorb the truncation. */
	ret = btrfs_check_trunc_cache_free_space(fs_info,
						 &fs_info->global_block_rsv);
	if (ret)
		goto out;

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	ret = btrfs_truncate_free_space_cache(trans, block_group, inode);

	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(fs_info);
out:
	iput(inode);
	return ret;
}
3703
5d4f98a2 3704/*
19b546d7
QW
3705 * Locate the free space cache EXTENT_DATA in root tree leaf and delete the
3706 * cache inode, to avoid free space cache data extent blocking data relocation.
5d4f98a2 3707 */
19b546d7
QW
3708static int delete_v1_space_cache(struct extent_buffer *leaf,
3709 struct btrfs_block_group *block_group,
3710 u64 data_bytenr)
5d4f98a2 3711{
19b546d7
QW
3712 u64 space_cache_ino;
3713 struct btrfs_file_extent_item *ei;
5d4f98a2 3714 struct btrfs_key key;
19b546d7
QW
3715 bool found = false;
3716 int i;
5d4f98a2
YZ
3717 int ret;
3718
19b546d7
QW
3719 if (btrfs_header_owner(leaf) != BTRFS_ROOT_TREE_OBJECTID)
3720 return 0;
5d4f98a2 3721
19b546d7
QW
3722 for (i = 0; i < btrfs_header_nritems(leaf); i++) {
3723 btrfs_item_key_to_cpu(leaf, &key, i);
3724 if (key.type != BTRFS_EXTENT_DATA_KEY)
3725 continue;
3726 ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
3727 if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_REG &&
3728 btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) {
3729 found = true;
3730 space_cache_ino = key.objectid;
5d4f98a2 3731 break;
5d4f98a2 3732 }
5d4f98a2 3733 }
19b546d7
QW
3734 if (!found)
3735 return -ENOENT;
3736 ret = delete_block_group_cache(leaf->fs_info, block_group, NULL,
3737 space_cache_ino);
3738 return ret;
5d4f98a2
YZ
3739}
3740
/*
 * helper to find all tree blocks that reference a given data extent
 *
 * Uses the backref walker to collect every leaf referencing the extent,
 * deletes any v1 free space cache inode owning it (so the cache data
 * does not block relocation), and queues each leaf in @blocks.
 * @blocks is freed on error.
 */
static noinline_for_stack
int add_data_references(struct reloc_control *rc,
			struct btrfs_key *extent_key,
			struct btrfs_path *path,
			struct rb_root *blocks)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct ulist *leaves = NULL;
	struct ulist_iterator leaf_uiter;
	struct ulist_node *ref_node = NULL;
	const u32 blocksize = fs_info->nodesize;
	int ret = 0;

	btrfs_release_path(path);
	ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid,
				   0, &leaves, NULL, true);
	if (ret < 0)
		return ret;

	ULIST_ITER_INIT(&leaf_uiter);
	while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
		struct extent_buffer *eb;

		eb = read_tree_block(fs_info, ref_node->val, 0, 0, NULL);
		if (IS_ERR(eb)) {
			ret = PTR_ERR(eb);
			break;
		}
		ret = delete_v1_space_cache(eb, rc->block_group,
					    extent_key->objectid);
		free_extent_buffer(eb);
		if (ret < 0)
			break;
		ret = __add_tree_block(rc, ref_node->val, blocksize, blocks);
		if (ret < 0)
			break;
	}
	if (ret < 0)
		free_block_list(blocks);
	ulist_free(leaves);
	return ret;
}
3786
/*
 * helper to find next unprocessed extent
 *
 * Scans the extent tree (commit root, unlocked) from rc->search_start
 * within the block group being relocated, skipping extents whose range
 * is already marked processed.
 *
 * Returns 0 with @extent_key filled and @path positioned on the item,
 * 1 when the whole block group has been scanned, or a negative errno.
 */
static noinline_for_stack
int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
		     struct btrfs_key *extent_key)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u64 start, end, last;
	int ret;

	/* Never look past the end of the target block group. */
	last = rc->block_group->start + rc->block_group->length;
	while (1) {
		cond_resched();
		if (rc->search_start >= last) {
			ret = 1;
			break;
		}

		key.objectid = rc->search_start;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = 0;

		path->search_commit_root = 1;
		path->skip_locking = 1;
		ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
					0, 0);
		if (ret < 0)
			break;
next:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(rc->extent_root, path);
			if (ret != 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid >= last) {
			ret = 1;
			break;
		}

		/* Only extent items and skinny metadata items matter. */
		if (key.type != BTRFS_EXTENT_ITEM_KEY &&
		    key.type != BTRFS_METADATA_ITEM_KEY) {
			path->slots[0]++;
			goto next;
		}

		/* Skip extents ending at or before the search cursor. */
		if (key.type == BTRFS_EXTENT_ITEM_KEY &&
		    key.objectid + key.offset <= rc->search_start) {
			path->slots[0]++;
			goto next;
		}

		if (key.type == BTRFS_METADATA_ITEM_KEY &&
		    key.objectid + fs_info->nodesize <=
		    rc->search_start) {
			path->slots[0]++;
			goto next;
		}

		ret = find_first_extent_bit(&rc->processed_blocks,
					    key.objectid, &start, &end,
					    EXTENT_DIRTY, NULL);

		if (ret == 0 && start <= key.objectid) {
			/* Already processed: jump the cursor past it. */
			btrfs_release_path(path);
			rc->search_start = end + 1;
		} else {
			if (key.type == BTRFS_EXTENT_ITEM_KEY)
				rc->search_start = key.objectid + key.offset;
			else
				rc->search_start = key.objectid +
					fs_info->nodesize;
			memcpy(extent_key, &key, sizeof(key));
			return 0;
		}
	}
	btrfs_release_path(path);
	return ret;
}
3872
/*
 * Publish @rc as the active relocation control of the filesystem.
 * The pointer is updated under fs_info->reloc_mutex.
 */
static void set_reloc_control(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;

	mutex_lock(&fs_info->reloc_mutex);
	fs_info->reloc_ctl = rc;
	mutex_unlock(&fs_info->reloc_mutex);
}
3881
/*
 * Clear the filesystem's active relocation control pointer, again under
 * fs_info->reloc_mutex.  Counterpart of set_reloc_control().
 */
static void unset_reloc_control(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;

	mutex_lock(&fs_info->reloc_mutex);
	fs_info->reloc_ctl = NULL;
	mutex_unlock(&fs_info->reloc_mutex);
}
3890
3891static int check_extent_flags(u64 flags)
3892{
3893 if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3894 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3895 return 1;
3896 if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
3897 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3898 return 1;
3899 if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3900 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
3901 return 1;
3902 return 0;
3903}
3904
/*
 * Set up a reloc_control for a relocation pass: allocate and fill its
 * temporary block reservation, reset per-pass counters, publish the
 * control, and commit a transaction so reloc tree creation starts from
 * a clean point.
 *
 * On failure the caller is responsible for freeing rc->block_rsv.
 */
static noinline_for_stack
int prepare_to_relocate(struct reloc_control *rc)
{
	struct btrfs_trans_handle *trans;
	int ret;

	rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root->fs_info,
					      BTRFS_BLOCK_RSV_TEMP);
	if (!rc->block_rsv)
		return -ENOMEM;

	memset(&rc->cluster, 0, sizeof(rc->cluster));
	rc->search_start = rc->block_group->start;
	rc->extents_found = 0;
	rc->nodes_relocated = 0;
	rc->merging_rsv_size = 0;
	rc->reserved_bytes = 0;
	/* Reserve enough metadata space for a batch of tree nodes. */
	rc->block_rsv->size = rc->extent_root->fs_info->nodesize *
			      RELOCATION_RESERVED_NODES;
	ret = btrfs_block_rsv_refill(rc->extent_root,
				     rc->block_rsv, rc->block_rsv->size,
				     BTRFS_RESERVE_FLUSH_ALL);
	if (ret)
		return ret;

	rc->create_reloc_tree = 1;
	set_reloc_control(rc);

	trans = btrfs_join_transaction(rc->extent_root);
	if (IS_ERR(trans)) {
		unset_reloc_control(rc);
		/*
		 * extent tree is not a ref_cow tree and has no reloc_root to
		 * cleanup. And callers are responsible to free the above
		 * block rsv.
		 */
		return PTR_ERR(trans);
	}
	btrfs_commit_transaction(trans);
	return 0;
}
76dda93c 3946
5d4f98a2
YZ
3947static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3948{
2ff7e61e 3949 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
5d4f98a2
YZ
3950 struct rb_root blocks = RB_ROOT;
3951 struct btrfs_key key;
3952 struct btrfs_trans_handle *trans = NULL;
3953 struct btrfs_path *path;
3954 struct btrfs_extent_item *ei;
5d4f98a2
YZ
3955 u64 flags;
3956 u32 item_size;
3957 int ret;
3958 int err = 0;
c87f08ca 3959 int progress = 0;
5d4f98a2
YZ
3960
3961 path = btrfs_alloc_path();
3fd0a558 3962 if (!path)
5d4f98a2 3963 return -ENOMEM;
e4058b54 3964 path->reada = READA_FORWARD;
5d4f98a2 3965
3fd0a558
YZ
3966 ret = prepare_to_relocate(rc);
3967 if (ret) {
3968 err = ret;
3969 goto out_free;
3970 }
5d4f98a2
YZ
3971
3972 while (1) {
0647bf56
WS
3973 rc->reserved_bytes = 0;
3974 ret = btrfs_block_rsv_refill(rc->extent_root,
3975 rc->block_rsv, rc->block_rsv->size,
3976 BTRFS_RESERVE_FLUSH_ALL);
3977 if (ret) {
3978 err = ret;
3979 break;
3980 }
c87f08ca 3981 progress++;
a22285a6 3982 trans = btrfs_start_transaction(rc->extent_root, 0);
0f788c58
LB
3983 if (IS_ERR(trans)) {
3984 err = PTR_ERR(trans);
3985 trans = NULL;
3986 break;
3987 }
c87f08ca 3988restart:
3fd0a558 3989 if (update_backref_cache(trans, &rc->backref_cache)) {
3a45bb20 3990 btrfs_end_transaction(trans);
42a657f5 3991 trans = NULL;
3fd0a558
YZ
3992 continue;
3993 }
3994
147d256e 3995 ret = find_next_extent(rc, path, &key);
5d4f98a2
YZ
3996 if (ret < 0)
3997 err = ret;
3998 if (ret != 0)
3999 break;
4000
4001 rc->extents_found++;
4002
4003 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
4004 struct btrfs_extent_item);
3fd0a558 4005 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
5d4f98a2
YZ
4006 if (item_size >= sizeof(*ei)) {
4007 flags = btrfs_extent_flags(path->nodes[0], ei);
4008 ret = check_extent_flags(flags);
4009 BUG_ON(ret);
6d8ff4e4 4010 } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
ba3c2b19
NB
4011 err = -EINVAL;
4012 btrfs_print_v0_err(trans->fs_info);
4013 btrfs_abort_transaction(trans, err);
4014 break;
5d4f98a2 4015 } else {
5d4f98a2 4016 BUG();
5d4f98a2
YZ
4017 }
4018
4019 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
4020 ret = add_tree_block(rc, &key, path, &blocks);
4021 } else if (rc->stage == UPDATE_DATA_PTRS &&
3fd0a558 4022 (flags & BTRFS_EXTENT_FLAG_DATA)) {
5d4f98a2
YZ
4023 ret = add_data_references(rc, &key, path, &blocks);
4024 } else {
b3b4aa74 4025 btrfs_release_path(path);
5d4f98a2
YZ
4026 ret = 0;
4027 }
4028 if (ret < 0) {
3fd0a558 4029 err = ret;
5d4f98a2
YZ
4030 break;
4031 }
4032
4033 if (!RB_EMPTY_ROOT(&blocks)) {
4034 ret = relocate_tree_blocks(trans, rc, &blocks);
4035 if (ret < 0) {
3fd0a558
YZ
4036 if (ret != -EAGAIN) {
4037 err = ret;
4038 break;
4039 }
4040 rc->extents_found--;
4041 rc->search_start = key.objectid;
4042 }
4043 }
4044
3a45bb20 4045 btrfs_end_transaction_throttle(trans);
2ff7e61e 4046 btrfs_btree_balance_dirty(fs_info);
5d4f98a2 4047 trans = NULL;
5d4f98a2
YZ
4048
4049 if (rc->stage == MOVE_DATA_EXTENTS &&
4050 (flags & BTRFS_EXTENT_FLAG_DATA)) {
4051 rc->found_file_extent = 1;
0257bb82 4052 ret = relocate_data_extent(rc->data_inode,
3fd0a558 4053 &key, &rc->cluster);
5d4f98a2
YZ
4054 if (ret < 0) {
4055 err = ret;
4056 break;
4057 }
4058 }
f31ea088
QW
4059 if (btrfs_should_cancel_balance(fs_info)) {
4060 err = -ECANCELED;
4061 break;
4062 }
5d4f98a2 4063 }
c87f08ca 4064 if (trans && progress && err == -ENOSPC) {
43a7e99d 4065 ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
9689457b 4066 if (ret == 1) {
c87f08ca
CM
4067 err = 0;
4068 progress = 0;
4069 goto restart;
4070 }
4071 }
3fd0a558 4072
b3b4aa74 4073 btrfs_release_path(path);
91166212 4074 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY);
5d4f98a2
YZ
4075
4076 if (trans) {
3a45bb20 4077 btrfs_end_transaction_throttle(trans);
2ff7e61e 4078 btrfs_btree_balance_dirty(fs_info);
5d4f98a2
YZ
4079 }
4080
0257bb82 4081 if (!err) {
3fd0a558
YZ
4082 ret = relocate_file_extent_cluster(rc->data_inode,
4083 &rc->cluster);
0257bb82
YZ
4084 if (ret < 0)
4085 err = ret;
4086 }
4087
3fd0a558
YZ
4088 rc->create_reloc_tree = 0;
4089 set_reloc_control(rc);
0257bb82 4090
3fd0a558 4091 backref_cache_cleanup(&rc->backref_cache);
63f018be 4092 btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
5d4f98a2 4093
7f913c7c
QW
4094 /*
4095 * Even in the case when the relocation is cancelled, we should all go
4096 * through prepare_to_merge() and merge_reloc_roots().
4097 *
4098 * For error (including cancelled balance), prepare_to_merge() will
4099 * mark all reloc trees orphan, then queue them for cleanup in
4100 * merge_reloc_roots()
4101 */
3fd0a558 4102 err = prepare_to_merge(rc, err);
5d4f98a2
YZ
4103
4104 merge_reloc_roots(rc);
4105
3fd0a558 4106 rc->merge_reloc_tree = 0;
5d4f98a2 4107 unset_reloc_control(rc);
63f018be 4108 btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
5d4f98a2
YZ
4109
4110 /* get rid of pinned extents */
7a7eaa40 4111 trans = btrfs_join_transaction(rc->extent_root);
62b99540 4112 if (IS_ERR(trans)) {
3612b495 4113 err = PTR_ERR(trans);
62b99540
QW
4114 goto out_free;
4115 }
3a45bb20 4116 btrfs_commit_transaction(trans);
6217b0fa 4117out_free:
d2311e69
QW
4118 ret = clean_dirty_subvols(rc);
4119 if (ret < 0 && !err)
4120 err = ret;
2ff7e61e 4121 btrfs_free_block_rsv(fs_info, rc->block_rsv);
3fd0a558 4122 btrfs_free_path(path);
5d4f98a2
YZ
4123 return err;
4124}
4125
/*
 * Insert a bare inode item with the given objectid into @root.
 *
 * The inode is created with link count implicitly zero (the item is
 * zeroed before the fields below are set), generation 1, size 0 and
 * mode S_IFREG | 0600, flagged NOCOMPRESS|PREALLOC.  Used to back the
 * data relocation inode.
 *
 * Returns 0 on success or a negative errno (-ENOMEM, or an error from
 * btrfs_insert_empty_inode()).
 */
static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root, u64 objectid)
{
	struct btrfs_path *path;
	struct btrfs_inode_item *item;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
	if (ret)
		goto out;

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
	/* Start from an all-zero item so every field not set below is 0. */
	memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
	btrfs_set_inode_generation(leaf, item, 1);
	btrfs_set_inode_size(leaf, item, 0);
	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
	/* Relocated data must not be recompressed; extents are preallocated. */
	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
					  BTRFS_INODE_PREALLOC);
	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return ret;
}
4155
4156/*
4157 * helper to create inode for data relocation.
4158 * the inode is in data relocation tree and its link count is 0
4159 */
3fd0a558
YZ
4160static noinline_for_stack
4161struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
32da5386 4162 struct btrfs_block_group *group)
5d4f98a2
YZ
4163{
4164 struct inode *inode = NULL;
4165 struct btrfs_trans_handle *trans;
4166 struct btrfs_root *root;
4167 struct btrfs_key key;
4624900d 4168 u64 objectid;
5d4f98a2
YZ
4169 int err = 0;
4170
4171 root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
4172 if (IS_ERR(root))
4173 return ERR_CAST(root);
4174
a22285a6 4175 trans = btrfs_start_transaction(root, 6);
76deacf0 4176 if (IS_ERR(trans)) {
00246528 4177 btrfs_put_root(root);
3fd0a558 4178 return ERR_CAST(trans);
76deacf0 4179 }
5d4f98a2 4180
581bb050 4181 err = btrfs_find_free_objectid(root, &objectid);
5d4f98a2
YZ
4182 if (err)
4183 goto out;
4184
0257bb82 4185 err = __insert_orphan_inode(trans, root, objectid);
5d4f98a2
YZ
4186 BUG_ON(err);
4187
4188 key.objectid = objectid;
4189 key.type = BTRFS_INODE_ITEM_KEY;
4190 key.offset = 0;
4c66e0d4 4191 inode = btrfs_iget(fs_info->sb, &key, root);
2e19f1f9 4192 BUG_ON(IS_ERR(inode));
b3470b5d 4193 BTRFS_I(inode)->index_cnt = group->start;
5d4f98a2 4194
73f2e545 4195 err = btrfs_orphan_add(trans, BTRFS_I(inode));
5d4f98a2 4196out:
00246528 4197 btrfs_put_root(root);
3a45bb20 4198 btrfs_end_transaction(trans);
2ff7e61e 4199 btrfs_btree_balance_dirty(fs_info);
5d4f98a2
YZ
4200 if (err) {
4201 if (inode)
4202 iput(inode);
4203 inode = ERR_PTR(err);
4204 }
4205 return inode;
4206}
4207
c258d6e3 4208static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
3fd0a558
YZ
4209{
4210 struct reloc_control *rc;
4211
4212 rc = kzalloc(sizeof(*rc), GFP_NOFS);
4213 if (!rc)
4214 return NULL;
4215
4216 INIT_LIST_HEAD(&rc->reloc_roots);
d2311e69 4217 INIT_LIST_HEAD(&rc->dirty_subvol_roots);
584fb121 4218 btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1);
3fd0a558 4219 mapping_tree_init(&rc->reloc_root_tree);
43eb5f29
QW
4220 extent_io_tree_init(fs_info, &rc->processed_blocks,
4221 IO_TREE_RELOC_BLOCKS, NULL);
3fd0a558
YZ
4222 return rc;
4223}
4224
1a0afa0e
JB
/*
 * Tear down a reloc_control: drop all queued reloc roots, free every
 * mapping_node left in the reloc root rb-tree, then free @rc itself.
 *
 * Postorder traversal is used so each node can be freed while walking
 * without touching already-freed entries.
 */
static void free_reloc_control(struct reloc_control *rc)
{
	struct mapping_node *node, *tmp;

	free_reloc_roots(&rc->reloc_roots);
	rbtree_postorder_for_each_entry_safe(node, tmp,
			&rc->reloc_root_tree.rb_root, rb_node)
		kfree(node);

	kfree(rc);
}
4236
ebce0e01
AB
4237/*
4238 * Print the block group being relocated
4239 */
4240static void describe_relocation(struct btrfs_fs_info *fs_info,
32da5386 4241 struct btrfs_block_group *block_group)
ebce0e01 4242{
f89e09cf 4243 char buf[128] = {'\0'};
ebce0e01 4244
f89e09cf 4245 btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf));
ebce0e01
AB
4246
4247 btrfs_info(fs_info,
4248 "relocating block group %llu flags %s",
b3470b5d 4249 block_group->start, buf);
ebce0e01
AB
4250}
4251
430640e3
QW
4252static const char *stage_to_string(int stage)
4253{
4254 if (stage == MOVE_DATA_EXTENTS)
4255 return "move data extents";
4256 if (stage == UPDATE_DATA_PTRS)
4257 return "update data pointers";
4258 return "unknown";
4259}
4260
5d4f98a2
YZ
4261/*
4262 * function to relocate all extents in a block group.
4263 */
6bccf3ab 4264int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
5d4f98a2 4265{
32da5386 4266 struct btrfs_block_group *bg;
6bccf3ab 4267 struct btrfs_root *extent_root = fs_info->extent_root;
5d4f98a2 4268 struct reloc_control *rc;
0af3d00b
JB
4269 struct inode *inode;
4270 struct btrfs_path *path;
5d4f98a2 4271 int ret;
f0486c68 4272 int rw = 0;
5d4f98a2
YZ
4273 int err = 0;
4274
eede2bf3
OS
4275 bg = btrfs_lookup_block_group(fs_info, group_start);
4276 if (!bg)
4277 return -ENOENT;
4278
4279 if (btrfs_pinned_by_swapfile(fs_info, bg)) {
4280 btrfs_put_block_group(bg);
4281 return -ETXTBSY;
4282 }
4283
c258d6e3 4284 rc = alloc_reloc_control(fs_info);
eede2bf3
OS
4285 if (!rc) {
4286 btrfs_put_block_group(bg);
5d4f98a2 4287 return -ENOMEM;
eede2bf3 4288 }
5d4f98a2 4289
f0486c68 4290 rc->extent_root = extent_root;
eede2bf3 4291 rc->block_group = bg;
5d4f98a2 4292
b12de528 4293 ret = btrfs_inc_block_group_ro(rc->block_group, true);
868f401a
Z
4294 if (ret) {
4295 err = ret;
4296 goto out;
f0486c68 4297 }
868f401a 4298 rw = 1;
f0486c68 4299
0af3d00b
JB
4300 path = btrfs_alloc_path();
4301 if (!path) {
4302 err = -ENOMEM;
4303 goto out;
4304 }
4305
7949f339 4306 inode = lookup_free_space_inode(rc->block_group, path);
0af3d00b
JB
4307 btrfs_free_path(path);
4308
4309 if (!IS_ERR(inode))
1bbc621e 4310 ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
0af3d00b
JB
4311 else
4312 ret = PTR_ERR(inode);
4313
4314 if (ret && ret != -ENOENT) {
4315 err = ret;
4316 goto out;
4317 }
4318
5d4f98a2
YZ
4319 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
4320 if (IS_ERR(rc->data_inode)) {
4321 err = PTR_ERR(rc->data_inode);
4322 rc->data_inode = NULL;
4323 goto out;
4324 }
4325
0b246afa 4326 describe_relocation(fs_info, rc->block_group);
5d4f98a2 4327
9cfa3e34 4328 btrfs_wait_block_group_reservations(rc->block_group);
f78c436c 4329 btrfs_wait_nocow_writers(rc->block_group);
6374e57a 4330 btrfs_wait_ordered_roots(fs_info, U64_MAX,
b3470b5d
DS
4331 rc->block_group->start,
4332 rc->block_group->length);
5d4f98a2
YZ
4333
4334 while (1) {
430640e3
QW
4335 int finishes_stage;
4336
76dda93c 4337 mutex_lock(&fs_info->cleaner_mutex);
5d4f98a2 4338 ret = relocate_block_group(rc);
76dda93c 4339 mutex_unlock(&fs_info->cleaner_mutex);
ff612ba7 4340 if (ret < 0)
5d4f98a2 4341 err = ret;
5d4f98a2 4342
430640e3 4343 finishes_stage = rc->stage;
ff612ba7
JB
4344 /*
4345 * We may have gotten ENOSPC after we already dirtied some
4346 * extents. If writeout happens while we're relocating a
4347 * different block group we could end up hitting the
4348 * BUG_ON(rc->stage == UPDATE_DATA_PTRS) in
4349 * btrfs_reloc_cow_block. Make sure we write everything out
4350 * properly so we don't trip over this problem, and then break
4351 * out of the loop if we hit an error.
4352 */
5d4f98a2 4353 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
0ef8b726
JB
4354 ret = btrfs_wait_ordered_range(rc->data_inode, 0,
4355 (u64)-1);
ff612ba7 4356 if (ret)
0ef8b726 4357 err = ret;
5d4f98a2
YZ
4358 invalidate_mapping_pages(rc->data_inode->i_mapping,
4359 0, -1);
4360 rc->stage = UPDATE_DATA_PTRS;
5d4f98a2 4361 }
ff612ba7
JB
4362
4363 if (err < 0)
4364 goto out;
4365
4366 if (rc->extents_found == 0)
4367 break;
4368
430640e3
QW
4369 btrfs_info(fs_info, "found %llu extents, stage: %s",
4370 rc->extents_found, stage_to_string(finishes_stage));
5d4f98a2
YZ
4371 }
4372
5d4f98a2
YZ
4373 WARN_ON(rc->block_group->pinned > 0);
4374 WARN_ON(rc->block_group->reserved > 0);
bf38be65 4375 WARN_ON(rc->block_group->used > 0);
5d4f98a2 4376out:
f0486c68 4377 if (err && rw)
2ff7e61e 4378 btrfs_dec_block_group_ro(rc->block_group);
5d4f98a2 4379 iput(rc->data_inode);
5d4f98a2 4380 btrfs_put_block_group(rc->block_group);
1a0afa0e 4381 free_reloc_control(rc);
5d4f98a2
YZ
4382 return err;
4383}
4384
76dda93c
YZ
/*
 * Mark an orphaned reloc root as garbage: reset its drop progress and set
 * its refcount to 0 in the root item, so the cleaner can delete the tree.
 *
 * Runs in its own transaction on the tree root.  Returns 0 on success or
 * a negative errno from starting/ending the transaction or updating the
 * root item (the transaction-end error takes precedence).
 */
static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_trans_handle *trans;
	int ret, err;

	trans = btrfs_start_transaction(fs_info->tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* Restart any interrupted tree drop from the very beginning. */
	memset(&root->root_item.drop_progress, 0,
	       sizeof(root->root_item.drop_progress));
	root->root_item.drop_level = 0;
	btrfs_set_root_refs(&root->root_item, 0);
	ret = btrfs_update_root(trans, fs_info->tree_root,
				&root->root_key, &root->root_item);

	err = btrfs_end_transaction(trans);
	if (err)
		return err;
	return ret;
}
4407
5d4f98a2
YZ
4408/*
4409 * recover relocation interrupted by system crash.
4410 *
4411 * this function resumes merging reloc trees with corresponding fs trees.
4412 * this is important for keeping the sharing of tree blocks
4413 */
4414int btrfs_recover_relocation(struct btrfs_root *root)
4415{
0b246afa 4416 struct btrfs_fs_info *fs_info = root->fs_info;
5d4f98a2
YZ
4417 LIST_HEAD(reloc_roots);
4418 struct btrfs_key key;
4419 struct btrfs_root *fs_root;
4420 struct btrfs_root *reloc_root;
4421 struct btrfs_path *path;
4422 struct extent_buffer *leaf;
4423 struct reloc_control *rc = NULL;
4424 struct btrfs_trans_handle *trans;
4425 int ret;
4426 int err = 0;
4427
4428 path = btrfs_alloc_path();
4429 if (!path)
4430 return -ENOMEM;
e4058b54 4431 path->reada = READA_BACK;
5d4f98a2
YZ
4432
4433 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4434 key.type = BTRFS_ROOT_ITEM_KEY;
4435 key.offset = (u64)-1;
4436
4437 while (1) {
0b246afa 4438 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key,
5d4f98a2
YZ
4439 path, 0, 0);
4440 if (ret < 0) {
4441 err = ret;
4442 goto out;
4443 }
4444 if (ret > 0) {
4445 if (path->slots[0] == 0)
4446 break;
4447 path->slots[0]--;
4448 }
4449 leaf = path->nodes[0];
4450 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
b3b4aa74 4451 btrfs_release_path(path);
5d4f98a2
YZ
4452
4453 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
4454 key.type != BTRFS_ROOT_ITEM_KEY)
4455 break;
4456
3dbf1738 4457 reloc_root = btrfs_read_tree_root(root, &key);
5d4f98a2
YZ
4458 if (IS_ERR(reloc_root)) {
4459 err = PTR_ERR(reloc_root);
4460 goto out;
4461 }
4462
3dbf1738 4463 set_bit(BTRFS_ROOT_REF_COWS, &reloc_root->state);
5d4f98a2
YZ
4464 list_add(&reloc_root->root_list, &reloc_roots);
4465
4466 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
0b246afa 4467 fs_root = read_fs_root(fs_info,
5d4f98a2
YZ
4468 reloc_root->root_key.offset);
4469 if (IS_ERR(fs_root)) {
76dda93c
YZ
4470 ret = PTR_ERR(fs_root);
4471 if (ret != -ENOENT) {
4472 err = ret;
4473 goto out;
4474 }
79787eaa
JM
4475 ret = mark_garbage_root(reloc_root);
4476 if (ret < 0) {
4477 err = ret;
4478 goto out;
4479 }
932fd26d 4480 } else {
00246528 4481 btrfs_put_root(fs_root);
5d4f98a2
YZ
4482 }
4483 }
4484
4485 if (key.offset == 0)
4486 break;
4487
4488 key.offset--;
4489 }
b3b4aa74 4490 btrfs_release_path(path);
5d4f98a2
YZ
4491
4492 if (list_empty(&reloc_roots))
4493 goto out;
4494
c258d6e3 4495 rc = alloc_reloc_control(fs_info);
5d4f98a2
YZ
4496 if (!rc) {
4497 err = -ENOMEM;
4498 goto out;
4499 }
4500
0b246afa 4501 rc->extent_root = fs_info->extent_root;
5d4f98a2
YZ
4502
4503 set_reloc_control(rc);
4504
7a7eaa40 4505 trans = btrfs_join_transaction(rc->extent_root);
3612b495 4506 if (IS_ERR(trans)) {
3612b495 4507 err = PTR_ERR(trans);
fb2d83ee 4508 goto out_unset;
3612b495 4509 }
3fd0a558
YZ
4510
4511 rc->merge_reloc_tree = 1;
4512
5d4f98a2
YZ
4513 while (!list_empty(&reloc_roots)) {
4514 reloc_root = list_entry(reloc_roots.next,
4515 struct btrfs_root, root_list);
4516 list_del(&reloc_root->root_list);
4517
4518 if (btrfs_root_refs(&reloc_root->root_item) == 0) {
4519 list_add_tail(&reloc_root->root_list,
4520 &rc->reloc_roots);
4521 continue;
4522 }
4523
0b246afa 4524 fs_root = read_fs_root(fs_info, reloc_root->root_key.offset);
79787eaa
JM
4525 if (IS_ERR(fs_root)) {
4526 err = PTR_ERR(fs_root);
ca1aa281 4527 list_add_tail(&reloc_root->root_list, &reloc_roots);
1402d17d 4528 btrfs_end_transaction(trans);
fb2d83ee 4529 goto out_unset;
79787eaa 4530 }
5d4f98a2 4531
ffd7b339 4532 err = __add_reloc_root(reloc_root);
79787eaa 4533 BUG_ON(err < 0); /* -ENOMEM or logic error */
f44deb74 4534 fs_root->reloc_root = btrfs_grab_root(reloc_root);
00246528 4535 btrfs_put_root(fs_root);
5d4f98a2
YZ
4536 }
4537
3a45bb20 4538 err = btrfs_commit_transaction(trans);
79787eaa 4539 if (err)
fb2d83ee 4540 goto out_unset;
5d4f98a2
YZ
4541
4542 merge_reloc_roots(rc);
4543
4544 unset_reloc_control(rc);
4545
7a7eaa40 4546 trans = btrfs_join_transaction(rc->extent_root);
62b99540 4547 if (IS_ERR(trans)) {
3612b495 4548 err = PTR_ERR(trans);
6217b0fa 4549 goto out_clean;
62b99540 4550 }
3a45bb20 4551 err = btrfs_commit_transaction(trans);
6217b0fa 4552out_clean:
d2311e69
QW
4553 ret = clean_dirty_subvols(rc);
4554 if (ret < 0 && !err)
4555 err = ret;
fb2d83ee
JB
4556out_unset:
4557 unset_reloc_control(rc);
1a0afa0e 4558 free_reloc_control(rc);
3612b495 4559out:
aca1bba6
LB
4560 if (!list_empty(&reloc_roots))
4561 free_reloc_roots(&reloc_roots);
4562
5d4f98a2
YZ
4563 btrfs_free_path(path);
4564
4565 if (err == 0) {
4566 /* cleanup orphan inode in data relocation tree */
0b246afa 4567 fs_root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
932fd26d 4568 if (IS_ERR(fs_root)) {
5d4f98a2 4569 err = PTR_ERR(fs_root);
932fd26d 4570 } else {
9f583209 4571 err = btrfs_orphan_cleanup(fs_root);
00246528 4572 btrfs_put_root(fs_root);
932fd26d 4573 }
5d4f98a2
YZ
4574 }
4575 return err;
4576}
4577
4578/*
4579 * helper to add ordered checksum for data relocation.
4580 *
4581 * cloning checksum properly handles the nodatasum extents.
4582 * it also saves CPU time to re-calculate the checksum.
4583 */
4584int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4585{
0b246afa 4586 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
5d4f98a2 4587 struct btrfs_ordered_sum *sums;
5d4f98a2 4588 struct btrfs_ordered_extent *ordered;
5d4f98a2
YZ
4589 int ret;
4590 u64 disk_bytenr;
4577b014 4591 u64 new_bytenr;
5d4f98a2
YZ
4592 LIST_HEAD(list);
4593
4594 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
bffe633e 4595 BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
5d4f98a2
YZ
4596
4597 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
0b246afa 4598 ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr,
a2de733c 4599 disk_bytenr + len - 1, &list, 0);
79787eaa
JM
4600 if (ret)
4601 goto out;
5d4f98a2
YZ
4602
4603 while (!list_empty(&list)) {
4604 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
4605 list_del_init(&sums->list);
4606
4577b014
JB
4607 /*
4608 * We need to offset the new_bytenr based on where the csum is.
4609 * We need to do this because we will read in entire prealloc
4610 * extents but we may have written to say the middle of the
4611 * prealloc extent, so we need to make sure the csum goes with
4612 * the right disk offset.
4613 *
4614 * We can do this because the data reloc inode refers strictly
4615 * to the on disk bytes, so we don't have to worry about
4616 * disk_len vs real len like with real inodes since it's all
4617 * disk length.
4618 */
bffe633e 4619 new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr;
4577b014 4620 sums->bytenr = new_bytenr;
5d4f98a2 4621
f9756261 4622 btrfs_add_ordered_sum(ordered, sums);
5d4f98a2 4623 }
79787eaa 4624out:
5d4f98a2 4625 btrfs_put_ordered_extent(ordered);
411fc6bc 4626 return ret;
5d4f98a2 4627}
3fd0a558 4628
83d4cfd4
JB
/*
 * Hook called when a tree block is COWed during relocation.
 *
 * If a reloc tree block is COWed while the backref cache is live, the
 * cached node for this level is repointed from the old buffer to @cow
 * and queued on the per-level pending list; on the first COW since the
 * last snapshot the block is marked processed and its length accounted
 * in rc->nodes_relocated (non-leaf blocks only).
 *
 * For leaves first COWed during the UPDATE_DATA_PTRS stage, file extent
 * pointers in @cow are rewritten via replace_file_extents().
 *
 * Returns 0, or the error from replace_file_extents().  A no-op when no
 * relocation is in progress.
 */
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *buf,
			  struct extent_buffer *cow)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct reloc_control *rc;
	struct btrfs_backref_node *node;
	int first_cow = 0;
	int level;
	int ret = 0;

	rc = fs_info->reloc_ctl;
	if (!rc)
		return 0;

	/* The data reloc tree must never be COWed while updating data ptrs. */
	BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
	       root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);

	level = btrfs_header_level(buf);
	/* Older than the last snapshot means this is the first COW. */
	if (btrfs_header_generation(buf) <=
	    btrfs_root_last_snapshot(&root->root_item))
		first_cow = 1;

	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
	    rc->create_reloc_tree) {
		WARN_ON(!first_cow && level == 0);

		node = rc->backref_cache.path[level];
		BUG_ON(node->bytenr != buf->start &&
		       node->new_bytenr != buf->start);

		/* Swap the cached buffer for the freshly COWed one. */
		drop_node_buffer(node);
		atomic_inc(&cow->refs);
		node->eb = cow;
		node->new_bytenr = cow->start;

		if (!node->pending) {
			list_move_tail(&node->list,
				       &rc->backref_cache.pending[level]);
			node->pending = 1;
		}

		if (first_cow)
			mark_block_processed(rc, node);

		if (first_cow && level > 0)
			rc->nodes_relocated += buf->len;
	}

	if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
		ret = replace_file_extents(trans, rc, root, cow);
	return ret;
}
4682
4683/*
4684 * called before creating snapshot. it calculates metadata reservation
01327610 4685 * required for relocating tree blocks in the snapshot
3fd0a558 4686 */
147d256e 4687void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
3fd0a558
YZ
4688 u64 *bytes_to_reserve)
4689{
10995c04
QW
4690 struct btrfs_root *root = pending->root;
4691 struct reloc_control *rc = root->fs_info->reloc_ctl;
3fd0a558 4692
6282675e 4693 if (!rc || !have_reloc_root(root))
3fd0a558
YZ
4694 return;
4695
3fd0a558
YZ
4696 if (!rc->merge_reloc_tree)
4697 return;
4698
4699 root = root->reloc_root;
4700 BUG_ON(btrfs_root_refs(&root->root_item) == 0);
4701 /*
4702 * relocation is in the stage of merging trees. the space
4703 * used by merging a reloc tree is twice the size of
4704 * relocated tree nodes in the worst case. half for cowing
4705 * the reloc tree, half for cowing the fs tree. the space
4706 * used by cowing the reloc tree will be freed after the
4707 * tree is dropped. if we create snapshot, cowing the fs
4708 * tree may use more space than it frees. so we need
4709 * reserve extra space.
4710 */
4711 *bytes_to_reserve += rc->nodes_relocated;
4712}
4713
4714/*
4715 * called after snapshot is created. migrate block reservation
4716 * and create reloc root for the newly created snapshot
f44deb74
JB
4717 *
4718 * This is similar to btrfs_init_reloc_root(), we come out of here with two
4719 * references held on the reloc_root, one for root->reloc_root and one for
4720 * rc->reloc_roots.
3fd0a558 4721 */
49b25e05 4722int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
3fd0a558
YZ
4723 struct btrfs_pending_snapshot *pending)
4724{
4725 struct btrfs_root *root = pending->root;
4726 struct btrfs_root *reloc_root;
4727 struct btrfs_root *new_root;
10995c04 4728 struct reloc_control *rc = root->fs_info->reloc_ctl;
3fd0a558
YZ
4729 int ret;
4730
6282675e 4731 if (!rc || !have_reloc_root(root))
49b25e05 4732 return 0;
3fd0a558
YZ
4733
4734 rc = root->fs_info->reloc_ctl;
4735 rc->merging_rsv_size += rc->nodes_relocated;
4736
4737 if (rc->merge_reloc_tree) {
4738 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
4739 rc->block_rsv,
3a584174 4740 rc->nodes_relocated, true);
49b25e05
JM
4741 if (ret)
4742 return ret;
3fd0a558
YZ
4743 }
4744
4745 new_root = pending->snap;
4746 reloc_root = create_reloc_root(trans, root->reloc_root,
4747 new_root->root_key.objectid);
49b25e05
JM
4748 if (IS_ERR(reloc_root))
4749 return PTR_ERR(reloc_root);
3fd0a558 4750
ffd7b339
JM
4751 ret = __add_reloc_root(reloc_root);
4752 BUG_ON(ret < 0);
f44deb74 4753 new_root->reloc_root = btrfs_grab_root(reloc_root);
3fd0a558 4754
49b25e05 4755 if (rc->create_reloc_tree)
3fd0a558 4756 ret = clone_backref_node(trans, rc, root, reloc_root);
49b25e05 4757 return ret;
3fd0a558 4758}